1//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs global value numbering to eliminate fully redundant
10// instructions. It also performs simple dead load elimination.
11//
12// Note that this pass does the value numbering itself; it does not use the
13// ValueNumbering analysis passes.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/Transforms/Scalar/GVN.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/DepthFirstIterator.h"
20#include "llvm/ADT/Hashing.h"
21#include "llvm/ADT/MapVector.h"
22#include "llvm/ADT/PostOrderIterator.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
25#include "llvm/ADT/SmallPtrSet.h"
26#include "llvm/ADT/SmallVector.h"
27#include "llvm/ADT/Statistic.h"
28#include "llvm/Analysis/AliasAnalysis.h"
29#include "llvm/Analysis/AssumeBundleQueries.h"
30#include "llvm/Analysis/AssumptionCache.h"
31#include "llvm/Analysis/CFG.h"
32#include "llvm/Analysis/DomTreeUpdater.h"
33#include "llvm/Analysis/GlobalsModRef.h"
34#include "llvm/Analysis/InstructionPrecedenceTracking.h"
35#include "llvm/Analysis/InstructionSimplify.h"
36#include "llvm/Analysis/Loads.h"
37#include "llvm/Analysis/LoopInfo.h"
38#include "llvm/Analysis/MemoryBuiltins.h"
39#include "llvm/Analysis/MemoryDependenceAnalysis.h"
40#include "llvm/Analysis/MemorySSA.h"
41#include "llvm/Analysis/MemorySSAUpdater.h"
42#include "llvm/Analysis/OptimizationRemarkEmitter.h"
43#include "llvm/Analysis/PHITransAddr.h"
44#include "llvm/Analysis/TargetLibraryInfo.h"
45#include "llvm/Analysis/ValueTracking.h"
46#include "llvm/IR/Attributes.h"
47#include "llvm/IR/BasicBlock.h"
48#include "llvm/IR/Constant.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DebugLoc.h"
51#include "llvm/IR/Dominators.h"
52#include "llvm/IR/Function.h"
53#include "llvm/IR/InstrTypes.h"
54#include "llvm/IR/Instruction.h"
55#include "llvm/IR/Instructions.h"
56#include "llvm/IR/IntrinsicInst.h"
57#include "llvm/IR/LLVMContext.h"
58#include "llvm/IR/Metadata.h"
59#include "llvm/IR/Module.h"
60#include "llvm/IR/PassManager.h"
61#include "llvm/IR/PatternMatch.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/Value.h"
65#include "llvm/InitializePasses.h"
66#include "llvm/Pass.h"
67#include "llvm/Support/Casting.h"
68#include "llvm/Support/CommandLine.h"
69#include "llvm/Support/Compiler.h"
70#include "llvm/Support/Debug.h"
71#include "llvm/Support/raw_ostream.h"
72#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
73#include "llvm/Transforms/Utils/BasicBlockUtils.h"
74#include "llvm/Transforms/Utils/Local.h"
75#include "llvm/Transforms/Utils/SSAUpdater.h"
76#include "llvm/Transforms/Utils/VNCoercion.h"
77#include <algorithm>
78#include <cassert>
79#include <cstdint>
80#include <optional>
81#include <utility>
82
83using namespace llvm;
84using namespace llvm::gvn;
85using namespace llvm::VNCoercion;
86using namespace PatternMatch;
87
88#define DEBUG_TYPE "gvn"
89
90STATISTIC(NumGVNInstr, "Number of instructions deleted");
91STATISTIC(NumGVNLoad, "Number of loads deleted");
92STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
93STATISTIC(NumGVNBlocks, "Number of blocks merged");
94STATISTIC(NumGVNSimpl, "Number of instructions simplified");
95STATISTIC(NumGVNEqProp, "Number of equalities propagated");
96STATISTIC(NumPRELoad, "Number of loads PRE'd");
97STATISTIC(NumPRELoopLoad, "Number of loop loads PRE'd");
98STATISTIC(NumPRELoadMoved2CEPred,
99 "Number of loads moved to predecessor of a critical edge in PRE");
100
101STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax,
102 "Number of blocks speculated as available in "
103 "IsValueFullyAvailableInBlock(), max");
104STATISTIC(MaxBBSpeculationCutoffReachedTimes,
105 "Number of times we we reached gvn-max-block-speculations cut-off "
106 "preventing further exploration");
107
108static cl::opt<bool> GVNEnablePRE("enable-pre", cl::init(Val: true), cl::Hidden);
109static cl::opt<bool> GVNEnableLoadPRE("enable-load-pre", cl::init(Val: true));
110static cl::opt<bool> GVNEnableLoadInLoopPRE("enable-load-in-loop-pre",
111 cl::init(Val: true));
112static cl::opt<bool>
113GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre",
114 cl::init(Val: false));
115static cl::opt<bool> GVNEnableMemDep("enable-gvn-memdep", cl::init(Val: true));
116static cl::opt<bool> GVNEnableMemorySSA("enable-gvn-memoryssa",
117 cl::init(Val: false));
118
119static cl::opt<uint32_t> MaxNumDeps(
120 "gvn-max-num-deps", cl::Hidden, cl::init(Val: 100),
121 cl::desc("Max number of dependences to attempt Load PRE (default = 100)"));
122
123 // This is based on the IsValueFullyAvailableInBlockNumSpeculationsMax statistic.
124static cl::opt<uint32_t> MaxBBSpeculations(
125 "gvn-max-block-speculations", cl::Hidden, cl::init(Val: 600),
126 cl::desc("Max number of blocks we're willing to speculate on (and recurse "
127 "into) when deducing if a value is fully available or not in GVN "
128 "(default = 600)"));
129
130static cl::opt<uint32_t> MaxNumVisitedInsts(
131 "gvn-max-num-visited-insts", cl::Hidden, cl::init(Val: 100),
132 cl::desc("Max number of visited instructions when trying to find "
133 "dominating value of select dependency (default = 100)"));
134
135static cl::opt<uint32_t> MaxNumInsnsPerBlock(
136 "gvn-max-num-insns", cl::Hidden, cl::init(Val: 100),
137 cl::desc("Max number of instructions to scan in each basic block in GVN "
138 "(default = 100)"));
139
140struct llvm::GVNPass::Expression {
141 uint32_t Opcode;
142 bool Commutative = false;
143 // The type is not necessarily the result type of the expression; it may be
144 // any additional type needed to disambiguate the expression.
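// (Illustrative example, not from the original comment: "zext i8 %x to i32"
// and "zext i8 %x to i64" share an opcode and operand value numbers, so Ty is
// what keeps their expressions distinct.)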
145 Type *Ty = nullptr;
146 SmallVector<uint32_t, 4> VarArgs;
147
148 AttributeList Attrs;
149
150 Expression(uint32_t Op = ~2U) : Opcode(Op) {}
151
152 bool operator==(const Expression &Other) const {
153 if (Opcode != Other.Opcode)
154 return false;
155 if (Opcode == ~0U || Opcode == ~1U)
156 return true;
157 if (Ty != Other.Ty)
158 return false;
159 if (VarArgs != Other.VarArgs)
160 return false;
161 if ((!Attrs.isEmpty() || !Other.Attrs.isEmpty()) &&
162 !Attrs.intersectWith(C&: Ty->getContext(), Other: Other.Attrs).has_value())
163 return false;
164 return true;
165 }
166
167 friend hash_code hash_value(const Expression &Value) {
168 return hash_combine(args: Value.Opcode, args: Value.Ty,
169 args: hash_combine_range(R: Value.VarArgs));
170 }
171};
172
173namespace llvm {
174
175template <> struct DenseMapInfo<GVNPass::Expression> {
176 static inline GVNPass::Expression getEmptyKey() { return ~0U; }
177 static inline GVNPass::Expression getTombstoneKey() { return ~1U; }
178
179 static unsigned getHashValue(const GVNPass::Expression &E) {
180 using llvm::hash_value;
181
182 return static_cast<unsigned>(hash_value(Value: E));
183 }
184
185 static bool isEqual(const GVNPass::Expression &LHS,
186 const GVNPass::Expression &RHS) {
187 return LHS == RHS;
188 }
189};
190
191} // end namespace llvm
192
193/// Represents a particular available value that we know how to materialize.
194/// Materialization of an AvailableValue never fails. An AvailableValue is
195/// implicitly associated with a rematerialization point which is the
196/// location of the instruction from which it was formed.
197struct llvm::gvn::AvailableValue {
198 enum class ValType {
199 SimpleVal, // A simple offsetted value that is accessed.
200 LoadVal, // A value produced by a load.
201 MemIntrin, // A memory intrinsic which is loaded from.
202 UndefVal, // An UndefValue representing a value from a dead block (which
203 // is not yet physically removed from the CFG).
204 SelectVal, // A pointer select which is loaded from and for which the load
205 // can be replaced by a value select.
206 };
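// Illustrative note (not from the original source): a SelectVal arises when a
// load's address is a pointer select and dominating, non-clobbered loads of
// both select operands exist; the load can then be rewritten as a select over
// those two values (see getSelect() and MaterializeAdjustedValue() below).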
207
208 /// Val - The value that is live out of the block.
209 Value *Val;
210 /// Kind of the live-out value.
211 ValType Kind;
212
213 /// Offset - The byte offset in Val that is interesting for the load query.
214 unsigned Offset = 0;
215 /// V1, V2 - The dominating non-clobbered values of SelectVal.
216 Value *V1 = nullptr, *V2 = nullptr;
217
218 static AvailableValue get(Value *V, unsigned Offset = 0) {
219 AvailableValue Res;
220 Res.Val = V;
221 Res.Kind = ValType::SimpleVal;
222 Res.Offset = Offset;
223 return Res;
224 }
225
226 static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset = 0) {
227 AvailableValue Res;
228 Res.Val = MI;
229 Res.Kind = ValType::MemIntrin;
230 Res.Offset = Offset;
231 return Res;
232 }
233
234 static AvailableValue getLoad(LoadInst *Load, unsigned Offset = 0) {
235 AvailableValue Res;
236 Res.Val = Load;
237 Res.Kind = ValType::LoadVal;
238 Res.Offset = Offset;
239 return Res;
240 }
241
242 static AvailableValue getUndef() {
243 AvailableValue Res;
244 Res.Val = nullptr;
245 Res.Kind = ValType::UndefVal;
246 Res.Offset = 0;
247 return Res;
248 }
249
250 static AvailableValue getSelect(SelectInst *Sel, Value *V1, Value *V2) {
251 AvailableValue Res;
252 Res.Val = Sel;
253 Res.Kind = ValType::SelectVal;
254 Res.Offset = 0;
255 Res.V1 = V1;
256 Res.V2 = V2;
257 return Res;
258 }
259
260 bool isSimpleValue() const { return Kind == ValType::SimpleVal; }
261 bool isCoercedLoadValue() const { return Kind == ValType::LoadVal; }
262 bool isMemIntrinValue() const { return Kind == ValType::MemIntrin; }
263 bool isUndefValue() const { return Kind == ValType::UndefVal; }
264 bool isSelectValue() const { return Kind == ValType::SelectVal; }
265
266 Value *getSimpleValue() const {
267 assert(isSimpleValue() && "Wrong accessor");
268 return Val;
269 }
270
271 LoadInst *getCoercedLoadValue() const {
272 assert(isCoercedLoadValue() && "Wrong accessor");
273 return cast<LoadInst>(Val);
274 }
275
276 MemIntrinsic *getMemIntrinValue() const {
277 assert(isMemIntrinValue() && "Wrong accessor");
278 return cast<MemIntrinsic>(Val);
279 }
280
281 SelectInst *getSelectValue() const {
282 assert(isSelectValue() && "Wrong accessor");
283 return cast<SelectInst>(Val);
284 }
285
286 /// Emit code at the specified insertion point to adjust the value defined
287 /// here to the specified type. This handles various coercion cases.
288 Value *MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt) const;
289};
290
291/// Represents an AvailableValue which can be rematerialized at the end of
292/// the associated BasicBlock.
293struct llvm::gvn::AvailableValueInBlock {
294 /// BB - The basic block in question.
295 BasicBlock *BB = nullptr;
296
297 /// AV - The actual available value.
298 AvailableValue AV;
299
300 static AvailableValueInBlock get(BasicBlock *BB, AvailableValue &&AV) {
301 AvailableValueInBlock Res;
302 Res.BB = BB;
303 Res.AV = std::move(AV);
304 return Res;
305 }
306
307 static AvailableValueInBlock get(BasicBlock *BB, Value *V,
308 unsigned Offset = 0) {
309 return get(BB, AV: AvailableValue::get(V, Offset));
310 }
311
312 static AvailableValueInBlock getUndef(BasicBlock *BB) {
313 return get(BB, AV: AvailableValue::getUndef());
314 }
315
316 static AvailableValueInBlock getSelect(BasicBlock *BB, SelectInst *Sel,
317 Value *V1, Value *V2) {
318 return get(BB, AV: AvailableValue::getSelect(Sel, V1, V2));
319 }
320
321 /// Emit code at the end of this block to adjust the value defined here to
322 /// the specified type. This handles various coercion cases.
323 Value *MaterializeAdjustedValue(LoadInst *Load) const {
324 return AV.MaterializeAdjustedValue(Load, InsertPt: BB->getTerminator());
325 }
326};
327
328//===----------------------------------------------------------------------===//
329// ValueTable Internal Functions
330//===----------------------------------------------------------------------===//
331
332GVNPass::Expression GVNPass::ValueTable::createExpr(Instruction *I) {
333 Expression E;
334 E.Ty = I->getType();
335 E.Opcode = I->getOpcode();
336 if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(Val: I)) {
337 // gc.relocate is a 'special' call: its second and third operands are
338 // not real values, but indices into the statepoint's argument list.
339 // Use the referred-to values for purposes of identity.
340 E.VarArgs.push_back(Elt: lookupOrAdd(V: GCR->getOperand(i_nocapture: 0)));
341 E.VarArgs.push_back(Elt: lookupOrAdd(V: GCR->getBasePtr()));
342 E.VarArgs.push_back(Elt: lookupOrAdd(V: GCR->getDerivedPtr()));
343 } else {
344 for (Use &Op : I->operands())
345 E.VarArgs.push_back(Elt: lookupOrAdd(V: Op));
346 }
347 if (I->isCommutative()) {
348 // Ensure that commutative instructions that only differ by a permutation
349 // of their operands get the same value number by sorting the operand value
350 // numbers. Since the commutative operands are the first two, it is more
351 // efficient to sort by hand rather than using, say, std::sort.
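// (Illustrative example: after this sorting, "add i32 %a, %b" and
// "add i32 %b, %a" produce identical VarArgs and therefore receive the same
// value number.)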
352 assert(I->getNumOperands() >= 2 && "Unsupported commutative instruction!");
353 if (E.VarArgs[0] > E.VarArgs[1])
354 std::swap(a&: E.VarArgs[0], b&: E.VarArgs[1]);
355 E.Commutative = true;
356 }
357
358 if (auto *C = dyn_cast<CmpInst>(Val: I)) {
359 // Sort the operand value numbers so x<y and y>x get the same value number.
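// (For instance, "icmp slt i32 %a, %b" and "icmp sgt i32 %b, %a" end up with
// the same sorted operands and swapped predicate, and so share a value
// number.)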
360 CmpInst::Predicate Predicate = C->getPredicate();
361 if (E.VarArgs[0] > E.VarArgs[1]) {
362 std::swap(a&: E.VarArgs[0], b&: E.VarArgs[1]);
363 Predicate = CmpInst::getSwappedPredicate(pred: Predicate);
364 }
365 E.Opcode = (C->getOpcode() << 8) | Predicate;
366 E.Commutative = true;
367 } else if (auto *IVI = dyn_cast<InsertValueInst>(Val: I)) {
368 E.VarArgs.append(in_start: IVI->idx_begin(), in_end: IVI->idx_end());
369 } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(Val: I)) {
370 ArrayRef<int> ShuffleMask = SVI->getShuffleMask();
371 E.VarArgs.append(in_start: ShuffleMask.begin(), in_end: ShuffleMask.end());
372 } else if (auto *CB = dyn_cast<CallBase>(Val: I)) {
373 E.Attrs = CB->getAttributes();
374 }
375
376 return E;
377}
378
379GVNPass::Expression GVNPass::ValueTable::createCmpExpr(
380 unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) {
381 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
382 "Not a comparison!");
383 Expression E;
384 E.Ty = CmpInst::makeCmpResultType(opnd_type: LHS->getType());
385 E.VarArgs.push_back(Elt: lookupOrAdd(V: LHS));
386 E.VarArgs.push_back(Elt: lookupOrAdd(V: RHS));
387
388 // Sort the operand value numbers so x<y and y>x get the same value number.
389 if (E.VarArgs[0] > E.VarArgs[1]) {
390 std::swap(a&: E.VarArgs[0], b&: E.VarArgs[1]);
391 Predicate = CmpInst::getSwappedPredicate(pred: Predicate);
392 }
393 E.Opcode = (Opcode << 8) | Predicate;
394 E.Commutative = true;
395 return E;
396}
397
398GVNPass::Expression
399GVNPass::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
400 assert(EI && "Not an ExtractValueInst?");
401 Expression E;
402 E.Ty = EI->getType();
403 E.Opcode = 0;
404
405 WithOverflowInst *WO = dyn_cast<WithOverflowInst>(Val: EI->getAggregateOperand());
406 if (WO != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
407 // EI is an extract from one of our with.overflow intrinsics. Synthesize
408 // a semantically equivalent expression instead of an extract value
409 // expression.
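// (Illustrative example: extracting index 0 from "llvm.sadd.with.overflow(%a,
// %b)" is numbered as an "add %a, %b" expression rather than as an
// extractvalue expression.)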
410 E.Opcode = WO->getBinaryOp();
411 E.VarArgs.push_back(Elt: lookupOrAdd(V: WO->getLHS()));
412 E.VarArgs.push_back(Elt: lookupOrAdd(V: WO->getRHS()));
413 return E;
414 }
415
416 // Not a recognised intrinsic. Fall back to producing an extract value
417 // expression.
418 E.Opcode = EI->getOpcode();
419 for (Use &Op : EI->operands())
420 E.VarArgs.push_back(Elt: lookupOrAdd(V: Op));
421
422 append_range(C&: E.VarArgs, R: EI->indices());
423
424 return E;
425}
426
427GVNPass::Expression GVNPass::ValueTable::createGEPExpr(GetElementPtrInst *GEP) {
428 Expression E;
429 Type *PtrTy = GEP->getType()->getScalarType();
430 const DataLayout &DL = GEP->getDataLayout();
431 unsigned BitWidth = DL.getIndexTypeSizeInBits(Ty: PtrTy);
432 SmallMapVector<Value *, APInt, 4> VariableOffsets;
433 APInt ConstantOffset(BitWidth, 0);
434 if (GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) {
435 // Convert into offset representation, to recognize equivalent address
436 // calculations that use different type encoding.
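// Illustrative example: "getelementptr i32, ptr %p, i64 1" and
// "getelementptr i8, ptr %p, i64 4" both reduce to %p plus a constant
// 4-byte offset and therefore get the same value number.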
437 LLVMContext &Context = GEP->getContext();
438 E.Opcode = GEP->getOpcode();
439 E.Ty = nullptr;
440 E.VarArgs.push_back(Elt: lookupOrAdd(V: GEP->getPointerOperand()));
441 for (const auto &[V, Scale] : VariableOffsets) {
442 E.VarArgs.push_back(Elt: lookupOrAdd(V));
443 E.VarArgs.push_back(Elt: lookupOrAdd(V: ConstantInt::get(Context, V: Scale)));
444 }
445 if (!ConstantOffset.isZero())
446 E.VarArgs.push_back(
447 Elt: lookupOrAdd(V: ConstantInt::get(Context, V: ConstantOffset)));
448 } else {
449 // If converting to offset representation fails (for scalable vectors),
450 // fall back to type-based implementation.
451 E.Opcode = GEP->getOpcode();
452 E.Ty = GEP->getSourceElementType();
453 for (Use &Op : GEP->operands())
454 E.VarArgs.push_back(Elt: lookupOrAdd(V: Op));
455 }
456 return E;
457}
458
459//===----------------------------------------------------------------------===//
460// ValueTable External Functions
461//===----------------------------------------------------------------------===//
462
463GVNPass::ValueTable::ValueTable() = default;
464GVNPass::ValueTable::ValueTable(const ValueTable &) = default;
465GVNPass::ValueTable::ValueTable(ValueTable &&) = default;
466GVNPass::ValueTable::~ValueTable() = default;
467GVNPass::ValueTable &
468GVNPass::ValueTable::operator=(const GVNPass::ValueTable &Arg) = default;
469
470/// add - Insert a value into the table with a specified value number.
471void GVNPass::ValueTable::add(Value *V, uint32_t Num) {
472 ValueNumbering.insert(KV: std::make_pair(x&: V, y&: Num));
473 if (PHINode *PN = dyn_cast<PHINode>(Val: V))
474 NumberingPhi[Num] = PN;
475}
476
477/// Include the incoming memory state into the hash of the expression for the
478/// given instruction. If the incoming memory state is:
479/// * LiveOnEntry, add the value number of the entry block,
480/// * a MemoryPhi, add the value number of the basic block corresponding to that
481/// MemoryPhi,
482/// * a MemoryDef, add the value number of the memory setting instruction.
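/// For example (illustrative), two loads of the same pointer separated by a
/// clobbering store observe different clobbering memory accesses and thus get
/// different value numbers, while loads that share the same clobbering access
/// can be assigned the same number.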
483void GVNPass::ValueTable::addMemoryStateToExp(Instruction *I, Expression &Exp) {
484 assert(MSSA && "addMemoryStateToExp should not be called without MemorySSA");
485 assert(MSSA->getMemoryAccess(I) && "Instruction does not access memory");
486 MemoryAccess *MA = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(I);
487 Exp.VarArgs.push_back(Elt: lookupOrAdd(MA));
488}
489
490uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) {
491 // FIXME: Currently, calls that may access the thread id may be
492 // considered as not accessing memory. But this is problematic
493 // for coroutines, since a coroutine may resume in a different
494 // thread. So we disable the optimization here for correctness.
495 // However, it may block many other correct optimizations.
496 // Revert this once we can detect the memory accessing kind
497 // more precisely.
498 if (C->getFunction()->isPresplitCoroutine()) {
499 ValueNumbering[C] = NextValueNumber;
500 return NextValueNumber++;
501 }
502
503 // Do not combine convergent calls since they implicitly depend on the set of
504 // threads that is currently executing, and they might be in different basic
505 // blocks.
506 if (C->isConvergent()) {
507 ValueNumbering[C] = NextValueNumber;
508 return NextValueNumber++;
509 }
510
511 if (AA->doesNotAccessMemory(Call: C)) {
512 Expression Exp = createExpr(I: C);
513 uint32_t E = assignExpNewValueNum(Exp).first;
514 ValueNumbering[C] = E;
515 return E;
516 }
517
518 if (MD && AA->onlyReadsMemory(Call: C)) {
519 Expression Exp = createExpr(I: C);
520 auto [E, IsValNumNew] = assignExpNewValueNum(Exp);
521 if (IsValNumNew) {
522 ValueNumbering[C] = E;
523 return E;
524 }
525
526 MemDepResult LocalDep = MD->getDependency(QueryInst: C);
527
528 if (!LocalDep.isDef() && !LocalDep.isNonLocal()) {
529 ValueNumbering[C] = NextValueNumber;
530 return NextValueNumber++;
531 }
532
533 if (LocalDep.isDef()) {
534 // For masked load/store intrinsics, the local dependency may actually be
535 // a normal load or store instruction.
536 CallInst *LocalDepCall = dyn_cast<CallInst>(Val: LocalDep.getInst());
537
538 if (!LocalDepCall || LocalDepCall->arg_size() != C->arg_size()) {
539 ValueNumbering[C] = NextValueNumber;
540 return NextValueNumber++;
541 }
542
543 for (unsigned I = 0, E = C->arg_size(); I < E; ++I) {
544 uint32_t CVN = lookupOrAdd(V: C->getArgOperand(i: I));
545 uint32_t LocalDepCallVN = lookupOrAdd(V: LocalDepCall->getArgOperand(i: I));
546 if (CVN != LocalDepCallVN) {
547 ValueNumbering[C] = NextValueNumber;
548 return NextValueNumber++;
549 }
550 }
551
552 uint32_t V = lookupOrAdd(V: LocalDepCall);
553 ValueNumbering[C] = V;
554 return V;
555 }
556
557 // Non-local case.
558 const MemoryDependenceResults::NonLocalDepInfo &Deps =
559 MD->getNonLocalCallDependency(QueryCall: C);
560 // FIXME: Move the checking logic to MemDep!
561 CallInst *CDep = nullptr;
562
563 // Check to see if we have a single dominating call instruction that is
564 // identical to C.
565 for (const NonLocalDepEntry &I : Deps) {
566 if (I.getResult().isNonLocal())
567 continue;
568
569 // We don't handle non-definitions. If we already have a call, reject
570 // instruction dependencies.
571 if (!I.getResult().isDef() || CDep != nullptr) {
572 CDep = nullptr;
573 break;
574 }
575
576 CallInst *NonLocalDepCall = dyn_cast<CallInst>(Val: I.getResult().getInst());
577 // FIXME: All duplicated with non-local case.
578 if (NonLocalDepCall && DT->properlyDominates(A: I.getBB(), B: C->getParent())) {
579 CDep = NonLocalDepCall;
580 continue;
581 }
582
583 CDep = nullptr;
584 break;
585 }
586
587 if (!CDep) {
588 ValueNumbering[C] = NextValueNumber;
589 return NextValueNumber++;
590 }
591
592 if (CDep->arg_size() != C->arg_size()) {
593 ValueNumbering[C] = NextValueNumber;
594 return NextValueNumber++;
595 }
596 for (unsigned I = 0, E = C->arg_size(); I < E; ++I) {
597 uint32_t CVN = lookupOrAdd(V: C->getArgOperand(i: I));
598 uint32_t CDepVN = lookupOrAdd(V: CDep->getArgOperand(i: I));
599 if (CVN != CDepVN) {
600 ValueNumbering[C] = NextValueNumber;
601 return NextValueNumber++;
602 }
603 }
604
605 uint32_t V = lookupOrAdd(V: CDep);
606 ValueNumbering[C] = V;
607 return V;
608 }
609
610 if (MSSA && IsMSSAEnabled && AA->onlyReadsMemory(Call: C)) {
611 Expression Exp = createExpr(I: C);
612 addMemoryStateToExp(I: C, Exp);
613 auto [V, _] = assignExpNewValueNum(Exp);
614 ValueNumbering[C] = V;
615 return V;
616 }
617
618 ValueNumbering[C] = NextValueNumber;
619 return NextValueNumber++;
620}
621
622/// Returns the value number for the specified load or store instruction.
623uint32_t GVNPass::ValueTable::computeLoadStoreVN(Instruction *I) {
624 if (!MSSA || !IsMSSAEnabled) {
625 ValueNumbering[I] = NextValueNumber;
626 return NextValueNumber++;
627 }
628
629 Expression Exp;
630 Exp.Ty = I->getType();
631 Exp.Opcode = I->getOpcode();
632 for (Use &Op : I->operands())
633 Exp.VarArgs.push_back(Elt: lookupOrAdd(V: Op));
634 addMemoryStateToExp(I, Exp);
635
636 auto [V, _] = assignExpNewValueNum(Exp);
637 ValueNumbering[I] = V;
638 return V;
639}
640
641/// Returns true if a value number exists for the specified value.
642bool GVNPass::ValueTable::exists(Value *V) const {
643 return ValueNumbering.contains(Val: V);
644}
645
646uint32_t GVNPass::ValueTable::lookupOrAdd(MemoryAccess *MA) {
647 return MSSA->isLiveOnEntryDef(MA) || isa<MemoryPhi>(Val: MA)
648 ? lookupOrAdd(V: MA->getBlock())
649 : lookupOrAdd(V: cast<MemoryUseOrDef>(Val: MA)->getMemoryInst());
650}
651
652/// lookupOrAdd - Returns the value number for the specified value, assigning
653/// it a new number if it did not have one before.
654uint32_t GVNPass::ValueTable::lookupOrAdd(Value *V) {
655 DenseMap<Value *, uint32_t>::iterator VI = ValueNumbering.find(Val: V);
656 if (VI != ValueNumbering.end())
657 return VI->second;
658
659 auto *I = dyn_cast<Instruction>(Val: V);
660 if (!I) {
661 ValueNumbering[V] = NextValueNumber;
662 if (isa<BasicBlock>(Val: V))
663 NumberingBB[NextValueNumber] = cast<BasicBlock>(Val: V);
664 return NextValueNumber++;
665 }
666
667 Expression Exp;
668 switch (I->getOpcode()) {
669 case Instruction::Call:
670 return lookupOrAddCall(C: cast<CallInst>(Val: I));
671 case Instruction::FNeg:
672 case Instruction::Add:
673 case Instruction::FAdd:
674 case Instruction::Sub:
675 case Instruction::FSub:
676 case Instruction::Mul:
677 case Instruction::FMul:
678 case Instruction::UDiv:
679 case Instruction::SDiv:
680 case Instruction::FDiv:
681 case Instruction::URem:
682 case Instruction::SRem:
683 case Instruction::FRem:
684 case Instruction::Shl:
685 case Instruction::LShr:
686 case Instruction::AShr:
687 case Instruction::And:
688 case Instruction::Or:
689 case Instruction::Xor:
690 case Instruction::ICmp:
691 case Instruction::FCmp:
692 case Instruction::Trunc:
693 case Instruction::ZExt:
694 case Instruction::SExt:
695 case Instruction::FPToUI:
696 case Instruction::FPToSI:
697 case Instruction::UIToFP:
698 case Instruction::SIToFP:
699 case Instruction::FPTrunc:
700 case Instruction::FPExt:
701 case Instruction::PtrToInt:
702 case Instruction::IntToPtr:
703 case Instruction::AddrSpaceCast:
704 case Instruction::BitCast:
705 case Instruction::Select:
706 case Instruction::Freeze:
707 case Instruction::ExtractElement:
708 case Instruction::InsertElement:
709 case Instruction::ShuffleVector:
710 case Instruction::InsertValue:
711 Exp = createExpr(I);
712 break;
713 case Instruction::GetElementPtr:
714 Exp = createGEPExpr(GEP: cast<GetElementPtrInst>(Val: I));
715 break;
716 case Instruction::ExtractValue:
717 Exp = createExtractvalueExpr(EI: cast<ExtractValueInst>(Val: I));
718 break;
719 case Instruction::PHI:
720 ValueNumbering[V] = NextValueNumber;
721 NumberingPhi[NextValueNumber] = cast<PHINode>(Val: V);
722 return NextValueNumber++;
723 case Instruction::Load:
724 case Instruction::Store:
725 return computeLoadStoreVN(I);
726 default:
727 ValueNumbering[V] = NextValueNumber;
728 return NextValueNumber++;
729 }
730
731 uint32_t E = assignExpNewValueNum(Exp).first;
732 ValueNumbering[V] = E;
733 return E;
734}
735
736/// Returns the value number of the specified value. Fails if
737/// the value has not yet been numbered.
738uint32_t GVNPass::ValueTable::lookup(Value *V, bool Verify) const {
739 DenseMap<Value *, uint32_t>::const_iterator VI = ValueNumbering.find(Val: V);
740 if (Verify) {
741 assert(VI != ValueNumbering.end() && "Value not numbered?");
742 return VI->second;
743 }
744 return (VI != ValueNumbering.end()) ? VI->second : 0;
745}
746
747/// Returns the value number of the given comparison,
748/// assigning it a new number if it did not have one before. Useful when
749/// we deduced the result of a comparison, but don't immediately have an
750/// instruction realizing that comparison to hand.
751uint32_t GVNPass::ValueTable::lookupOrAddCmp(unsigned Opcode,
752 CmpInst::Predicate Predicate,
753 Value *LHS, Value *RHS) {
754 Expression Exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
755 return assignExpNewValueNum(Exp).first;
756}
757
758/// Remove all entries from the ValueTable.
759void GVNPass::ValueTable::clear() {
760 ValueNumbering.clear();
761 ExpressionNumbering.clear();
762 NumberingPhi.clear();
763 NumberingBB.clear();
764 PhiTranslateTable.clear();
765 NextValueNumber = 1;
766 Expressions.clear();
767 ExprIdx.clear();
768 NextExprNumber = 0;
769}
770
771/// Remove a value from the value numbering.
772void GVNPass::ValueTable::erase(Value *V) {
773 uint32_t Num = ValueNumbering.lookup(Val: V);
774 ValueNumbering.erase(Val: V);
775 // If V is a PHINode, V <--> value number is a one-to-one mapping.
776 if (isa<PHINode>(Val: V))
777 NumberingPhi.erase(Val: Num);
778 else if (isa<BasicBlock>(Val: V))
779 NumberingBB.erase(Val: Num);
780}
781
782/// verifyRemoved - Verify that the value is removed from all internal data
783/// structures.
784void GVNPass::ValueTable::verifyRemoved(const Value *V) const {
785 assert(!ValueNumbering.contains(V) &&
786 "Inst still occurs in value numbering map!");
787}
788
789//===----------------------------------------------------------------------===//
790// LeaderMap External Functions
791//===----------------------------------------------------------------------===//
792
793/// Push a new Value to the LeaderTable onto the list for its value number.
794void GVNPass::LeaderMap::insert(uint32_t N, Value *V, const BasicBlock *BB) {
795 LeaderListNode &Curr = NumToLeaders[N];
796 if (!Curr.Entry.Val) {
797 Curr.Entry.Val = V;
798 Curr.Entry.BB = BB;
799 return;
800 }
801
802 LeaderListNode *Node = TableAllocator.Allocate<LeaderListNode>();
803 Node->Entry.Val = V;
804 Node->Entry.BB = BB;
805 Node->Next = Curr.Next;
806 Curr.Next = Node;
807}
808
809/// Scan the list of values corresponding to a given
810/// value number, and remove the given instruction if encountered.
811void GVNPass::LeaderMap::erase(uint32_t N, Instruction *I,
812 const BasicBlock *BB) {
813 LeaderListNode *Prev = nullptr;
814 LeaderListNode *Curr = &NumToLeaders[N];
815
816 while (Curr && (Curr->Entry.Val != I || Curr->Entry.BB != BB)) {
817 Prev = Curr;
818 Curr = Curr->Next;
819 }
820
821 if (!Curr)
822 return;
823
824 if (Prev) {
825 Prev->Next = Curr->Next;
826 } else {
827 if (!Curr->Next) {
828 Curr->Entry.Val = nullptr;
829 Curr->Entry.BB = nullptr;
830 } else {
831 LeaderListNode *Next = Curr->Next;
832 Curr->Entry.Val = Next->Entry.Val;
833 Curr->Entry.BB = Next->Entry.BB;
834 Curr->Next = Next->Next;
835 }
836 }
837}
838
839void GVNPass::LeaderMap::verifyRemoved(const Value *V) const {
840 // Walk through the value number scope to make sure the instruction isn't
841 // ferreted away in it.
842 for (const auto &I : NumToLeaders) {
843 (void)I;
844 assert(I.second.Entry.Val != V && "Inst still in value numbering scope!");
845 assert(
846 std::none_of(leader_iterator(&I.second), leader_iterator(nullptr),
847 [=](const LeaderTableEntry &E) { return E.Val == V; }) &&
848 "Inst still in value numbering scope!");
849 }
850}
851
852//===----------------------------------------------------------------------===//
853// GVN Pass
854//===----------------------------------------------------------------------===//
855
856bool GVNPass::isPREEnabled() const {
857 return Options.AllowPRE.value_or(u&: GVNEnablePRE);
858}
859
860bool GVNPass::isLoadPREEnabled() const {
861 return Options.AllowLoadPRE.value_or(u&: GVNEnableLoadPRE);
862}
863
864bool GVNPass::isLoadInLoopPREEnabled() const {
865 return Options.AllowLoadInLoopPRE.value_or(u&: GVNEnableLoadInLoopPRE);
866}
867
868bool GVNPass::isLoadPRESplitBackedgeEnabled() const {
869 return Options.AllowLoadPRESplitBackedge.value_or(
870 u&: GVNEnableSplitBackedgeInLoadPRE);
871}
872
873bool GVNPass::isMemDepEnabled() const {
874 return Options.AllowMemDep.value_or(u&: GVNEnableMemDep);
875}
876
877bool GVNPass::isMemorySSAEnabled() const {
878 return Options.AllowMemorySSA.value_or(u&: GVNEnableMemorySSA);
879}
880
881PreservedAnalyses GVNPass::run(Function &F, FunctionAnalysisManager &AM) {
882 // FIXME: The order of evaluation of these 'getResult' calls is very
883 // significant! Re-ordering these variables will cause GVN when run alone to
884 // be less effective! We should fix memdep and basic-aa to not exhibit this
885 // behavior, but until then don't change the order here.
886 auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F);
887 auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
888 auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F);
889 auto &AA = AM.getResult<AAManager>(IR&: F);
890 auto *MemDep =
891 isMemDepEnabled() ? &AM.getResult<MemoryDependenceAnalysis>(IR&: F) : nullptr;
892 auto &LI = AM.getResult<LoopAnalysis>(IR&: F);
893 auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(IR&: F);
894 if (isMemorySSAEnabled() && !MSSA) {
895 assert(!MemDep &&
896 "On-demand computation of MemSSA implies that MemDep is disabled!");
897 MSSA = &AM.getResult<MemorySSAAnalysis>(IR&: F);
898 }
899 auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
900 bool Changed = runImpl(F, RunAC&: AC, RunDT&: DT, RunTLI: TLI, RunAA&: AA, RunMD: MemDep, LI, ORE: &ORE,
901 MSSA: MSSA ? &MSSA->getMSSA() : nullptr);
902 if (!Changed)
903 return PreservedAnalyses::all();
904 PreservedAnalyses PA;
905 PA.preserve<DominatorTreeAnalysis>();
906 PA.preserve<TargetLibraryAnalysis>();
907 if (MSSA)
908 PA.preserve<MemorySSAAnalysis>();
909 PA.preserve<LoopAnalysis>();
910 return PA;
911}
912
913void GVNPass::printPipeline(
914 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
915 static_cast<PassInfoMixin<GVNPass> *>(this)->printPipeline(
916 OS, MapClassName2PassName);
917
918 OS << '<';
919 if (Options.AllowPRE != std::nullopt)
920 OS << (*Options.AllowPRE ? "" : "no-") << "pre;";
921 if (Options.AllowLoadPRE != std::nullopt)
922 OS << (*Options.AllowLoadPRE ? "" : "no-") << "load-pre;";
923 if (Options.AllowLoadPRESplitBackedge != std::nullopt)
924 OS << (*Options.AllowLoadPRESplitBackedge ? "" : "no-")
925 << "split-backedge-load-pre;";
926 if (Options.AllowMemDep != std::nullopt)
927 OS << (*Options.AllowMemDep ? "" : "no-") << "memdep;";
928 if (Options.AllowMemorySSA != std::nullopt)
929 OS << (*Options.AllowMemorySSA ? "" : "no-") << "memoryssa";
930 OS << '>';
931}
932
933void GVNPass::salvageAndRemoveInstruction(Instruction *I) {
934 salvageKnowledge(I, AC);
935 salvageDebugInfo(I&: *I);
936 removeInstruction(I);
937}
938
939#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
940LLVM_DUMP_METHOD void GVNPass::dump(DenseMap<uint32_t, Value *> &Map) const {
941 errs() << "{\n";
942 for (const auto &[Num, Exp] : Map) {
943 errs() << Num << "\n";
944 Exp->dump();
945 }
946 errs() << "}\n";
947}
948#endif
949
950enum class AvailabilityState : char {
951 /// We know the block *is not* fully available. This is a fixpoint.
952 Unavailable = 0,
953 /// We know the block *is* fully available. This is a fixpoint.
954 Available = 1,
955 /// We do not know whether the block is fully available or not,
956 /// but we are currently speculating that it will be.
957 /// If it would have turned out that the block was, in fact, not fully
958 /// available, this would have been cleaned up into an Unavailable.
959 SpeculativelyAvailable = 2,
960};
961
962/// Return true if we can prove that the value
963/// we're analyzing is fully available in the specified block. As we go, keep
964/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
965/// map is actually a tri-state map with the following values:
966/// 0) we know the block *is not* fully available.
967/// 1) we know the block *is* fully available.
968/// 2) we do not know whether the block is fully available or not, but we are
969/// currently speculating that it will be.
970static bool IsValueFullyAvailableInBlock(
971 BasicBlock *BB,
972 DenseMap<BasicBlock *, AvailabilityState> &FullyAvailableBlocks) {
973 SmallVector<BasicBlock *, 32> Worklist;
974 std::optional<BasicBlock *> UnavailableBB;
975
976 // The number of times we didn't find an entry for a block in a map and
977 // optimistically inserted an entry marking block as speculatively available.
978 unsigned NumNewNewSpeculativelyAvailableBBs = 0;
979
980#ifndef NDEBUG
981 SmallSet<BasicBlock *, 32> NewSpeculativelyAvailableBBs;
982 SmallVector<BasicBlock *, 32> AvailableBBs;
983#endif
984
985 Worklist.emplace_back(Args&: BB);
986 while (!Worklist.empty()) {
987 BasicBlock *CurrBB = Worklist.pop_back_val(); // LIFO - depth-first!
988 // Optimistically assume that the block is Speculatively Available and check
989 // to see if we already know about this block in one lookup.
990 std::pair<DenseMap<BasicBlock *, AvailabilityState>::iterator, bool> IV =
991 FullyAvailableBlocks.try_emplace(
992 Key: CurrBB, Args: AvailabilityState::SpeculativelyAvailable);
993 AvailabilityState &State = IV.first->second;
994
995 // Did the entry already exist for this block?
996 if (!IV.second) {
997 if (State == AvailabilityState::Unavailable) {
998 UnavailableBB = CurrBB;
999 break; // Backpropagate unavailability info.
1000 }
1001
1002#ifndef NDEBUG
1003 AvailableBBs.emplace_back(CurrBB);
1004#endif
1005 continue; // Don't recurse further, but continue processing worklist.
1006 }
1007
1008 // No entry found for block.
1009 ++NumNewNewSpeculativelyAvailableBBs;
1010 bool OutOfBudget = NumNewNewSpeculativelyAvailableBBs > MaxBBSpeculations;
1011
1012 // If we have exhausted our budget, mark this block as unavailable.
1013 // Also, if this block has no predecessors, the value isn't live-in here.
1014 if (OutOfBudget || pred_empty(BB: CurrBB)) {
1015 MaxBBSpeculationCutoffReachedTimes += (int)OutOfBudget;
1016 State = AvailabilityState::Unavailable;
1017 UnavailableBB = CurrBB;
1018 break; // Backpropagate unavailability info.
1019 }
1020
1021 // Tentatively consider this block as speculatively available.
1022#ifndef NDEBUG
1023 NewSpeculativelyAvailableBBs.insert(CurrBB);
1024#endif
1025 // And further recurse into block's predecessors, in depth-first order!
1026 Worklist.append(in_start: pred_begin(BB: CurrBB), in_end: pred_end(BB: CurrBB));
1027 }
1028
1029#if LLVM_ENABLE_STATS
1030 IsValueFullyAvailableInBlockNumSpeculationsMax.updateMax(
1031 NumNewNewSpeculativelyAvailableBBs);
1032#endif
1033
1034 // If the block isn't marked as fixpoint yet
1035 // (the Unavailable and Available states are fixpoints).
1036 auto MarkAsFixpointAndEnqueueSuccessors =
1037 [&](BasicBlock *BB, AvailabilityState FixpointState) {
1038 auto It = FullyAvailableBlocks.find(Val: BB);
1039 if (It == FullyAvailableBlocks.end())
1040 return; // Never queried this block, leave as-is.
1041 switch (AvailabilityState &State = It->second) {
1042 case AvailabilityState::Unavailable:
1043 case AvailabilityState::Available:
1044 return; // Don't backpropagate further, continue processing worklist.
1045 case AvailabilityState::SpeculativelyAvailable: // Fix it!
1046 State = FixpointState;
1047#ifndef NDEBUG
1048 assert(NewSpeculativelyAvailableBBs.erase(BB) &&
1049 "Found a speculatively available successor leftover?");
1050#endif
1051 // Queue successors for further processing.
1052 Worklist.append(in_start: succ_begin(BB), in_end: succ_end(BB));
1053 return;
1054 }
1055 };
1056
1057 if (UnavailableBB) {
1058 // Okay, we have encountered an unavailable block.
1059 // Mark speculatively available blocks reachable from UnavailableBB as
1060 // unavailable as well. Paths are terminated when they reach blocks not in
1061 // FullyAvailableBlocks or they are not marked as speculatively available.
1062 Worklist.clear();
1063 Worklist.append(in_start: succ_begin(BB: *UnavailableBB), in_end: succ_end(BB: *UnavailableBB));
1064 while (!Worklist.empty())
1065 MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
1066 AvailabilityState::Unavailable);
1067 }
1068
1069#ifndef NDEBUG
1070 Worklist.clear();
1071 for (BasicBlock *AvailableBB : AvailableBBs)
1072 Worklist.append(succ_begin(AvailableBB), succ_end(AvailableBB));
1073 while (!Worklist.empty())
1074 MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
1075 AvailabilityState::Available);
1076
1077 assert(NewSpeculativelyAvailableBBs.empty() &&
1078 "Must have fixed all the new speculatively available blocks.");
1079#endif
1080
1081 return !UnavailableBB;
1082}
1083
1084/// If the specified OldValue exists in ValuesPerBlock, replace its value with
1085/// NewValue.
1086static void replaceValuesPerBlockEntry(
1087 SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, Value *OldValue,
1088 Value *NewValue) {
1089 for (AvailableValueInBlock &V : ValuesPerBlock) {
1090 if (V.AV.Val == OldValue)
1091 V.AV.Val = NewValue;
1092 if (V.AV.isSelectValue()) {
1093 if (V.AV.V1 == OldValue)
1094 V.AV.V1 = NewValue;
1095 if (V.AV.V2 == OldValue)
1096 V.AV.V2 = NewValue;
1097 }
1098 }
1099}
1100
1101/// Given a set of loads specified by ValuesPerBlock,
1102/// construct SSA form, allowing us to eliminate Load. This returns the value
1103/// that should be used at Load's definition site.
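/// For example (illustrative), if the value is available as a stored value in
/// one predecessor and as an earlier load in another, SSAUpdater will insert a
/// phi at the join point and that phi is returned for use at Load.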
1104static Value *
1105ConstructSSAForLoadSet(LoadInst *Load,
1106 SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
1107 GVNPass &GVN) {
1108 // Check for the fully redundant, dominating load case. In this case, we can
1109 // just use the dominating value directly.
1110 if (ValuesPerBlock.size() == 1 &&
1111 GVN.getDominatorTree().properlyDominates(A: ValuesPerBlock[0].BB,
1112 B: Load->getParent())) {
1113 assert(!ValuesPerBlock[0].AV.isUndefValue() &&
1114 "Dead BB dominate this block");
1115 return ValuesPerBlock[0].MaterializeAdjustedValue(Load);
1116 }
1117
1118 // Otherwise, we have to construct SSA form.
1119 SmallVector<PHINode*, 8> NewPHIs;
1120 SSAUpdater SSAUpdate(&NewPHIs);
1121 SSAUpdate.Initialize(Ty: Load->getType(), Name: Load->getName());
1122
1123 for (const AvailableValueInBlock &AV : ValuesPerBlock) {
1124 BasicBlock *BB = AV.BB;
1125
1126 if (AV.AV.isUndefValue())
1127 continue;
1128
1129 if (SSAUpdate.HasValueForBlock(BB))
1130 continue;
1131
1132 // If the value is the load that we will be eliminating, and the block it's
1133 // available in is the block that the load is in, then don't add it, as
1134 // SSAUpdater will resolve the value to the relevant phi, which may let it
1135 // avoid phi construction entirely if there's actually only one value.
1136 if (BB == Load->getParent() &&
1137 ((AV.AV.isSimpleValue() && AV.AV.getSimpleValue() == Load) ||
1138 (AV.AV.isCoercedLoadValue() && AV.AV.getCoercedLoadValue() == Load)))
1139 continue;
1140
1141 SSAUpdate.AddAvailableValue(BB, V: AV.MaterializeAdjustedValue(Load));
1142 }
1143
1144 // Perform PHI construction.
1145 return SSAUpdate.GetValueInMiddleOfBlock(BB: Load->getParent());
1146}
1147
1148Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
1149 Instruction *InsertPt) const {
1150 Value *Res;
1151 Type *LoadTy = Load->getType();
1152 const DataLayout &DL = Load->getDataLayout();
1153 if (isSimpleValue()) {
1154 Res = getSimpleValue();
1155 if (Res->getType() != LoadTy) {
1156 Res = getValueForLoad(SrcVal: Res, Offset, LoadTy, InsertPt, F: Load->getFunction());
1157
1158 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset
1159 << " " << *getSimpleValue() << '\n'
1160 << *Res << '\n'
1161 << "\n\n\n");
1162 }
1163 } else if (isCoercedLoadValue()) {
1164 LoadInst *CoercedLoad = getCoercedLoadValue();
1165 if (CoercedLoad->getType() == LoadTy && Offset == 0) {
1166 Res = CoercedLoad;
1167 combineMetadataForCSE(K: CoercedLoad, J: Load, DoesKMove: false);
1168 } else {
1169 Res = getValueForLoad(SrcVal: CoercedLoad, Offset, LoadTy, InsertPt,
1170 F: Load->getFunction());
1171 // We are adding a new user for this load, for which the original
1172 // metadata may not hold. Additionally, the new load may have a different
1173 // size and type, so their metadata cannot be combined in any
1174 // straightforward way.
1175 // Drop all metadata that is not known to cause immediate UB on violation,
1176 // unless the load has !noundef, in which case all metadata violations
1177 // will be promoted to UB.
1178 // TODO: We can combine noalias/alias.scope metadata here, because it is
1179 // independent of the load type.
1180 if (!CoercedLoad->hasMetadata(KindID: LLVMContext::MD_noundef))
1181 CoercedLoad->dropUnknownNonDebugMetadata(
1182 KnownIDs: {LLVMContext::MD_dereferenceable,
1183 LLVMContext::MD_dereferenceable_or_null,
1184 LLVMContext::MD_invariant_load, LLVMContext::MD_invariant_group});
1185 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset
1186 << " " << *getCoercedLoadValue() << '\n'
1187 << *Res << '\n'
1188 << "\n\n\n");
1189 }
1190 } else if (isMemIntrinValue()) {
1191 Res = getMemInstValueForLoad(SrcInst: getMemIntrinValue(), Offset, LoadTy,
1192 InsertPt, DL);
1193 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
1194 << " " << *getMemIntrinValue() << '\n'
1195 << *Res << '\n'
1196 << "\n\n\n");
1197 } else if (isSelectValue()) {
1198 // Introduce a new value select for a load from an eligible pointer select.
1199 SelectInst *Sel = getSelectValue();
1200 assert(V1 && V2 && "both value operands of the select must be present");
1201 Res =
1202 SelectInst::Create(C: Sel->getCondition(), S1: V1, S2: V2, NameStr: "", InsertBefore: Sel->getIterator());
1203 // We use the DebugLoc from the original load here, as this instruction
1204 // materializes the value that would previously have been loaded.
1205 cast<SelectInst>(Val: Res)->setDebugLoc(Load->getDebugLoc());
1206 } else {
1207 llvm_unreachable("Should not materialize value from dead block");
1208 }
1209 assert(Res && "failed to materialize?");
1210 return Res;
1211}
1212
1213static bool isLifetimeStart(const Instruction *Inst) {
1214 if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Val: Inst))
1215 return II->getIntrinsicID() == Intrinsic::lifetime_start;
1216 return false;
1217}
1218
1219/// Assuming To can be reached from both From and Between, does Between lie on
1220/// every path from From to To?
1221static bool liesBetween(const Instruction *From, Instruction *Between,
1222 const Instruction *To, const DominatorTree *DT) {
1223 if (From->getParent() == Between->getParent())
1224 return DT->dominates(Def: From, User: Between);
1225 SmallSet<BasicBlock *, 1> Exclusion;
1226 Exclusion.insert(Ptr: Between->getParent());
1227 return !isPotentiallyReachable(From, To, ExclusionSet: &Exclusion, DT);
1228}
1229
1230static const Instruction *findMayClobberedPtrAccess(LoadInst *Load,
1231 const DominatorTree *DT) {
1232 Value *PtrOp = Load->getPointerOperand();
1233 if (!PtrOp->hasUseList())
1234 return nullptr;
1235
1236 Instruction *OtherAccess = nullptr;
1237
1238 for (auto *U : PtrOp->users()) {
1239 if (U != Load && (isa<LoadInst>(Val: U) || isa<StoreInst>(Val: U))) {
1240 auto *I = cast<Instruction>(Val: U);
1241 if (I->getFunction() == Load->getFunction() && DT->dominates(Def: I, User: Load)) {
1242 // Use the most immediately dominating value.
1243 if (OtherAccess) {
1244 if (DT->dominates(Def: OtherAccess, User: I))
1245 OtherAccess = I;
1246 else
1247 assert(U == OtherAccess || DT->dominates(I, OtherAccess));
1248 } else
1249 OtherAccess = I;
1250 }
1251 }
1252 }
1253
1254 if (OtherAccess)
1255 return OtherAccess;
1256
1257 // There is no dominating use; check if we can find the closest non-dominating
1258 // use that lies between any other potentially available use and Load.
1259 for (auto *U : PtrOp->users()) {
1260 if (U != Load && (isa<LoadInst>(Val: U) || isa<StoreInst>(Val: U))) {
1261 auto *I = cast<Instruction>(Val: U);
1262 if (I->getFunction() == Load->getFunction() &&
1263 isPotentiallyReachable(From: I, To: Load, ExclusionSet: nullptr, DT)) {
1264 if (OtherAccess) {
1265 if (liesBetween(From: OtherAccess, Between: I, To: Load, DT)) {
1266 OtherAccess = I;
1267 } else if (!liesBetween(From: I, Between: OtherAccess, To: Load, DT)) {
1268 // These uses are both partially available at Load were it not for
1269 // the clobber, but neither lies strictly after the other.
1270 OtherAccess = nullptr;
1271 break;
1272 } // else: keep current OtherAccess since it lies between U and
1273 // Load.
1274 } else {
1275 OtherAccess = I;
1276 }
1277 }
1278 }
1279 }
1280
1281 return OtherAccess;
1282}
1283
1284/// Try to locate the three instruction involved in a missed
1285/// load-elimination case that is due to an intervening store.
1286static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo,
1287 const DominatorTree *DT,
1288 OptimizationRemarkEmitter *ORE) {
1289 using namespace ore;
1290
1291 OptimizationRemarkMissed R(DEBUG_TYPE, "LoadClobbered", Load);
1292 R << "load of type " << NV("Type", Load->getType()) << " not eliminated"
1293 << setExtraArgs();
1294
1295 const Instruction *OtherAccess = findMayClobberedPtrAccess(Load, DT);
1296 if (OtherAccess)
1297 R << " in favor of " << NV("OtherAccess", OtherAccess);
1298
1299 R << " because it is clobbered by " << NV("ClobberedBy", DepInfo.getInst());
1300
1301 ORE->emit(OptDiag&: R);
1302}
1303
1304 // Find a non-clobbered value for the Loc memory location in the extended basic
1305 // block (a chain of basic blocks with single predecessors), starting at From.
1306static Value *findDominatingValue(const MemoryLocation &Loc, Type *LoadTy,
1307 Instruction *From, AAResults *AA) {
1308 uint32_t NumVisitedInsts = 0;
1309 BasicBlock *FromBB = From->getParent();
1310 BatchAAResults BatchAA(*AA);
1311 for (BasicBlock *BB = FromBB; BB; BB = BB->getSinglePredecessor())
1312 for (auto *Inst = BB == FromBB ? From : BB->getTerminator();
1313 Inst != nullptr; Inst = Inst->getPrevNonDebugInstruction()) {
1314 // Stop the search if limit is reached.
1315 if (++NumVisitedInsts > MaxNumVisitedInsts)
1316 return nullptr;
1317 if (isModSet(MRI: BatchAA.getModRefInfo(I: Inst, OptLoc: Loc)))
1318 return nullptr;
1319 if (auto *LI = dyn_cast<LoadInst>(Val: Inst))
1320 if (LI->getPointerOperand() == Loc.Ptr && LI->getType() == LoadTy)
1321 return LI;
1322 }
1323 return nullptr;
1324}
1325
1326std::optional<AvailableValue>
1327GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
1328 Value *Address) {
1329 assert(Load->isUnordered() && "rules below are incorrect for ordered access");
1330 assert(DepInfo.isLocal() && "expected a local dependence");
1331
1332 Instruction *DepInst = DepInfo.getInst();
1333
1334 const DataLayout &DL = Load->getDataLayout();
1335 if (DepInfo.isClobber()) {
1336 // If the dependence is to a store that writes to a superset of the bits
1337 // read by the load, we can extract the bits we need for the load from the
1338 // stored value.
1339 if (StoreInst *DepSI = dyn_cast<StoreInst>(Val: DepInst)) {
1340 // Can't forward from non-atomic to atomic without violating memory model.
1341 if (Address && Load->isAtomic() <= DepSI->isAtomic()) {
1342 int Offset =
1343 analyzeLoadFromClobberingStore(LoadTy: Load->getType(), LoadPtr: Address, DepSI, DL);
1344 if (Offset != -1)
1345 return AvailableValue::get(V: DepSI->getValueOperand(), Offset);
1346 }
1347 }
1348
1349 // Check to see if we have something like this:
1350 // load i32* P
1351 // load i8* (P+1)
1352 // if we have this, replace the later with an extraction from the former.
1353 if (LoadInst *DepLoad = dyn_cast<LoadInst>(Val: DepInst)) {
1354 // If this is a clobber and L is the first instruction in its block, then
1355 // we have the first instruction in the entry block.
1356 // Can't forward from non-atomic to atomic without violating memory model.
1357 if (DepLoad != Load && Address &&
1358 Load->isAtomic() <= DepLoad->isAtomic()) {
1359 Type *LoadType = Load->getType();
1360 int Offset = -1;
1361
1362 // If MD reported clobber, check it was nested.
1363 if (DepInfo.isClobber() &&
1364 canCoerceMustAliasedValueToLoad(StoredVal: DepLoad, LoadTy: LoadType,
1365 F: DepLoad->getFunction())) {
1366 const auto ClobberOff = MD->getClobberOffset(DepInst: DepLoad);
1367 // GVN does not deal with negative offsets.
1368 Offset = (ClobberOff == std::nullopt || *ClobberOff < 0)
1369 ? -1
1370 : *ClobberOff;
1371 }
1372 if (Offset == -1)
1373 Offset =
1374 analyzeLoadFromClobberingLoad(LoadTy: LoadType, LoadPtr: Address, DepLI: DepLoad, DL);
1375 if (Offset != -1)
1376 return AvailableValue::getLoad(Load: DepLoad, Offset);
1377 }
1378 }
1379
1380 // If the clobbering value is a memset/memcpy/memmove, see if we can
1381 // forward a value on from it.
1382 if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Val: DepInst)) {
1383 if (Address && !Load->isAtomic()) {
1384 int Offset = analyzeLoadFromClobberingMemInst(LoadTy: Load->getType(), LoadPtr: Address,
1385 DepMI, DL);
1386 if (Offset != -1)
1387 return AvailableValue::getMI(MI: DepMI, Offset);
1388 }
1389 }
1390
1391 // Nothing known about this clobber, have to be conservative.
1392 LLVM_DEBUG(
1393 // fast print dep, using operator<< on instruction is too slow.
1394 dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
1395 dbgs() << " is clobbered by " << *DepInst << '\n';);
1396 if (ORE->allowExtraAnalysis(DEBUG_TYPE))
1397 reportMayClobberedLoad(Load, DepInfo, DT, ORE);
1398
1399 return std::nullopt;
1400 }
1401 assert(DepInfo.isDef() && "follows from above");
1402
1403 // Loading the alloca -> undef.
1404 // Loading immediately after lifetime begin -> undef.
1405 if (isa<AllocaInst>(Val: DepInst) || isLifetimeStart(Inst: DepInst))
1406 return AvailableValue::get(V: UndefValue::get(T: Load->getType()));
1407
1408 if (Constant *InitVal =
1409 getInitialValueOfAllocation(V: DepInst, TLI, Ty: Load->getType()))
1410 return AvailableValue::get(V: InitVal);
1411
1412 if (StoreInst *S = dyn_cast<StoreInst>(Val: DepInst)) {
1413 // Reject loads and stores that are to the same address but are of
1414 // different types if we have to. If the stored value is convertible to
1415 // the loaded value, we can reuse it.
1416 if (!canCoerceMustAliasedValueToLoad(StoredVal: S->getValueOperand(), LoadTy: Load->getType(),
1417 F: S->getFunction()))
1418 return std::nullopt;
1419
1420 // Can't forward from non-atomic to atomic without violating memory model.
1421 if (S->isAtomic() < Load->isAtomic())
1422 return std::nullopt;
1423
1424 return AvailableValue::get(V: S->getValueOperand());
1425 }
1426
1427 if (LoadInst *LD = dyn_cast<LoadInst>(Val: DepInst)) {
1428 // If the types mismatch and we can't handle it, reject reuse of the load.
1429 // If the stored value is larger than or equal to the loaded value, we can
1430 // reuse it.
1431 if (!canCoerceMustAliasedValueToLoad(StoredVal: LD, LoadTy: Load->getType(),
1432 F: LD->getFunction()))
1433 return std::nullopt;
1434
1435 // Can't forward from non-atomic to atomic without violating memory model.
1436 if (LD->isAtomic() < Load->isAtomic())
1437 return std::nullopt;
1438
1439 return AvailableValue::getLoad(Load: LD);
1440 }
1441
1442 // Check if a load whose address depends on a select can be converted to a
1443 // select between load values. There must be no instructions between the found
1444 // loads and DepInst that may clobber the loads.
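// Illustrative IR sketch (names are hypothetical):
//   %ptr = select i1 %c, ptr %a, ptr %b
//   %v = load i32, ptr %ptr
// can be rewritten, given non-clobbered dominating loads %va (from %a) and
// %vb (from %b), as:
//   %v = select i1 %c, i32 %va, i32 %vb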
1445 if (auto *Sel = dyn_cast<SelectInst>(Val: DepInst)) {
1446 assert(Sel->getType() == Load->getPointerOperandType());
1447 auto Loc = MemoryLocation::get(LI: Load);
1448 Value *V1 =
1449 findDominatingValue(Loc: Loc.getWithNewPtr(NewPtr: Sel->getTrueValue()),
1450 LoadTy: Load->getType(), From: DepInst, AA: getAliasAnalysis());
1451 if (!V1)
1452 return std::nullopt;
1453 Value *V2 =
1454 findDominatingValue(Loc: Loc.getWithNewPtr(NewPtr: Sel->getFalseValue()),
1455 LoadTy: Load->getType(), From: DepInst, AA: getAliasAnalysis());
1456 if (!V2)
1457 return std::nullopt;
1458 return AvailableValue::getSelect(Sel, V1, V2);
1459 }
1460
1461 // Unknown def - must be conservative.
1462 LLVM_DEBUG(
1463 // fast print dep, using operator<< on instruction is too slow.
1464 dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
1465 dbgs() << " has unknown def " << *DepInst << '\n';);
1466 return std::nullopt;
1467}
1468
1469void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
1470 AvailValInBlkVect &ValuesPerBlock,
1471 UnavailBlkVect &UnavailableBlocks) {
1472 // Filter out useless results (non-locals, etc.). Keep track of the blocks
1473 // where we have a value available (in ValuesPerBlock), and also whether we
1474 // see dependencies that produce an unknown value for the load (such as a
1475 // call that could potentially clobber the load).
1476 for (const auto &Dep : Deps) {
1477 BasicBlock *DepBB = Dep.getBB();
1478 MemDepResult DepInfo = Dep.getResult();
1479
1480 if (DeadBlocks.count(key: DepBB)) {
1481 // A dead dependent mem-op disguises itself as a load evaluating the same value
1482 // as the load in question.
1483 ValuesPerBlock.push_back(Elt: AvailableValueInBlock::getUndef(BB: DepBB));
1484 continue;
1485 }
1486
1487 if (!DepInfo.isLocal()) {
1488 UnavailableBlocks.push_back(Elt: DepBB);
1489 continue;
1490 }
1491
1492 // The address being loaded in this non-local block may not be the same as
1493 // the pointer operand of the load if PHI translation occurs. Make sure
1494 // to consider the right address.
1495 if (auto AV = AnalyzeLoadAvailability(Load, DepInfo, Address: Dep.getAddress())) {
1496 // Subtlety: because we know this was a non-local dependency, we know
1497 // it's safe to materialize anywhere between the instruction within
1498 // DepInfo and the end of its block.
1499 ValuesPerBlock.push_back(
1500 Elt: AvailableValueInBlock::get(BB: DepBB, AV: std::move(*AV)));
1501 } else {
1502 UnavailableBlocks.push_back(Elt: DepBB);
1503 }
1504 }
1505
1506 assert(Deps.size() == ValuesPerBlock.size() + UnavailableBlocks.size() &&
1507 "post condition violation");
1508}
1509
1510/// Given the following code, v1 is partially available on some edges, but not
1511 /// available on the edge from PredBB. This function tries to find an
1512 /// identical load in the other successor of PredBB.
1513///
1514/// v0 = load %addr
1515/// br %LoadBB
1516///
1517/// LoadBB:
1518/// v1 = load %addr
1519/// ...
1520///
1521/// PredBB:
1522/// ...
1523/// br %cond, label %LoadBB, label %SuccBB
1524///
1525/// SuccBB:
1526/// v2 = load %addr
1527/// ...
1528///
1529LoadInst *GVNPass::findLoadToHoistIntoPred(BasicBlock *Pred, BasicBlock *LoadBB,
1530 LoadInst *Load) {
1531 // For simplicity, we only handle the case where Pred has two successors.
1532 auto *Term = Pred->getTerminator();
1533 if (Term->getNumSuccessors() != 2 || Term->isSpecialTerminator())
1534 return nullptr;
1535 auto *SuccBB = Term->getSuccessor(Idx: 0);
1536 if (SuccBB == LoadBB)
1537 SuccBB = Term->getSuccessor(Idx: 1);
1538 if (!SuccBB->getSinglePredecessor())
1539 return nullptr;
1540
1541 unsigned int NumInsts = MaxNumInsnsPerBlock;
1542 for (Instruction &Inst : *SuccBB) {
1543 if (Inst.isDebugOrPseudoInst())
1544 continue;
1545 if (--NumInsts == 0)
1546 return nullptr;
1547
1548 if (!Inst.isIdenticalTo(I: Load))
1549 continue;
1550
1551 MemDepResult Dep = MD->getDependency(QueryInst: &Inst);
1552 // If an identical load doesn't depend on any local instructions, it can
1553 // be safely moved to PredBB.
1554 // Also check for the implicit control flow instructions. See the comments
1555 // in PerformLoadPRE for details.
1556 if (Dep.isNonLocal() && !ICF->isDominatedByICFIFromSameBlock(Insn: &Inst))
1557 return cast<LoadInst>(Val: &Inst);
1558
1559 // Otherwise something in the same BB clobbers the memory, so we can't move
1560 // this load and the later load to PredBB.
1561 return nullptr;
1562 }
1563
1564 return nullptr;
1565}
1566
1567void GVNPass::eliminatePartiallyRedundantLoad(
1568 LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
1569 MapVector<BasicBlock *, Value *> &AvailableLoads,
1570 MapVector<BasicBlock *, LoadInst *> *CriticalEdgePredAndLoad) {
1571 for (const auto &AvailableLoad : AvailableLoads) {
1572 BasicBlock *UnavailableBlock = AvailableLoad.first;
1573 Value *LoadPtr = AvailableLoad.second;
1574
1575 auto *NewLoad = new LoadInst(
1576 Load->getType(), LoadPtr, Load->getName() + ".pre", Load->isVolatile(),
1577 Load->getAlign(), Load->getOrdering(), Load->getSyncScopeID(),
1578 UnavailableBlock->getTerminator()->getIterator());
1579 NewLoad->setDebugLoc(Load->getDebugLoc());
1580 if (MSSAU) {
1581 auto *NewAccess = MSSAU->createMemoryAccessInBB(
1582 I: NewLoad, Definition: nullptr, BB: NewLoad->getParent(), Point: MemorySSA::BeforeTerminator);
1583 if (auto *NewDef = dyn_cast<MemoryDef>(Val: NewAccess))
1584 MSSAU->insertDef(Def: NewDef, /*RenameUses=*/true);
1585 else
1586 MSSAU->insertUse(Use: cast<MemoryUse>(Val: NewAccess), /*RenameUses=*/true);
1587 }
1588
1589 // Transfer the old load's AA tags to the new load.
1590 AAMDNodes Tags = Load->getAAMetadata();
1591 if (Tags)
1592 NewLoad->setAAMetadata(Tags);
1593
1594 if (auto *MD = Load->getMetadata(KindID: LLVMContext::MD_invariant_load))
1595 NewLoad->setMetadata(KindID: LLVMContext::MD_invariant_load, Node: MD);
1596 if (auto *InvGroupMD = Load->getMetadata(KindID: LLVMContext::MD_invariant_group))
1597 NewLoad->setMetadata(KindID: LLVMContext::MD_invariant_group, Node: InvGroupMD);
1598 if (auto *RangeMD = Load->getMetadata(KindID: LLVMContext::MD_range))
1599 NewLoad->setMetadata(KindID: LLVMContext::MD_range, Node: RangeMD);
1600 if (auto *AccessMD = Load->getMetadata(KindID: LLVMContext::MD_access_group))
1601 if (LI->getLoopFor(BB: Load->getParent()) == LI->getLoopFor(BB: UnavailableBlock))
1602 NewLoad->setMetadata(KindID: LLVMContext::MD_access_group, Node: AccessMD);
1603
1604 // We do not propagate the old load's debug location, because the new
1605 // load now lives in a different BB, and we want to avoid a jumpy line
1606 // table.
1607 // FIXME: How do we retain source locations without causing poor debugging
1608 // behavior?
1609
1610 // Add the newly created load.
1611 ValuesPerBlock.push_back(
1612 Elt: AvailableValueInBlock::get(BB: UnavailableBlock, V: NewLoad));
1613 MD->invalidateCachedPointerInfo(Ptr: LoadPtr);
1614 LLVM_DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
1615
1616 // For PredBB in CriticalEdgePredAndLoad we need to replace the uses of the
1617 // old load instruction with the newly created load instruction.
1618 if (CriticalEdgePredAndLoad) {
1619 auto It = CriticalEdgePredAndLoad->find(Key: UnavailableBlock);
1620 if (It != CriticalEdgePredAndLoad->end()) {
1621 ++NumPRELoadMoved2CEPred;
1622 ICF->insertInstructionTo(Inst: NewLoad, BB: UnavailableBlock);
1623 LoadInst *OldLoad = It->second;
1624 combineMetadataForCSE(K: NewLoad, J: OldLoad, DoesKMove: false);
1625 OldLoad->replaceAllUsesWith(V: NewLoad);
1626 replaceValuesPerBlockEntry(ValuesPerBlock, OldValue: OldLoad, NewValue: NewLoad);
1627 if (uint32_t ValNo = VN.lookup(V: OldLoad, Verify: false))
1628 LeaderTable.erase(N: ValNo, I: OldLoad, BB: OldLoad->getParent());
1629 removeInstruction(I: OldLoad);
1630 }
1631 }
1632 }
1633
1634 // Perform PHI construction.
1635 Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, GVN&: *this);
1636 // ConstructSSAForLoadSet is responsible for combining metadata.
1637 ICF->removeUsersOf(Inst: Load);
1638 Load->replaceAllUsesWith(V);
1639 if (isa<PHINode>(Val: V))
1640 V->takeName(V: Load);
1641 if (Instruction *I = dyn_cast<Instruction>(Val: V))
1642 I->setDebugLoc(Load->getDebugLoc());
1643 if (V->getType()->isPtrOrPtrVectorTy())
1644 MD->invalidateCachedPointerInfo(Ptr: V);
1645 ORE->emit(RemarkBuilder: [&]() {
1646 return OptimizationRemark(DEBUG_TYPE, "LoadPRE", Load)
1647 << "load eliminated by PRE";
1648 });
1649 salvageAndRemoveInstruction(I: Load);
1650}
1651
1652bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
1653 UnavailBlkVect &UnavailableBlocks) {
1654 // Okay, we have *some* definitions of the value. This means that the value
1655 // is available in some of our (transitive) predecessors. Let's think about
1656 // doing PRE of this load. This will involve inserting a new load into the
1657 // predecessor when it's not available. We could do this in general, but
1658 // prefer to not increase code size. As such, we only do this when we know
1659 // that we only have to insert *one* load (which means we're basically moving
1660 // the load, not inserting a new one).
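// Illustrative sketch (hypothetical IR) of the single-insertion case:
//   P1:
//     store i32 %a, ptr %p
//     br label %M
//   P2:
//     br label %M                ; value not available along this edge
//   M:
//     %v = load i32, ptr %p
// The value is available from P1 (via the store) but not from P2, so PRE
// inserts "%v.pre = load i32, ptr %p" at the end of P2 and replaces %v with
// a phi of %a and %v.pre in M.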
1661
1662 SmallPtrSet<BasicBlock *, 4> Blockers(llvm::from_range, UnavailableBlocks);
1663
1664 // Let's find the first basic block with more than one predecessor. Walk
1665 // backwards through predecessors if needed.
1666 BasicBlock *LoadBB = Load->getParent();
1667 BasicBlock *TmpBB = LoadBB;
1668
1669 // Check that there are no implicit control flow instructions above our load
1670 // in its block. If there is an instruction that doesn't always transfer
1671 // execution to the following instruction, then moving through it may become
1672 // invalid. For example:
1673 //
1674 // int arr[LEN];
1675 // int index = ???;
1676 // ...
1677 // guard(0 <= index && index < LEN);
1678 // use(arr[index]);
1679 //
1680 // It is illegal to move the array access to any point above the guard,
1681 // because if the index is out of bounds we should deoptimize rather than
1682 // access the array.
1683 // Check that there is no guard in this block above our instruction.
1684 bool MustEnsureSafetyOfSpeculativeExecution =
1685 ICF->isDominatedByICFIFromSameBlock(Insn: Load);
1686
1687 while (TmpBB->getSinglePredecessor()) {
1688 TmpBB = TmpBB->getSinglePredecessor();
1689 if (TmpBB == LoadBB) // Infinite (unreachable) loop.
1690 return false;
1691 if (Blockers.count(Ptr: TmpBB))
1692 return false;
1693
1694 // If any of these blocks has more than one successor (i.e. if the edge we
1695 // just traversed was critical), then there are other paths through this
1696 // block along which the load may not be anticipated. Hoisting the load
1697 // above this block would be adding the load to execution paths along
1698 // which it was not previously executed.
1699 if (TmpBB->getTerminator()->getNumSuccessors() != 1)
1700 return false;
1701
1702 // Check that there is no implicit control flow in a block above.
1703 MustEnsureSafetyOfSpeculativeExecution =
1704 MustEnsureSafetyOfSpeculativeExecution || ICF->hasICF(BB: TmpBB);
1705 }
1706
1707 assert(TmpBB);
1708 LoadBB = TmpBB;
1709
1710 // Check to see how many predecessors have the loaded value fully
1711 // available.
1712 MapVector<BasicBlock *, Value *> PredLoads;
1713 DenseMap<BasicBlock *, AvailabilityState> FullyAvailableBlocks;
1714 for (const AvailableValueInBlock &AV : ValuesPerBlock)
1715 FullyAvailableBlocks[AV.BB] = AvailabilityState::Available;
1716 for (BasicBlock *UnavailableBB : UnavailableBlocks)
1717 FullyAvailableBlocks[UnavailableBB] = AvailabilityState::Unavailable;
1718
1719 // The edge from Pred to LoadBB is a critical edge and will be split.
1720 SmallVector<BasicBlock *, 4> CriticalEdgePredSplit;
1721 // The edge from Pred to LoadBB is a critical edge, and another successor of
1722 // Pred contains a load that can be moved to Pred. This data structure maps
1723 // the Pred to the movable load.
1724 MapVector<BasicBlock *, LoadInst *> CriticalEdgePredAndLoad;
1725 for (BasicBlock *Pred : predecessors(BB: LoadBB)) {
1726 // If any predecessor block is an EH pad that does not allow non-PHI
1727 // instructions before the terminator, we can't PRE the load.
1728 if (Pred->getTerminator()->isEHPad()) {
1729 LLVM_DEBUG(
1730 dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '"
1731 << Pred->getName() << "': " << *Load << '\n');
1732 return false;
1733 }
1734
1735 if (IsValueFullyAvailableInBlock(BB: Pred, FullyAvailableBlocks)) {
1736 continue;
1737 }
1738
1739 if (Pred->getTerminator()->getNumSuccessors() != 1) {
1740 if (isa<IndirectBrInst>(Val: Pred->getTerminator())) {
1741 LLVM_DEBUG(
1742 dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
1743 << Pred->getName() << "': " << *Load << '\n');
1744 return false;
1745 }
1746
1747 if (LoadBB->isEHPad()) {
1748 LLVM_DEBUG(
1749 dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
1750 << Pred->getName() << "': " << *Load << '\n');
1751 return false;
1752 }
1753
1754 // Do not split backedge as it will break the canonical loop form.
1755 if (!isLoadPRESplitBackedgeEnabled())
1756 if (DT->dominates(A: LoadBB, B: Pred)) {
1757 LLVM_DEBUG(
1758 dbgs()
1759 << "COULD NOT PRE LOAD BECAUSE OF A BACKEDGE CRITICAL EDGE '"
1760 << Pred->getName() << "': " << *Load << '\n');
1761 return false;
1762 }
1763
1764 if (LoadInst *LI = findLoadToHoistIntoPred(Pred, LoadBB, Load))
1765 CriticalEdgePredAndLoad[Pred] = LI;
1766 else
1767 CriticalEdgePredSplit.push_back(Elt: Pred);
1768 } else {
1769 // Only add the predecessors that will not be split for now.
1770 PredLoads[Pred] = nullptr;
1771 }
1772 }
1773
1774 // Decide whether PRE is profitable for this load.
1775 unsigned NumInsertPreds = PredLoads.size() + CriticalEdgePredSplit.size();
1776 unsigned NumUnavailablePreds = NumInsertPreds +
1777 CriticalEdgePredAndLoad.size();
1778 assert(NumUnavailablePreds != 0 &&
1779 "Fully available value should already be eliminated!");
1780 (void)NumUnavailablePreds;
1781
1782 // If we need to insert a new load into multiple predecessors, reject it.
1783 // FIXME: If we could restructure the CFG, we could make a common pred with
1784 // all the preds that don't have an available Load and insert a new load into
1785 // that one block.
1786 if (NumInsertPreds > 1)
1787 return false;
1788
1789 // Now we know where we will insert the load. We must ensure that it is safe
1790 // to speculatively execute the load at those points.
1791 if (MustEnsureSafetyOfSpeculativeExecution) {
1792 if (CriticalEdgePredSplit.size())
1793 if (!isSafeToSpeculativelyExecute(I: Load, CtxI: &*LoadBB->getFirstNonPHIIt(), AC,
1794 DT))
1795 return false;
1796 for (auto &PL : PredLoads)
1797 if (!isSafeToSpeculativelyExecute(I: Load, CtxI: PL.first->getTerminator(), AC,
1798 DT))
1799 return false;
1800 for (auto &CEP : CriticalEdgePredAndLoad)
1801 if (!isSafeToSpeculativelyExecute(I: Load, CtxI: CEP.first->getTerminator(), AC,
1802 DT))
1803 return false;
1804 }
1805
1806 // Split critical edges, and update the unavailable predecessors accordingly.
1807 for (BasicBlock *OrigPred : CriticalEdgePredSplit) {
1808 BasicBlock *NewPred = splitCriticalEdges(Pred: OrigPred, Succ: LoadBB);
1809 assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!");
1810 PredLoads[NewPred] = nullptr;
1811 LLVM_DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
1812 << LoadBB->getName() << '\n');
1813 }
1814
1815 for (auto &CEP : CriticalEdgePredAndLoad)
1816 PredLoads[CEP.first] = nullptr;
1817
1818 // Check if the load can safely be moved to all the unavailable predecessors.
1819 bool CanDoPRE = true;
1820 const DataLayout &DL = Load->getDataLayout();
1821 SmallVector<Instruction*, 8> NewInsts;
1822 for (auto &PredLoad : PredLoads) {
1823 BasicBlock *UnavailablePred = PredLoad.first;
1824
1825 // Do PHI translation to get its value in the predecessor if necessary. The
1826 // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
1827 // We do the translation for each edge we skipped by going from Load's block
1828 // to LoadBB; otherwise we might miss pieces needing translation.
1829
1830 // If all preds have a single successor, then we know it is safe to insert
1831 // the load on the pred (?!?), so we can insert code to materialize the
1832 // pointer if it is not available.
1833 Value *LoadPtr = Load->getPointerOperand();
1834 BasicBlock *Cur = Load->getParent();
1835 while (Cur != LoadBB) {
1836 PHITransAddr Address(LoadPtr, DL, AC);
1837 LoadPtr = Address.translateWithInsertion(CurBB: Cur, PredBB: Cur->getSinglePredecessor(),
1838 DT: *DT, NewInsts);
1839 if (!LoadPtr) {
1840 CanDoPRE = false;
1841 break;
1842 }
1843 Cur = Cur->getSinglePredecessor();
1844 }
1845
1846 if (LoadPtr) {
1847 PHITransAddr Address(LoadPtr, DL, AC);
1848 LoadPtr = Address.translateWithInsertion(CurBB: LoadBB, PredBB: UnavailablePred, DT: *DT,
1849 NewInsts);
1850 }
1851 // If we couldn't find or insert a computation of this phi translated value,
1852 // we fail PRE.
1853 if (!LoadPtr) {
1854 LLVM_DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
1855 << *Load->getPointerOperand() << "\n");
1856 CanDoPRE = false;
1857 break;
1858 }
1859
1860 PredLoad.second = LoadPtr;
1861 }
1862
1863 if (!CanDoPRE) {
1864 while (!NewInsts.empty()) {
1865 // Erase instructions generated by the failed PHI translation before
1866 // trying to number them. PHI translation might insert instructions
1867 // in basic blocks other than the current one, and we delete them
1868 // directly, as salvageAndRemoveInstruction only allows removing from the
1869 // current basic block.
1870 NewInsts.pop_back_val()->eraseFromParent();
1871 }
1872 // HINT: Don't revert the edge-splitting, as the following transformation may
1873 // also need to split these critical edges.
1874 return !CriticalEdgePredSplit.empty();
1875 }
1876
1877 // Okay, we can eliminate this load by inserting a reload in the predecessor
1878 // and using PHI construction to get the value in the other predecessors, do
1879 // it.
1880 LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *Load << '\n');
1881 LLVM_DEBUG(if (!NewInsts.empty()) dbgs() << "INSERTED " << NewInsts.size()
1882 << " INSTS: " << *NewInsts.back()
1883 << '\n');
1884
1885 // Assign value numbers to the new instructions.
1886 for (Instruction *I : NewInsts) {
1887 // Instructions that have been inserted in predecessor(s) to materialize
1888 // the load address do not retain their original debug locations. Doing
1889 // so could lead to confusing (but correct) source attributions.
1890 I->updateLocationAfterHoist();
1891
1892 // FIXME: We really _ought_ to insert these value numbers into their
1893 // parent's availability map. However, in doing so, we risk getting into
1894 // ordering issues. If a block hasn't been processed yet, we would be
1895 // marking a value as AVAIL-IN, which isn't what we intend.
1896 VN.lookupOrAdd(V: I);
1897 }
1898
1899 eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, AvailableLoads&: PredLoads,
1900 CriticalEdgePredAndLoad: &CriticalEdgePredAndLoad);
1901 ++NumPRELoad;
1902 return true;
1903}
1904
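/// Loop load PRE, sketched on a hypothetical loop: a load in the loop header
/// whose only in-loop blocker is a conditionally executed block that does not
/// dominate the latch (e.g. a cold path containing a call that may clobber
/// memory) can be handled by reloading in the preheader and again at the end
/// of that cold block:
///
///   preheader:                  ; insert "%v.pre = load i32, ptr %p"
///   header:
///     %v = load i32, ptr %p     ; rewritten in terms of the inserted loads
///     ...
///   coldpath:
///     call void @may_clobber()  ; insert another reload after the clobber
///     br label %latch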
1905bool GVNPass::performLoopLoadPRE(LoadInst *Load,
1906 AvailValInBlkVect &ValuesPerBlock,
1907 UnavailBlkVect &UnavailableBlocks) {
1908 const Loop *L = LI->getLoopFor(BB: Load->getParent());
1909 // TODO: Generalize to other loop blocks that dominate the latch.
1910 if (!L || L->getHeader() != Load->getParent())
1911 return false;
1912
1913 BasicBlock *Preheader = L->getLoopPreheader();
1914 BasicBlock *Latch = L->getLoopLatch();
1915 if (!Preheader || !Latch)
1916 return false;
1917
1918 Value *LoadPtr = Load->getPointerOperand();
1919 // Must be available in preheader.
1920 if (!L->isLoopInvariant(V: LoadPtr))
1921 return false;
1922
1923 // We plan to hoist the load to preheader without introducing a new fault.
1924 // In order to do it, we need to prove that we cannot side-exit the loop
1925 // once loop header is first entered before execution of the load.
1926 if (ICF->isDominatedByICFIFromSameBlock(Insn: Load))
1927 return false;
1928
1929 BasicBlock *LoopBlock = nullptr;
1930 for (auto *Blocker : UnavailableBlocks) {
1931 // Blockers from outside the loop are handled in preheader.
1932 if (!L->contains(BB: Blocker))
1933 continue;
1934
1935 // Only allow one loop block. The loop header is executed at least as
1936 // frequently as each loop block, and likely much more frequently. But
1937 // in case of multiple loop blocks, we need extra information (such as block
1938 // frequency info) to understand whether it is profitable to PRE into
1939 // multiple loop blocks.
1940 if (LoopBlock)
1941 return false;
1942
1943 // Do not sink into inner loops. This may not be profitable.
1944 if (L != LI->getLoopFor(BB: Blocker))
1945 return false;
1946
1947 // Blocks that dominate the latch execute on every single iteration, except
1948 // possibly the last one. So PREing into these blocks doesn't make much sense
1949 // in most cases. But the blocks that do not necessarily execute on each
1950 // iteration are sometimes much colder than the header, and this is when
1951 // PRE is potentially profitable.
1952 if (DT->dominates(A: Blocker, B: Latch))
1953 return false;
1954
1955 // Make sure that the terminator itself doesn't clobber.
1956 if (Blocker->getTerminator()->mayWriteToMemory())
1957 return false;
1958
1959 LoopBlock = Blocker;
1960 }
1961
1962 if (!LoopBlock)
1963 return false;
1964
1965 // Make sure the memory at this pointer cannot be freed, so that we can
1966 // safely reload from it after a clobber.
1967 if (LoadPtr->canBeFreed())
1968 return false;
1969
1970 // TODO: Support critical edge splitting if blocker has more than 1 successor.
1971 MapVector<BasicBlock *, Value *> AvailableLoads;
1972 AvailableLoads[LoopBlock] = LoadPtr;
1973 AvailableLoads[Preheader] = LoadPtr;
1974
1975 LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOOP LOAD: " << *Load << '\n');
1976 eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, AvailableLoads,
1977 /*CriticalEdgePredAndLoad*/ nullptr);
1978 ++NumPRELoopLoad;
1979 return true;
1980}
1981
1982static void reportLoadElim(LoadInst *Load, Value *AvailableValue,
1983 OptimizationRemarkEmitter *ORE) {
1984 using namespace ore;
1985
1986 ORE->emit(RemarkBuilder: [&]() {
1987 return OptimizationRemark(DEBUG_TYPE, "LoadElim", Load)
1988 << "load of type " << NV("Type", Load->getType()) << " eliminated"
1989 << setExtraArgs() << " in favor of "
1990 << NV("InfavorOfValue", AvailableValue);
1991 });
1992}
1993
1994/// Attempt to eliminate a load whose dependencies are
1995/// non-local by performing PHI construction.
1996bool GVNPass::processNonLocalLoad(LoadInst *Load) {
1997 // Non-local speculations are not allowed under asan.
1998 if (Load->getParent()->getParent()->hasFnAttribute(
1999 Kind: Attribute::SanitizeAddress) ||
2000 Load->getParent()->getParent()->hasFnAttribute(
2001 Kind: Attribute::SanitizeHWAddress))
2002 return false;
2003
2004 // Step 1: Find the non-local dependencies of the load.
2005 LoadDepVect Deps;
2006 MD->getNonLocalPointerDependency(QueryInst: Load, Result&: Deps);
2007
2008 // If we had to process more than one hundred blocks to find the
2009 // dependencies, this load isn't worth worrying about. Optimizing
2010 // it will be too expensive.
2011 unsigned NumDeps = Deps.size();
2012 if (NumDeps > MaxNumDeps)
2013 return false;
2014
2015 // If we had a phi translation failure, we'll have a single entry with an
2016 // unknown dependence in the current block. Reject this early.
2017 if (NumDeps == 1 &&
2018 !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
2019 LLVM_DEBUG(dbgs() << "GVN: non-local load "; Load->printAsOperand(dbgs());
2020 dbgs() << " has unknown dependencies\n";);
2021 return false;
2022 }
2023
2024 bool Changed = false;
2025 // If this load follows a GEP, see if we can PRE the indices before analyzing.
2026 if (GetElementPtrInst *GEP =
2027 dyn_cast<GetElementPtrInst>(Val: Load->getOperand(i_nocapture: 0))) {
2028 for (Use &U : GEP->indices())
2029 if (Instruction *I = dyn_cast<Instruction>(Val: U.get()))
2030 Changed |= performScalarPRE(I);
2031 }
2032
2033 // Step 2: Analyze the availability of the load.
2034 AvailValInBlkVect ValuesPerBlock;
2035 UnavailBlkVect UnavailableBlocks;
2036 AnalyzeLoadAvailability(Load, Deps, ValuesPerBlock, UnavailableBlocks);
2037
2038 // If we have no predecessors that produce a known value for this load, exit
2039 // early.
2040 if (ValuesPerBlock.empty())
2041 return Changed;
2042
2043 // Step 3: Eliminate full redundancy.
2044 //
2045 // If all of the instructions we depend on produce a known value for this
2046 // load, then it is fully redundant and we can use PHI insertion to compute
2047 // its value. Insert PHIs and remove the fully redundant value now.
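// For illustration (hypothetical IR): if both predecessors make the value
// available,
//   T:  store i32 %a, ptr %p        ; then br label %M
//   F:  %b = load i32, ptr %p       ; then br label %M
//   M:  %v = load i32, ptr %p
// the load in M is replaced by "phi i32 [ %a, %T ], [ %b, %F ]".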
2048 if (UnavailableBlocks.empty()) {
2049 LLVM_DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *Load << '\n');
2050
2051 // Perform PHI construction.
2052 Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, GVN&: *this);
2053 // ConstructSSAForLoadSet is responsible for combining metadata.
2054 ICF->removeUsersOf(Inst: Load);
2055 Load->replaceAllUsesWith(V);
2056
2057 if (isa<PHINode>(Val: V))
2058 V->takeName(V: Load);
2059 if (Instruction *I = dyn_cast<Instruction>(Val: V))
2060 // If instruction I has debug info, then we should not update it.
2061 // Also, if I has a null DebugLoc, then it is still potentially incorrect
2062 // to propagate Load's DebugLoc because Load may not post-dominate I.
2063 if (Load->getDebugLoc() && Load->getParent() == I->getParent())
2064 I->setDebugLoc(Load->getDebugLoc());
2065 if (V->getType()->isPtrOrPtrVectorTy())
2066 MD->invalidateCachedPointerInfo(Ptr: V);
2067 ++NumGVNLoad;
2068 reportLoadElim(Load, AvailableValue: V, ORE);
2069 salvageAndRemoveInstruction(I: Load);
2070 return true;
2071 }
2072
2073 // Step 4: Eliminate partial redundancy.
2074 if (!isPREEnabled() || !isLoadPREEnabled())
2075 return Changed;
2076 if (!isLoadInLoopPREEnabled() && LI->getLoopFor(BB: Load->getParent()))
2077 return Changed;
2078
2079 if (performLoopLoadPRE(Load, ValuesPerBlock, UnavailableBlocks) ||
2080 PerformLoadPRE(Load, ValuesPerBlock, UnavailableBlocks))
2081 return true;
2082
2083 return Changed;
2084}
2085
2086static bool hasUsersIn(Value *V, BasicBlock *BB) {
2087 return any_of(Range: V->users(), P: [BB](User *U) {
2088 auto *I = dyn_cast<Instruction>(Val: U);
2089 return I && I->getParent() == BB;
2090 });
2091}
2092
2093bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
2094 Value *V = IntrinsicI->getArgOperand(i: 0);
2095
2096 if (ConstantInt *Cond = dyn_cast<ConstantInt>(Val: V)) {
2097 if (Cond->isZero()) {
2098 Type *Int8Ty = Type::getInt8Ty(C&: V->getContext());
2099 Type *PtrTy = PointerType::get(C&: V->getContext(), AddressSpace: 0);
2100 // Insert a new store to null before the assume to indicate that this code
2101 // is not reachable. FIXME: We could insert an unreachable instruction
2102 // directly because we can modify the CFG.
2103 auto *NewS =
2104 new StoreInst(PoisonValue::get(T: Int8Ty), Constant::getNullValue(Ty: PtrTy),
2105 IntrinsicI->getIterator());
2106 if (MSSAU) {
2107 const MemoryUseOrDef *FirstNonDom = nullptr;
2108 const auto *AL =
2109 MSSAU->getMemorySSA()->getBlockAccesses(BB: IntrinsicI->getParent());
2110
2111 // If there are accesses in the current basic block, find the first one
2112 // that does not come before NewS. The new memory access is inserted
2113 // before the found access, or before the terminator if no such access is
2114 // found.
2115 if (AL) {
2116 for (const auto &Acc : *AL) {
2117 if (auto *Current = dyn_cast<MemoryUseOrDef>(Val: &Acc))
2118 if (!Current->getMemoryInst()->comesBefore(Other: NewS)) {
2119 FirstNonDom = Current;
2120 break;
2121 }
2122 }
2123 }
2124
2125 auto *NewDef =
2126 FirstNonDom ? MSSAU->createMemoryAccessBefore(
2127 I: NewS, Definition: nullptr,
2128 InsertPt: const_cast<MemoryUseOrDef *>(FirstNonDom))
2129 : MSSAU->createMemoryAccessInBB(
2130 I: NewS, Definition: nullptr,
2131 BB: NewS->getParent(), Point: MemorySSA::BeforeTerminator);
2132
2133 MSSAU->insertDef(Def: cast<MemoryDef>(Val: NewDef), /*RenameUses=*/false);
2134 }
2135 }
2136 if (isAssumeWithEmptyBundle(Assume: *IntrinsicI)) {
2137 salvageAndRemoveInstruction(I: IntrinsicI);
2138 return true;
2139 }
2140 return false;
2141 }
2142
2143 if (isa<Constant>(Val: V)) {
2144 // If it's not false, and constant, it must evaluate to true. This means our
2145 // assume is assume(true), and thus, pointless, and we don't want to do
2146 // anything more here.
2147 return false;
2148 }
2149
2150 Constant *True = ConstantInt::getTrue(Context&: V->getContext());
2151 bool Changed = false;
2152
2153 for (BasicBlock *Successor : successors(BB: IntrinsicI->getParent())) {
2154 BasicBlockEdge Edge(IntrinsicI->getParent(), Successor);
2155
2156 // This property is only true in dominated successors, propagateEquality
2157 // will check dominance for us.
2158 Changed |= propagateEquality(LHS: V, RHS: True, Root: Edge, DominatesByEdge: false);
2159 }
2160
2161 // We can replace assume value with true, which covers cases like this:
2162 // call void @llvm.assume(i1 %cmp)
2163 // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true
2164 ReplaceOperandsWithMap[V] = True;
2165
2166 // Similarly, after assume(!NotV) we know that NotV == false.
2167 Value *NotV;
2168 if (match(V, P: m_Not(V: m_Value(V&: NotV))))
2169 ReplaceOperandsWithMap[NotV] = ConstantInt::getFalse(Context&: V->getContext());
2170
2171 // If we find an equality fact, canonicalize all dominated uses in this block
2172 // to one of the two values. We heuristically choose the "oldest" of the
2173 // two where age is determined by value number. (Note that propagateEquality
2174 // above handles the cross block case.)
2175 //
2176 // Key cases to cover are:
2177 // 1)
2178 // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen
2179 // call void @llvm.assume(i1 %cmp)
2180 // ret float %0 ; will change it to ret float 3.000000e+00
2181 // 2)
2182 // %load = load float, float* %addr
2183 // %cmp = fcmp oeq float %load, %0
2184 // call void @llvm.assume(i1 %cmp)
2185 // ret float %load ; will change it to ret float %0
2186 if (auto *CmpI = dyn_cast<CmpInst>(Val: V)) {
2187 if (CmpI->isEquivalence()) {
2188 Value *CmpLHS = CmpI->getOperand(i_nocapture: 0);
2189 Value *CmpRHS = CmpI->getOperand(i_nocapture: 1);
2190 // Heuristically pick the better replacement -- the choice of heuristic
2191 // isn't terribly important here, but the fact we canonicalize on some
2192 // replacement is for exposing other simplifications.
2193 // TODO: pull this out as a helper function and reuse w/ existing
2194 // (slightly different) logic.
2195 if (isa<Constant>(Val: CmpLHS) && !isa<Constant>(Val: CmpRHS))
2196 std::swap(a&: CmpLHS, b&: CmpRHS);
2197 if (!isa<Instruction>(Val: CmpLHS) && isa<Instruction>(Val: CmpRHS))
2198 std::swap(a&: CmpLHS, b&: CmpRHS);
2199 if ((isa<Argument>(Val: CmpLHS) && isa<Argument>(Val: CmpRHS)) ||
2200 (isa<Instruction>(Val: CmpLHS) && isa<Instruction>(Val: CmpRHS))) {
2201 // Move the 'oldest' value to the right-hand side, using the value
2202 // number as a proxy for age.
2203 uint32_t LVN = VN.lookupOrAdd(V: CmpLHS);
2204 uint32_t RVN = VN.lookupOrAdd(V: CmpRHS);
2205 if (LVN < RVN)
2206 std::swap(a&: CmpLHS, b&: CmpRHS);
2207 }
2208
2209 // Handle the degenerate case where we either haven't pruned a dead path
2210 // or removed a trivial assume yet.
2211 if (isa<Constant>(Val: CmpLHS) && isa<Constant>(Val: CmpRHS))
2212 return Changed;
2213
2214 LLVM_DEBUG(dbgs() << "Replacing dominated uses of "
2215 << *CmpLHS << " with "
2216 << *CmpRHS << " in block "
2217 << IntrinsicI->getParent()->getName() << "\n");
2218
2219 // Setup the replacement map - this handles uses within the same block.
2220 if (hasUsersIn(V: CmpLHS, BB: IntrinsicI->getParent()))
2221 ReplaceOperandsWithMap[CmpLHS] = CmpRHS;
2222
2223 // NOTE: The non-block local cases are handled by the call to
2224 // propagateEquality above; this block is just about handling the block
2225 // local cases. TODO: There's a bunch of logic in propagateEquality which
2226 // isn't duplicated for the block local case, can we share it somehow?
2227 }
2228 }
2229 return Changed;
2230}
2231
2232static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
2233 patchReplacementInstruction(I, Repl);
2234 I->replaceAllUsesWith(V: Repl);
2235}
2236
2237/// Attempt to eliminate a load, first by eliminating it
2238/// locally, and then attempting non-local elimination if that fails.
2239bool GVNPass::processLoad(LoadInst *L) {
2240 if (!MD)
2241 return false;
2242
2243 // This code hasn't been audited for ordered or volatile memory access.
2244 if (!L->isUnordered())
2245 return false;
2246
2247 if (L->use_empty()) {
2248 salvageAndRemoveInstruction(I: L);
2249 return true;
2250 }
2251
2252 // ... to a pointer that has been loaded from before...
2253 MemDepResult Dep = MD->getDependency(QueryInst: L);
2254
2255 // If it is defined in another block, try harder.
2256 if (Dep.isNonLocal())
2257 return processNonLocalLoad(Load: L);
2258
2259 // Only handle the local case below.
2260 if (!Dep.isLocal()) {
2261 // This might be a NonFuncLocal or an Unknown.
2262 LLVM_DEBUG(
2263 // fast print dep, using operator<< on instruction is too slow.
2264 dbgs() << "GVN: load "; L->printAsOperand(dbgs());
2265 dbgs() << " has unknown dependence\n";);
2266 return false;
2267 }
2268
2269 auto AV = AnalyzeLoadAvailability(Load: L, DepInfo: Dep, Address: L->getPointerOperand());
2270 if (!AV)
2271 return false;
2272
2273 Value *AvailableValue = AV->MaterializeAdjustedValue(Load: L, InsertPt: L);
2274
2275 // MaterializeAdjustedValue is responsible for combining metadata.
2276 ICF->removeUsersOf(Inst: L);
2277 L->replaceAllUsesWith(V: AvailableValue);
2278 if (MSSAU)
2279 MSSAU->removeMemoryAccess(I: L);
2280 ++NumGVNLoad;
2281 reportLoadElim(Load: L, AvailableValue, ORE);
2282 salvageAndRemoveInstruction(I: L);
2283 // Tell MDA to reexamine the reused pointer since we might have more
2284 // information after forwarding it.
2285 if (MD && AvailableValue->getType()->isPtrOrPtrVectorTy())
2286 MD->invalidateCachedPointerInfo(Ptr: AvailableValue);
2287 return true;
2288}
2289
2290 /// Return a pair whose first field is the value number of \p Exp and whose
2291 /// second field indicates whether that value number was newly created.
2292std::pair<uint32_t, bool>
2293GVNPass::ValueTable::assignExpNewValueNum(Expression &Exp) {
2294 uint32_t &E = ExpressionNumbering[Exp];
2295 bool CreateNewValNum = !E;
2296 if (CreateNewValNum) {
2297 Expressions.push_back(x: Exp);
2298 if (ExprIdx.size() < NextValueNumber + 1)
2299 ExprIdx.resize(new_size: NextValueNumber * 2);
2300 E = NextValueNumber;
2301 ExprIdx[NextValueNumber++] = NextExprNumber++;
2302 }
2303 return {E, CreateNewValNum};
2304}
2305
2306 /// Return whether all the values associated with the same \p Num are
2307 /// defined in \p BB.
2308bool GVNPass::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
2309 GVNPass &GVN) {
2310 return all_of(
2311 Range: GVN.LeaderTable.getLeaders(N: Num),
2312 P: [=](const LeaderMap::LeaderTableEntry &L) { return L.BB == BB; });
2313}
2314
2315/// Wrap phiTranslateImpl to provide caching functionality.
2316uint32_t GVNPass::ValueTable::phiTranslate(const BasicBlock *Pred,
2317 const BasicBlock *PhiBlock,
2318 uint32_t Num, GVNPass &GVN) {
2319 auto FindRes = PhiTranslateTable.find(Val: {Num, Pred});
2320 if (FindRes != PhiTranslateTable.end())
2321 return FindRes->second;
2322 uint32_t NewNum = phiTranslateImpl(BB: Pred, PhiBlock, Num, GVN);
2323 PhiTranslateTable.insert(KV: {{Num, Pred}, NewNum});
2324 return NewNum;
2325}
2326
2327 // Return true if the value numbers \p Num and \p NewNum represent the same
2328 // value. Return false if the result is unknown.
2329bool GVNPass::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum,
2330 const BasicBlock *Pred,
2331 const BasicBlock *PhiBlock,
2332 GVNPass &GVN) {
2333 CallInst *Call = nullptr;
2334 auto Leaders = GVN.LeaderTable.getLeaders(N: Num);
2335 for (const auto &Entry : Leaders) {
2336 Call = dyn_cast<CallInst>(Val: Entry.Val);
2337 if (Call && Call->getParent() == PhiBlock)
2338 break;
2339 }
2340
2341 if (AA->doesNotAccessMemory(Call))
2342 return true;
2343
2344 if (!MD || !AA->onlyReadsMemory(Call))
2345 return false;
2346
2347 MemDepResult LocalDep = MD->getDependency(QueryInst: Call);
2348 if (!LocalDep.isNonLocal())
2349 return false;
2350
2351 const MemoryDependenceResults::NonLocalDepInfo &Deps =
2352 MD->getNonLocalCallDependency(QueryCall: Call);
2353
2354 // Check to see if the Call has no function local clobber.
2355 for (const NonLocalDepEntry &D : Deps) {
2356 if (D.getResult().isNonFuncLocal())
2357 return true;
2358 }
2359 return false;
2360}
2361
2362/// Translate value number \p Num using phis, so that it has the values of
2363 /// the phis in \p PhiBlock.
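/// For example (illustrative): with "%p = phi i32 [ %a, %P1 ], [ %b, %P2 ]"
/// in \p PhiBlock, translating the value number of %p for predecessor %P1
/// yields the value number of %a.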
2364uint32_t GVNPass::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
2365 const BasicBlock *PhiBlock,
2366 uint32_t Num, GVNPass &GVN) {
2367 // See if we can refine the value number by looking at the PN incoming value
2368 // for the given predecessor.
2369 if (PHINode *PN = NumberingPhi[Num]) {
2370 if (PN->getParent() == PhiBlock)
2371 for (unsigned I = 0; I != PN->getNumIncomingValues(); ++I)
2372 if (PN->getIncomingBlock(i: I) == Pred)
2373 if (uint32_t TransVal = lookup(V: PN->getIncomingValue(i: I), Verify: false))
2374 return TransVal;
2375 return Num;
2376 }
2377
2378 if (BasicBlock *BB = NumberingBB[Num]) {
2379 assert(MSSA && "NumberingBB is non-empty only when using MemorySSA");
2380 // Value numbers of basic blocks are used to represent memory state in
2381 // load/store instructions and read-only function calls when said state is
2382 // set by a MemoryPhi.
2383 if (BB != PhiBlock)
2384 return Num;
2385 MemoryPhi *MPhi = MSSA->getMemoryAccess(BB);
2386 for (unsigned i = 0, N = MPhi->getNumIncomingValues(); i != N; ++i) {
2387 if (MPhi->getIncomingBlock(I: i) != Pred)
2388 continue;
2389 MemoryAccess *MA = MPhi->getIncomingValue(I: i);
2390 if (auto *PredPhi = dyn_cast<MemoryPhi>(Val: MA))
2391 return lookupOrAdd(V: PredPhi->getBlock());
2392 if (MSSA->isLiveOnEntryDef(MA))
2393 return lookupOrAdd(V: &BB->getParent()->getEntryBlock());
2394 return lookupOrAdd(V: cast<MemoryUseOrDef>(Val: MA)->getMemoryInst());
2395 }
2396 llvm_unreachable(
2397 "CFG/MemorySSA mismatch: predecessor not found among incoming blocks");
2398 }
2399
2400 // If any value associated with Num is defined in a BB other than
2401 // PhiBlock, it cannot depend on a phi in PhiBlock without going through
2402 // a backedge. We can do an early exit in that case to save compile time.
2403 if (!areAllValsInBB(Num, BB: PhiBlock, GVN))
2404 return Num;
2405
2406 if (Num >= ExprIdx.size() || ExprIdx[Num] == 0)
2407 return Num;
2408 Expression Exp = Expressions[ExprIdx[Num]];
2409
2410 for (unsigned I = 0; I < Exp.VarArgs.size(); I++) {
2411 // For InsertValue and ExtractValue, some varargs are index numbers
2412 // instead of value numbers. Those index numbers should not be
2413 // translated.
2414 if ((I > 1 && Exp.Opcode == Instruction::InsertValue) ||
2415 (I > 0 && Exp.Opcode == Instruction::ExtractValue) ||
2416 (I > 1 && Exp.Opcode == Instruction::ShuffleVector))
2417 continue;
2418 Exp.VarArgs[I] = phiTranslate(Pred, PhiBlock, Num: Exp.VarArgs[I], GVN);
2419 }
2420
2421 if (Exp.Commutative) {
2422 assert(Exp.VarArgs.size() >= 2 && "Unsupported commutative instruction!");
2423 if (Exp.VarArgs[0] > Exp.VarArgs[1]) {
2424 std::swap(a&: Exp.VarArgs[0], b&: Exp.VarArgs[1]);
2425 uint32_t Opcode = Exp.Opcode >> 8;
2426 if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)
2427 Exp.Opcode = (Opcode << 8) |
2428 CmpInst::getSwappedPredicate(
2429 pred: static_cast<CmpInst::Predicate>(Exp.Opcode & 255));
2430 }
2431 }
2432
2433 if (uint32_t NewNum = ExpressionNumbering[Exp]) {
2434 if (Exp.Opcode == Instruction::Call && NewNum != Num)
2435 return areCallValsEqual(Num, NewNum, Pred, PhiBlock, GVN) ? NewNum : Num;
2436 return NewNum;
2437 }
2438 return Num;
2439}
2440
2441/// Erase stale entry from phiTranslate cache so phiTranslate can be computed
2442/// again.
2443void GVNPass::ValueTable::eraseTranslateCacheEntry(
2444 uint32_t Num, const BasicBlock &CurrBlock) {
2445 for (const BasicBlock *Pred : predecessors(BB: &CurrBlock))
2446 PhiTranslateTable.erase(Val: {Num, Pred});
2447}
2448
2449// In order to find a leader for a given value number at a
2450// specific basic block, we first obtain the list of all Values for that number,
2451// and then scan the list to find one whose block dominates the block in
2452// question. This is fast because dominator tree queries consist of only
2453// a few comparisons of DFS numbers.
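// For example (illustrative): if value number 7 has leaders %a in %BB1 and
// %b in %BB2, a query from a block dominated only by %BB1 returns %a; a
// dominating constant leader is returned as soon as it is found.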
2454Value *GVNPass::findLeader(const BasicBlock *BB, uint32_t Num) {
2455 auto Leaders = LeaderTable.getLeaders(N: Num);
2456 if (Leaders.empty())
2457 return nullptr;
2458
2459 Value *Val = nullptr;
2460 for (const auto &Entry : Leaders) {
2461 if (DT->dominates(A: Entry.BB, B: BB)) {
2462 Val = Entry.Val;
2463 if (isa<Constant>(Val))
2464 return Val;
2465 }
2466 }
2467
2468 return Val;
2469}
2470
2471/// There is an edge from 'Src' to 'Dst'. Return
2472/// true if every path from the entry block to 'Dst' passes via this edge. In
2473/// particular 'Dst' must not be reachable via another edge from 'Src'.
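/// For instance (illustrative): if 'Src' ends in "br i1 %c, label %Dst,
/// label %Other" and 'Src' is the single predecessor of 'Dst', this returns
/// true; if 'Dst' has another incoming edge, it returns false.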
2474static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
2475 DominatorTree *DT) {
2476 // While in theory it is interesting to consider the case in which Dst has
2477 // more than one predecessor, because Dst might be part of a loop which is
2478 // only reachable from Src, in practice it is pointless since at the time
2479 // GVN runs all such loops have preheaders, which means that Dst will have
2480 // been changed to have only one predecessor, namely Src.
2481 const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
2482 assert((!Pred || Pred == E.getStart()) &&
2483 "No edge between these basic blocks!");
2484 return Pred != nullptr;
2485}
2486
2487void GVNPass::assignBlockRPONumber(Function &F) {
2488 BlockRPONumber.clear();
2489 uint32_t NextBlockNumber = 1;
2490 ReversePostOrderTraversal<Function *> RPOT(&F);
2491 for (BasicBlock *BB : RPOT)
2492 BlockRPONumber[BB] = NextBlockNumber++;
2493 InvalidBlockRPONumbers = false;
2494}
2495
2496bool GVNPass::replaceOperandsForInBlockEquality(Instruction *Instr) const {
2497 bool Changed = false;
2498 for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) {
2499 Value *Operand = Instr->getOperand(i: OpNum);
2500 auto It = ReplaceOperandsWithMap.find(Key: Operand);
2501 if (It != ReplaceOperandsWithMap.end()) {
2502 LLVM_DEBUG(dbgs() << "GVN replacing: " << *Operand << " with "
2503 << *It->second << " in instruction " << *Instr << '\n');
2504 Instr->setOperand(i: OpNum, Val: It->second);
2505 Changed = true;
2506 }
2507 }
2508 return Changed;
2509}
2510
2511/// The given values are known to be equal in every block
2512/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
2513/// 'RHS' everywhere in the scope. Returns whether a change was made.
2514/// If DominatesByEdge is false, then it means that we will propagate the RHS
2515/// value starting from the end of Root.Start.
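/// For example (illustrative): given the equality "(icmp eq i32 %x, 7) ==
/// true" along the true edge of a conditional branch, uses of %x dominated
/// by that edge are rewritten to the constant 7.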
2516bool GVNPass::propagateEquality(Value *LHS, Value *RHS,
2517 const BasicBlockEdge &Root,
2518 bool DominatesByEdge) {
2519 SmallVector<std::pair<Value*, Value*>, 4> Worklist;
2520 Worklist.push_back(Elt: std::make_pair(x&: LHS, y&: RHS));
2521 bool Changed = false;
2522 // For speed, compute a conservative fast approximation to
2523 // DT->dominates(Root, Root.getEnd());
2524 const bool RootDominatesEnd = isOnlyReachableViaThisEdge(E: Root, DT);
2525
2526 while (!Worklist.empty()) {
2527 std::pair<Value*, Value*> Item = Worklist.pop_back_val();
2528 LHS = Item.first; RHS = Item.second;
2529
2530 if (LHS == RHS)
2531 continue;
2532 assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");
2533
2534 // Don't try to propagate equalities between constants.
2535 if (isa<Constant>(Val: LHS) && isa<Constant>(Val: RHS))
2536 continue;
2537
2538 // Prefer a constant on the right-hand side, or an Argument if no constants.
2539 if (isa<Constant>(Val: LHS) || (isa<Argument>(Val: LHS) && !isa<Constant>(Val: RHS)))
2540 std::swap(a&: LHS, b&: RHS);
2541 assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!");
2542 const DataLayout &DL =
2543 isa<Argument>(Val: LHS)
2544 ? cast<Argument>(Val: LHS)->getParent()->getDataLayout()
2545 : cast<Instruction>(Val: LHS)->getDataLayout();
2546
2547 // If there is no obvious reason to prefer the left-hand side over the
2548 // right-hand side, ensure the longest lived term is on the right-hand side,
2549 // so the shortest lived term will be replaced by the longest lived.
2550 // This tends to expose more simplifications.
2551 uint32_t LVN = VN.lookupOrAdd(V: LHS);
2552 if ((isa<Argument>(Val: LHS) && isa<Argument>(Val: RHS)) ||
2553 (isa<Instruction>(Val: LHS) && isa<Instruction>(Val: RHS))) {
2554 // Move the 'oldest' value to the right-hand side, using the value number
2555 // as a proxy for age.
2556 uint32_t RVN = VN.lookupOrAdd(V: RHS);
2557 if (LVN < RVN) {
2558 std::swap(a&: LHS, b&: RHS);
2559 LVN = RVN;
2560 }
2561 }
2562
2563 // If value numbering later sees that an instruction in the scope is equal
2564 // to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve
2565 // the invariant that instructions only occur in the leader table for their
2566 // own value number (this is relied on when erasing from the leader table), do not do this
2567 // if RHS is an instruction (if an instruction in the scope is morphed into
2568 // LHS then it will be turned into RHS by the next GVN iteration anyway, so
2569 // using the leader table is about compiling faster, not optimizing better).
2570 // The leader table only tracks basic blocks, not edges. Only add to if we
2571 // have the simple case where the edge dominates the end.
2572 if (RootDominatesEnd && !isa<Instruction>(Val: RHS) &&
2573 canReplacePointersIfEqual(From: LHS, To: RHS, DL))
2574 LeaderTable.insert(N: LVN, V: RHS, BB: Root.getEnd());
2575
2576 // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
2577 // LHS always has at least one use that is not dominated by Root, this will
2578 // never do anything if LHS has only one use.
2579 if (!LHS->hasOneUse()) {
2580 // Create a callback that captures the DL.
2581 auto CanReplacePointersCallBack = [&DL](const Use &U, const Value *To) {
2582 return canReplacePointersInUseIfEqual(U, To, DL);
2583 };
2584 unsigned NumReplacements =
2585 DominatesByEdge
2586 ? replaceDominatedUsesWithIf(From: LHS, To: RHS, DT&: *DT, Edge: Root,
2587 ShouldReplace: CanReplacePointersCallBack)
2588 : replaceDominatedUsesWithIf(From: LHS, To: RHS, DT&: *DT, BB: Root.getStart(),
2589 ShouldReplace: CanReplacePointersCallBack);
2590
2591 if (NumReplacements > 0) {
2592 Changed = true;
2593 NumGVNEqProp += NumReplacements;
2594 // Cached information for anything that uses LHS will be invalid.
2595 if (MD)
2596 MD->invalidateCachedPointerInfo(Ptr: LHS);
2597 }
2598 }
2599
2600 // Now try to deduce additional equalities from this one. For example, if
2601 // the known equality was "(A != B)" == "false" then it follows that A and B
2602 // are equal in the scope. Only boolean equalities with an explicit true or
2603 // false RHS are currently supported.
2604 if (!RHS->getType()->isIntegerTy(Bitwidth: 1))
2605 // Not a boolean equality - bail out.
2606 continue;
2607 ConstantInt *CI = dyn_cast<ConstantInt>(Val: RHS);
2608 if (!CI)
2609 // RHS neither 'true' nor 'false' - bail out.
2610 continue;
2611 // Whether RHS equals 'true'. Otherwise it equals 'false'.
2612 bool IsKnownTrue = CI->isMinusOne();
2613 bool IsKnownFalse = !IsKnownTrue;
2614
2615 // If "A && B" is known true then both A and B are known true. If "A || B"
2616 // is known false then both A and B are known false.
2617 Value *A, *B;
2618 if ((IsKnownTrue && match(V: LHS, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B)))) ||
2619 (IsKnownFalse && match(V: LHS, P: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B))))) {
2620 Worklist.push_back(Elt: std::make_pair(x&: A, y&: RHS));
2621 Worklist.push_back(Elt: std::make_pair(x&: B, y&: RHS));
2622 continue;
2623 }
2624
2625 // If we are propagating an equality like "(A == B)" == "true" then also
2626 // propagate the equality A == B. When propagating a comparison such as
2627 // "(A >= B)" == "true", replace all instances of "A < B" with "false".
2628 if (CmpInst *Cmp = dyn_cast<CmpInst>(Val: LHS)) {
2629 Value *Op0 = Cmp->getOperand(i_nocapture: 0), *Op1 = Cmp->getOperand(i_nocapture: 1);
2630
2631 // If "A == B" is known true, or "A != B" is known false, then replace
2632 // A with B everywhere in the scope. For floating point operations, we
2633 // have to be careful since equality does not always imply equivalence.
2634 if (Cmp->isEquivalence(Invert: IsKnownFalse))
2635 Worklist.push_back(Elt: std::make_pair(x&: Op0, y&: Op1));
2636
2637 // If "A >= B" is known true, replace "A < B" with false everywhere.
2638 CmpInst::Predicate NotPred = Cmp->getInversePredicate();
2639 Constant *NotVal = ConstantInt::get(Ty: Cmp->getType(), V: IsKnownFalse);
2640 // Since we don't have the instruction "A < B" immediately to hand, work
2641 // out the value number that it would have and use that to find an
2642 // appropriate instruction (if any).
2643 uint32_t NextNum = VN.getNextUnusedValueNumber();
2644 uint32_t Num = VN.lookupOrAddCmp(Opcode: Cmp->getOpcode(), Predicate: NotPred, LHS: Op0, RHS: Op1);
2645 // If the number we were assigned was brand new then there is no point in
2646 // looking for an instruction realizing it: there cannot be one!
2647 if (Num < NextNum) {
2648 Value *NotCmp = findLeader(BB: Root.getEnd(), Num);
2649 if (NotCmp && isa<Instruction>(Val: NotCmp)) {
2650 unsigned NumReplacements =
2651 DominatesByEdge
2652 ? replaceDominatedUsesWith(From: NotCmp, To: NotVal, DT&: *DT, Edge: Root)
2653 : replaceDominatedUsesWith(From: NotCmp, To: NotVal, DT&: *DT,
2654 BB: Root.getStart());
2655 Changed |= NumReplacements > 0;
2656 NumGVNEqProp += NumReplacements;
2657 // Cached information for anything that uses NotCmp will be invalid.
2658 if (MD)
2659 MD->invalidateCachedPointerInfo(Ptr: NotCmp);
2660 }
2661 }
2662 // Ensure that any instruction in scope that gets the "A < B" value number
2663 // is replaced with false.
2664 // The leader table only tracks basic blocks, not edges. Only add to if we
2665 // have the simple case where the edge dominates the end.
2666 if (RootDominatesEnd)
2667 LeaderTable.insert(N: Num, V: NotVal, BB: Root.getEnd());
2668
2669 continue;
2670 }
2671
2672 // Propagate equalities that result from truncation with no unsigned wrap,
2673 // like (trunc nuw i64 %v to i1) == "true" or (trunc nuw i64 %v to i1) ==
2674 // "false".
2675 if (match(V: LHS, P: m_NUWTrunc(Op: m_Value(V&: A)))) {
2676 Worklist.emplace_back(Args&: A, Args: ConstantInt::get(Ty: A->getType(), V: IsKnownTrue));
2677 continue;
2678 }
2679 }
2680
2681 return Changed;
2682}
2683
2684/// When calculating availability, handle an instruction
2685/// by inserting it into the appropriate sets.
2686bool GVNPass::processInstruction(Instruction *I) {
2687 // If the instruction can be easily simplified then do so now in preference
2688 // to value numbering it. Value numbering often exposes redundancies, for
2689 // example if it determines that %y is equal to %x then the instruction
2690 // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
2691 const DataLayout &DL = I->getDataLayout();
2692 if (Value *V = simplifyInstruction(I, Q: {DL, TLI, DT, AC})) {
2693 bool Changed = false;
2694 if (!I->use_empty()) {
2695 // Simplification can cause a special instruction to become not special.
2696 // For example, devirtualization to a willreturn function.
2697 ICF->removeUsersOf(Inst: I);
2698 I->replaceAllUsesWith(V);
2699 Changed = true;
2700 }
2701 if (isInstructionTriviallyDead(I, TLI)) {
2702 salvageAndRemoveInstruction(I);
2703 Changed = true;
2704 }
2705 if (Changed) {
2706 if (MD && V->getType()->isPtrOrPtrVectorTy())
2707 MD->invalidateCachedPointerInfo(Ptr: V);
2708 ++NumGVNSimpl;
2709 return true;
2710 }
2711 }
2712
2713 if (auto *Assume = dyn_cast<AssumeInst>(Val: I))
2714 return processAssumeIntrinsic(IntrinsicI: Assume);
2715
2716 if (LoadInst *Load = dyn_cast<LoadInst>(Val: I)) {
2717 if (processLoad(L: Load))
2718 return true;
2719
2720 unsigned Num = VN.lookupOrAdd(V: Load);
2721 LeaderTable.insert(N: Num, V: Load, BB: Load->getParent());
2722 return false;
2723 }
2724
2725 // For conditional branches, we can perform simple conditional propagation on
2726 // the condition value itself.
2727 if (BranchInst *BI = dyn_cast<BranchInst>(Val: I)) {
2728 if (!BI->isConditional())
2729 return false;
2730
2731 if (isa<Constant>(Val: BI->getCondition()))
2732 return processFoldableCondBr(BI);
2733
2734 Value *BranchCond = BI->getCondition();
2735 BasicBlock *TrueSucc = BI->getSuccessor(i: 0);
2736 BasicBlock *FalseSucc = BI->getSuccessor(i: 1);
2737 // Avoid multiple edges early.
2738 if (TrueSucc == FalseSucc)
2739 return false;
2740
2741 BasicBlock *Parent = BI->getParent();
2742 bool Changed = false;
2743
2744 Value *TrueVal = ConstantInt::getTrue(Context&: TrueSucc->getContext());
2745 BasicBlockEdge TrueE(Parent, TrueSucc);
2746 Changed |= propagateEquality(LHS: BranchCond, RHS: TrueVal, Root: TrueE, DominatesByEdge: true);
2747
2748 Value *FalseVal = ConstantInt::getFalse(Context&: FalseSucc->getContext());
2749 BasicBlockEdge FalseE(Parent, FalseSucc);
2750 Changed |= propagateEquality(LHS: BranchCond, RHS: FalseVal, Root: FalseE, DominatesByEdge: true);
2751
2752 return Changed;
2753 }
2754
2755 // For switches, propagate the case values into the case destinations.
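// For example (illustrative): for "switch i32 %x, ... [ i32 5, label %C ]"
// where %C is reached only through this single case edge, uses of %x
// dominated by that edge are rewritten to the constant 5.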
2756 if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: I)) {
2757 Value *SwitchCond = SI->getCondition();
2758 BasicBlock *Parent = SI->getParent();
2759 bool Changed = false;
2760
2761 // Remember how many outgoing edges there are to every successor.
2762 SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
2763 for (BasicBlock *Succ : successors(BB: Parent))
2764 ++SwitchEdges[Succ];
2765
2766 for (const auto &Case : SI->cases()) {
2767 BasicBlock *Dst = Case.getCaseSuccessor();
2768 // If there is only a single edge, propagate the case value into it.
2769 if (SwitchEdges.lookup(Val: Dst) == 1) {
2770 BasicBlockEdge E(Parent, Dst);
2771 Changed |= propagateEquality(LHS: SwitchCond, RHS: Case.getCaseValue(), Root: E, DominatesByEdge: true);
2772 }
2773 }
2774 return Changed;
2775 }
2776
2777 // Instructions with void type don't return a value, so there's
2778 // no point in trying to find redundancies in them.
2779 if (I->getType()->isVoidTy())
2780 return false;
2781
2782 uint32_t NextNum = VN.getNextUnusedValueNumber();
2783 unsigned Num = VN.lookupOrAdd(V: I);
2784
2785 // Allocations are always uniquely numbered, so we can save time and memory
2786 // by fast failing them.
2787 if (isa<AllocaInst>(Val: I) || I->isTerminator() || isa<PHINode>(Val: I)) {
2788 LeaderTable.insert(N: Num, V: I, BB: I->getParent());
2789 return false;
2790 }
2791
2792 // If the number we were assigned was a brand new VN, then we don't
2793 // need to do a lookup to see if the number already exists
2794 // somewhere in the domtree: it can't!
2795 if (Num >= NextNum) {
2796 LeaderTable.insert(N: Num, V: I, BB: I->getParent());
2797 return false;
2798 }
2799
2800 // Perform fast-path value-number based elimination of values inherited from
2801 // dominators.
2802 Value *Repl = findLeader(BB: I->getParent(), Num);
2803 if (!Repl) {
2804 // Failure, just remember this instance for future use.
2805 LeaderTable.insert(N: Num, V: I, BB: I->getParent());
2806 return false;
2807 }
2808
2809 if (Repl == I) {
2810 // If I was the result of a shortcut PRE, it might already be in the table
2811 // and the best replacement for itself. Nothing to do.
2812 return false;
2813 }
2814
2815 // Remove it!
2816 patchAndReplaceAllUsesWith(I, Repl);
2817 if (MD && Repl->getType()->isPtrOrPtrVectorTy())
2818 MD->invalidateCachedPointerInfo(Ptr: Repl);
2819 salvageAndRemoveInstruction(I);
2820 return true;
2821}
2822
2823/// runOnFunction - This is the main transformation entry point for a function.
2824bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
2825 const TargetLibraryInfo &RunTLI, AAResults &RunAA,
2826 MemoryDependenceResults *RunMD, LoopInfo &LI,
2827 OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) {
2828 AC = &RunAC;
2829 DT = &RunDT;
2830 VN.setDomTree(DT);
2831 TLI = &RunTLI;
2832 VN.setAliasAnalysis(&RunAA);
2833 MD = RunMD;
2834 ImplicitControlFlowTracking ImplicitCFT;
2835 ICF = &ImplicitCFT;
2836 this->LI = &LI;
2837 VN.setMemDep(M: MD);
2838 VN.setMemorySSA(M: MSSA);
2839 ORE = RunORE;
2840 InvalidBlockRPONumbers = true;
2841 MemorySSAUpdater Updater(MSSA);
2842 MSSAU = MSSA ? &Updater : nullptr;
2843
2844 bool Changed = false;
2845 bool ShouldContinue = true;
2846
2847 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
2848 // Merge unconditional branches, allowing PRE to catch more
2849 // optimization opportunities.
2850 for (BasicBlock &BB : make_early_inc_range(Range&: F)) {
2851 bool RemovedBlock = MergeBlockIntoPredecessor(BB: &BB, DTU: &DTU, LI: &LI, MSSAU, MemDep: MD);
2852 if (RemovedBlock)
2853 ++NumGVNBlocks;
2854
2855 Changed |= RemovedBlock;
2856 }
2857 DTU.flush();
2858
2859 unsigned Iteration = 0;
2860 while (ShouldContinue) {
2861 LLVM_DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
2862 (void) Iteration;
2863 ShouldContinue = iterateOnFunction(F);
2864 Changed |= ShouldContinue;
2865 ++Iteration;
2866 }
2867
2868 if (isPREEnabled()) {
2869 // Fabricate val-num for dead-code in order to suppress assertion in
2870 // performPRE().
2871 assignValNumForDeadCode();
2872 bool PREChanged = true;
2873 while (PREChanged) {
2874 PREChanged = performPRE(F);
2875 Changed |= PREChanged;
2876 }
2877 }
2878
2879 // FIXME: Should perform GVN again after PRE does something. PRE can move
2880 // computations into blocks where they become fully redundant. Note that
2881 // we can't do this until PRE's critical edge splitting updates memdep.
2882 // Actually, when this happens, we should just fully integrate PRE into GVN.
2883
2884 cleanupGlobalSets();
2885 // Do not clean up DeadBlocks in cleanupGlobalSets() as it's called for each
2886 // iteration.
2887 DeadBlocks.clear();
2888
2889 if (MSSA && VerifyMemorySSA)
2890 MSSA->verifyMemorySSA();
2891
2892 return Changed;
2893}
2894
2895bool GVNPass::processBlock(BasicBlock *BB) {
2896 if (DeadBlocks.count(key: BB))
2897 return false;
2898
2899 // Clear the map before every BB because it can only be used for a single BB.
2900 ReplaceOperandsWithMap.clear();
2901 bool ChangedFunction = false;
2902
2903 // Since we may not have visited the input blocks of the phis, we can't
2904 // use our normal hash approach for phis. Instead, simply look for
2905 // obvious duplicates. The first pass of GVN will tend to create
2906 // identical phis, and the second or later passes can eliminate them.
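  // For instance (illustrative IR), the two phis below are obvious duplicates
  // and one can be folded into the other even before their incoming blocks
  // have been visited:
  //   %a = phi i32 [ %x, %then ], [ %y, %else ]
  //   %b = phi i32 [ %x, %then ], [ %y, %else ]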
2907 SmallPtrSet<PHINode *, 8> PHINodesToRemove;
2908 ChangedFunction |= EliminateDuplicatePHINodes(BB, ToRemove&: PHINodesToRemove);
2909 for (PHINode *PN : PHINodesToRemove) {
2910 removeInstruction(I: PN);
2911 }
2912 for (Instruction &Inst : make_early_inc_range(Range&: *BB)) {
2913 if (!ReplaceOperandsWithMap.empty())
2914 ChangedFunction |= replaceOperandsForInBlockEquality(Instr: &Inst);
2915 ChangedFunction |= processInstruction(I: &Inst);
2916 }
2917 return ChangedFunction;
2918}
2919
2920// Instantiate an expression in a predecessor that lacked it.
2921bool GVNPass::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
2922 BasicBlock *Curr, unsigned int ValNo) {
2923 // Because we are going top-down through the block, all value numbers
2924 // will be available in the predecessor by the time we need them. Any
2925 // that weren't originally present will have been instantiated earlier
2926 // in this loop.
2927 bool Success = true;
2928 for (unsigned I = 0, E = Instr->getNumOperands(); I != E; ++I) {
2929 Value *Op = Instr->getOperand(i: I);
2930 if (isa<Argument>(Val: Op) || isa<Constant>(Val: Op) || isa<GlobalValue>(Val: Op))
2931 continue;
    // This could be a newly inserted instruction, in which case we won't
    // find a value number, and should give up before we hurt ourselves.
    // FIXME: Rewrite the infrastructure to make it easier to value number
    // and process newly inserted instructions.
2936 if (!VN.exists(V: Op)) {
2937 Success = false;
2938 break;
2939 }
2940 uint32_t TValNo =
2941 VN.phiTranslate(Pred, PhiBlock: Curr, Num: VN.lookup(V: Op), GVN&: *this);
2942 if (Value *V = findLeader(BB: Pred, Num: TValNo)) {
2943 Instr->setOperand(i: I, Val: V);
2944 } else {
2945 Success = false;
2946 break;
2947 }
2948 }
2949
2950 // Fail out if we encounter an operand that is not available in
2951 // the PRE predecessor. This is typically because of loads which
2952 // are not value numbered precisely.
2953 if (!Success)
2954 return false;
2955
2956 Instr->insertBefore(InsertPos: Pred->getTerminator()->getIterator());
2957 Instr->setName(Instr->getName() + ".pre");
2958 Instr->setDebugLoc(Instr->getDebugLoc());
2959
2960 ICF->insertInstructionTo(Inst: Instr, BB: Pred);
2961
2962 unsigned Num = VN.lookupOrAdd(V: Instr);
2963 VN.add(V: Instr, Num);
2964
2965 // Update the availability map to include the new instruction.
2966 LeaderTable.insert(N: Num, V: Instr, BB: Pred);
2967 return true;
2968}
2969
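/// Try to eliminate a partially redundant scalar computation. If the value
/// computed by CurInst is available in every predecessor except at most one,
/// insert the missing computation there (when needed) and replace CurInst
/// with a phi of the per-predecessor leaders.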
2970bool GVNPass::performScalarPRE(Instruction *CurInst) {
2971 if (isa<AllocaInst>(Val: CurInst) || CurInst->isTerminator() ||
2972 isa<PHINode>(Val: CurInst) || CurInst->getType()->isVoidTy() ||
2973 CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects())
2974 return false;
2975
2976 // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
2977 // sinking the compare again, and it would force the code generator to
2978 // move the i1 from processor flags or predicate registers into a general
2979 // purpose register.
2980 if (isa<CmpInst>(Val: CurInst))
2981 return false;
2982
2983 // Don't do PRE on GEPs. The inserted PHI would prevent CodeGenPrepare from
2984 // sinking the addressing mode computation back to its uses. Extending the
2985 // GEP's live range increases the register pressure, and therefore it can
2986 // introduce unnecessary spills.
2987 //
2988 // This doesn't prevent Load PRE. PHI translation will make the GEP available
2989 // to the load by moving it to the predecessor block if necessary.
2990 if (isa<GetElementPtrInst>(Val: CurInst))
2991 return false;
2992
2993 if (auto *CallB = dyn_cast<CallBase>(Val: CurInst)) {
2994 // We don't currently value number ANY inline asm calls.
2995 if (CallB->isInlineAsm())
2996 return false;
2997 }
2998
2999 uint32_t ValNo = VN.lookup(V: CurInst);
3000
  // Scan the predecessors for PRE opportunities. We're
3002 // only trying to solve the basic diamond case, where
3003 // a value is computed in the successor and one predecessor,
3004 // but not the other. We also explicitly disallow cases
3005 // where the successor is its own predecessor, because they're
3006 // more complicated to get right.
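  // Illustrative shape (not from a specific test):
  //
  //     Pred1: %v = add i32 %a, %b       Pred2: (no 'add %a, %b' available)
  //                 \                       /
  //                  CurrentBlock: %v2 = add i32 %a, %b
  //
  // Here the add is available in Pred1 but not in Pred2; inserting a copy into
  // Pred2 and a phi in CurrentBlock makes %v2 fully redundant.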
3007 unsigned NumWith = 0;
3008 unsigned NumWithout = 0;
3009 BasicBlock *PREPred = nullptr;
3010 BasicBlock *CurrentBlock = CurInst->getParent();
3011
3012 // Update the RPO numbers for this function.
3013 if (InvalidBlockRPONumbers)
3014 assignBlockRPONumber(F&: *CurrentBlock->getParent());
3015
3016 SmallVector<std::pair<Value *, BasicBlock *>, 8> PredMap;
3017 for (BasicBlock *P : predecessors(BB: CurrentBlock)) {
    // We're not interested in PRE when one of the predecessors is not
    // reachable from the entry block.
3020 if (!DT->isReachableFromEntry(A: P)) {
3021 NumWithout = 2;
3022 break;
3023 }
3024 // It is not safe to do PRE when P->CurrentBlock is a loop backedge.
3025 assert(BlockRPONumber.count(P) && BlockRPONumber.count(CurrentBlock) &&
3026 "Invalid BlockRPONumber map.");
3027 if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock]) {
3028 NumWithout = 2;
3029 break;
3030 }
3031
3032 uint32_t TValNo = VN.phiTranslate(Pred: P, PhiBlock: CurrentBlock, Num: ValNo, GVN&: *this);
3033 Value *PredV = findLeader(BB: P, Num: TValNo);
3034 if (!PredV) {
3035 PredMap.push_back(Elt: std::make_pair(x: static_cast<Value *>(nullptr), y&: P));
3036 PREPred = P;
3037 ++NumWithout;
3038 } else if (PredV == CurInst) {
3039 // CurInst dominates this predecessor.
3040 NumWithout = 2;
3041 break;
3042 } else {
3043 PredMap.push_back(Elt: std::make_pair(x&: PredV, y&: P));
3044 ++NumWith;
3045 }
3046 }
3047
3048 // Don't do PRE when it might increase code size, i.e. when
3049 // we would need to insert instructions in more than one pred.
3050 if (NumWithout > 1 || NumWith == 0)
3051 return false;
3052
3053 // We may have a case where all predecessors have the instruction,
3054 // and we just need to insert a phi node. Otherwise, perform
3055 // insertion.
3056 Instruction *PREInstr = nullptr;
3057
3058 if (NumWithout != 0) {
3059 if (!isSafeToSpeculativelyExecute(I: CurInst)) {
      // It is only valid to insert a new instruction if the current
      // instruction is always executed. An instruction with implicit control
      // flow earlier in the block could prevent that. If we cannot speculate
      // the execution, then PRE should be prohibited.
3064 if (ICF->isDominatedByICFIFromSameBlock(Insn: CurInst))
3065 return false;
3066 }
3067
3068 // Don't do PRE across indirect branch.
3069 if (isa<IndirectBrInst>(Val: PREPred->getTerminator()))
3070 return false;
3071
3072 // We can't do PRE safely on a critical edge, so instead we schedule
3073 // the edge to be split and perform the PRE the next time we iterate
3074 // on the function.
3075 unsigned SuccNum = GetSuccessorNumber(BB: PREPred, Succ: CurrentBlock);
3076 if (isCriticalEdge(TI: PREPred->getTerminator(), SuccNum)) {
3077 ToSplit.push_back(Elt: std::make_pair(x: PREPred->getTerminator(), y&: SuccNum));
3078 return false;
3079 }
3080 // We need to insert somewhere, so let's give it a shot.
3081 PREInstr = CurInst->clone();
3082 if (!performScalarPREInsertion(Instr: PREInstr, Pred: PREPred, Curr: CurrentBlock, ValNo)) {
3083 // If we failed insertion, make sure we remove the instruction.
3084#ifndef NDEBUG
3085 verifyRemoved(PREInstr);
3086#endif
3087 PREInstr->deleteValue();
3088 return false;
3089 }
3090 }
3091
3092 // Either we should have filled in the PRE instruction, or we should
3093 // not have needed insertions.
3094 assert(PREInstr != nullptr || NumWithout == 0);
3095
3096 ++NumGVNPRE;
3097
3098 // Create a PHI to make the value available in this block.
3099 PHINode *Phi = PHINode::Create(Ty: CurInst->getType(), NumReservedValues: PredMap.size(),
3100 NameStr: CurInst->getName() + ".pre-phi");
3101 Phi->insertBefore(InsertPos: CurrentBlock->begin());
3102 for (auto &[V, BB] : PredMap) {
3103 if (V) {
3104 // If we use an existing value in this phi, we have to patch the original
3105 // value because the phi will be used to replace a later value.
3106 patchReplacementInstruction(I: CurInst, Repl: V);
3107 Phi->addIncoming(V, BB);
3108 } else
3109 Phi->addIncoming(V: PREInstr, BB: PREPred);
3110 }
3111
3112 VN.add(V: Phi, Num: ValNo);
  // After creating a new PHI for ValNo, the phi translation result for ValNo
  // will change, so erase the related stale entries from the phi translate
  // cache.
3115 VN.eraseTranslateCacheEntry(Num: ValNo, CurrBlock: *CurrentBlock);
3116 LeaderTable.insert(N: ValNo, V: Phi, BB: CurrentBlock);
3117 Phi->setDebugLoc(CurInst->getDebugLoc());
3118 CurInst->replaceAllUsesWith(V: Phi);
3119 if (MD && Phi->getType()->isPtrOrPtrVectorTy())
3120 MD->invalidateCachedPointerInfo(Ptr: Phi);
3121 LeaderTable.erase(N: ValNo, I: CurInst, BB: CurrentBlock);
3122
3123 LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
3124 removeInstruction(I: CurInst);
3125 ++NumGVNInstr;
3126
3127 return true;
3128}
3129
3130/// Perform a purely local form of PRE that looks for diamond
3131/// control flow patterns and attempts to perform simple PRE at the join point.
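/// Critical edges that performScalarPRE() could not handle are recorded in
/// ToSplit and split at the end of the walk, so that the deferred PRE can
/// succeed on a later iteration.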
3132bool GVNPass::performPRE(Function &F) {
3133 bool Changed = false;
3134 for (BasicBlock *CurrentBlock : depth_first(G: &F.getEntryBlock())) {
3135 // Nothing to PRE in the entry block.
3136 if (CurrentBlock == &F.getEntryBlock())
3137 continue;
3138
3139 // Don't perform PRE on an EH pad.
3140 if (CurrentBlock->isEHPad())
3141 continue;
3142
3143 for (BasicBlock::iterator BI = CurrentBlock->begin(),
3144 BE = CurrentBlock->end();
3145 BI != BE;) {
3146 Instruction *CurInst = &*BI++;
3147 Changed |= performScalarPRE(CurInst);
3148 }
3149 }
3150
3151 if (splitCriticalEdges())
3152 Changed = true;
3153
3154 return Changed;
3155}
3156
/// Split the critical edge connecting the given two blocks, and return
/// the block inserted on the critical edge.
3159BasicBlock *GVNPass::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
  // GVN does not require loop-simplify; do not try to preserve it if that is
  // not possible.
3162 BasicBlock *BB = SplitCriticalEdge(
3163 Src: Pred, Dst: Succ,
3164 Options: CriticalEdgeSplittingOptions(DT, LI, MSSAU).unsetPreserveLoopSimplify());
3165 if (BB) {
3166 if (MD)
3167 MD->invalidateCachedPredecessors();
3168 InvalidBlockRPONumbers = true;
3169 }
3170 return BB;
3171}
3172
3173/// Split critical edges found during the previous
3174/// iteration that may enable further optimization.
3175bool GVNPass::splitCriticalEdges() {
3176 if (ToSplit.empty())
3177 return false;
3178
3179 bool Changed = false;
3180 do {
3181 std::pair<Instruction *, unsigned> Edge = ToSplit.pop_back_val();
3182 Changed |= SplitCriticalEdge(TI: Edge.first, SuccNum: Edge.second,
3183 Options: CriticalEdgeSplittingOptions(DT, LI, MSSAU)) !=
3184 nullptr;
3185 } while (!ToSplit.empty());
3186 if (Changed) {
3187 if (MD)
3188 MD->invalidateCachedPredecessors();
3189 InvalidBlockRPONumbers = true;
3190 }
3191 return Changed;
3192}
3193
3194/// Executes one iteration of GVN.
3195bool GVNPass::iterateOnFunction(Function &F) {
3196 cleanupGlobalSets();
3197
  // Top-down (reverse post-order) walk of the function's CFG.
3199 bool Changed = false;
3200 // Needed for value numbering with phi construction to work.
3201 // RPOT walks the graph in its constructor and will not be invalidated during
3202 // processBlock.
3203 ReversePostOrderTraversal<Function *> RPOT(&F);
3204
3205 for (BasicBlock *BB : RPOT)
3206 Changed |= processBlock(BB);
3207
3208 return Changed;
3209}
3210
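/// Reset the per-iteration state: value numbers, the leader table, cached
/// block RPO numbers, and implicit control-flow tracking.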
3211void GVNPass::cleanupGlobalSets() {
3212 VN.clear();
3213 LeaderTable.clear();
3214 BlockRPONumber.clear();
3215 ICF->clear();
3216 InvalidBlockRPONumbers = true;
3217}
3218
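/// Erase \p I from the function and drop it from GVN's bookkeeping: the value
/// table, MemDep, MemorySSA, and implicit control-flow tracking.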
3219void GVNPass::removeInstruction(Instruction *I) {
3220 VN.erase(V: I);
3221 if (MD) MD->removeInstruction(InstToRemove: I);
3222 if (MSSAU)
3223 MSSAU->removeMemoryAccess(I);
3224#ifndef NDEBUG
3225 verifyRemoved(I);
3226#endif
3227 ICF->removeInstruction(Inst: I);
3228 I->eraseFromParent();
3229}
3230
3231/// Verify that the specified instruction does not occur in our
3232/// internal data structures.
3233void GVNPass::verifyRemoved(const Instruction *Inst) const {
3234 VN.verifyRemoved(V: Inst);
3235 LeaderTable.verifyRemoved(V: Inst);
3236}
3237
/// BB is declared dead, which implies that other blocks become dead as well.
/// This function adds all such blocks to "DeadBlocks". For the dead blocks'
/// live successors, update their phi nodes by replacing the operands
/// corresponding to dead blocks with a poison value.
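///
/// For example (illustrative CFG): if BB is the only predecessor of C, then C
/// becomes dead as well; a successor S that also has a live predecessor stays
/// live, but its phi operands coming from dead blocks are replaced with
/// poison.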
3242void GVNPass::addDeadBlock(BasicBlock *BB) {
3243 SmallVector<BasicBlock *, 4> NewDead;
3244 SmallSetVector<BasicBlock *, 4> DF;
3245
3246 NewDead.push_back(Elt: BB);
3247 while (!NewDead.empty()) {
3248 BasicBlock *D = NewDead.pop_back_val();
3249 if (DeadBlocks.count(key: D))
3250 continue;
3251
3252 // All blocks dominated by D are dead.
3253 SmallVector<BasicBlock *, 8> Dom;
3254 DT->getDescendants(R: D, Result&: Dom);
3255 DeadBlocks.insert_range(R&: Dom);
3256
3257 // Figure out the dominance-frontier(D).
3258 for (BasicBlock *B : Dom) {
3259 for (BasicBlock *S : successors(BB: B)) {
3260 if (DeadBlocks.count(key: S))
3261 continue;
3262
3263 bool AllPredDead = true;
3264 for (BasicBlock *P : predecessors(BB: S))
3265 if (!DeadBlocks.count(key: P)) {
3266 AllPredDead = false;
3267 break;
3268 }
3269
3270 if (!AllPredDead) {
          // S could still be proven dead later on. That is why we don't
          // update its phi operands at this moment.
3273 DF.insert(X: S);
3274 } else {
          // Although S is not dominated by D, it is dead by now. This can
          // happen if S already had a dead predecessor before D was declared
          // dead.
3278 NewDead.push_back(Elt: S);
3279 }
3280 }
3281 }
3282 }
3283
  // For the dead blocks' live successors, update their phi nodes by replacing
  // the operands corresponding to dead blocks with a poison value.
3286 for (BasicBlock *B : DF) {
3287 if (DeadBlocks.count(key: B))
3288 continue;
3289
3290 // First, split the critical edges. This might also create additional blocks
3291 // to preserve LoopSimplify form and adjust edges accordingly.
3292 SmallVector<BasicBlock *, 4> Preds(predecessors(BB: B));
3293 for (BasicBlock *P : Preds) {
3294 if (!DeadBlocks.count(key: P))
3295 continue;
3296
3297 if (is_contained(Range: successors(BB: P), Element: B) &&
3298 isCriticalEdge(TI: P->getTerminator(), Succ: B)) {
3299 if (BasicBlock *S = splitCriticalEdges(Pred: P, Succ: B))
3300 DeadBlocks.insert(X: P = S);
3301 }
3302 }
3303
3304 // Now poison the incoming values from the dead predecessors.
3305 for (BasicBlock *P : predecessors(BB: B)) {
3306 if (!DeadBlocks.count(key: P))
3307 continue;
3308 for (PHINode &Phi : B->phis()) {
3309 Phi.setIncomingValueForBlock(BB: P, V: PoisonValue::get(T: Phi.getType()));
3310 if (MD)
3311 MD->invalidateCachedPointerInfo(Ptr: &Phi);
3312 }
3313 }
3314 }
3315}
3316
// If the given branch is recognized as a foldable branch (i.e. a conditional
// branch with a constant condition), perform the following analyses and
// transformations:
// 1) If the dead outgoing edge is a critical edge, split it. Let
//    R be the target of the dead outgoing edge.
// 2) Identify the set of dead blocks implied by the branch's dead outgoing
//    edge. The result of this step will be {X | X is dominated by R}.
// 3) Identify those blocks which have at least one dead predecessor. The
//    result of this step will be dominance-frontier(R).
// 4) Update the PHIs in DF(R) by replacing the operands corresponding to
//    dead blocks with poison, in the hope that these PHIs will be optimized
//    away.
//
// Return true iff *NEW* dead code is found.
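//
// For instance (illustrative IR):
//   br i1 false, label %dead, label %live
// always transfers control to %live, so %dead is the root R above: every block
// dominated by %dead is added to DeadBlocks, and the phis in DF(%dead) get
// poison for the incoming edges from those blocks.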
3330bool GVNPass::processFoldableCondBr(BranchInst *BI) {
3331 if (!BI || BI->isUnconditional())
3332 return false;
3333
3334 // If a branch has two identical successors, we cannot declare either dead.
3335 if (BI->getSuccessor(i: 0) == BI->getSuccessor(i: 1))
3336 return false;
3337
3338 ConstantInt *Cond = dyn_cast<ConstantInt>(Val: BI->getCondition());
3339 if (!Cond)
3340 return false;
3341
3342 BasicBlock *DeadRoot =
3343 Cond->getZExtValue() ? BI->getSuccessor(i: 1) : BI->getSuccessor(i: 0);
3344 if (DeadBlocks.count(key: DeadRoot))
3345 return false;
3346
3347 if (!DeadRoot->getSinglePredecessor())
3348 DeadRoot = splitCriticalEdges(Pred: BI->getParent(), Succ: DeadRoot);
3349
3350 addDeadBlock(BB: DeadRoot);
3351 return true;
3352}
3353
// performPRE() will trigger an assertion if it comes across an instruction
// without an associated val-num. As a function normally has far more live
// instructions than dead instructions, it makes more sense just to "fabricate"
// a val-num for the dead code than to check whether each instruction is dead.
3358void GVNPass::assignValNumForDeadCode() {
3359 for (BasicBlock *BB : DeadBlocks) {
3360 for (Instruction &Inst : *BB) {
3361 unsigned ValNum = VN.lookupOrAdd(V: &Inst);
3362 LeaderTable.insert(N: ValNum, V: &Inst, BB);
3363 }
3364 }
3365}
3366
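/// Legacy pass-manager wrapper that forwards to the GVNPass implementation.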
3367class llvm::gvn::GVNLegacyPass : public FunctionPass {
3368public:
3369 static char ID; // Pass identification, replacement for typeid.
3370
3371 explicit GVNLegacyPass(bool MemDepAnalysis = GVNEnableMemDep,
3372 bool MemSSAAnalysis = GVNEnableMemorySSA)
3373 : FunctionPass(ID), Impl(GVNOptions()
3374 .setMemDep(MemDepAnalysis)
3375 .setMemorySSA(MemSSAAnalysis)) {
3376 initializeGVNLegacyPassPass(*PassRegistry::getPassRegistry());
3377 }
3378
3379 bool runOnFunction(Function &F) override {
3380 if (skipFunction(F))
3381 return false;
3382
3383 auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
3384 if (Impl.isMemorySSAEnabled() && !MSSAWP)
3385 MSSAWP = &getAnalysis<MemorySSAWrapperPass>();
3386
3387 return Impl.runImpl(
3388 F, RunAC&: getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
3389 RunDT&: getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
3390 RunTLI: getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
3391 RunAA&: getAnalysis<AAResultsWrapperPass>().getAAResults(),
3392 RunMD: Impl.isMemDepEnabled()
3393 ? &getAnalysis<MemoryDependenceWrapperPass>().getMemDep()
3394 : nullptr,
3395 LI&: getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
3396 RunORE: &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(),
3397 MSSA: MSSAWP ? &MSSAWP->getMSSA() : nullptr);
3398 }
3399
3400 void getAnalysisUsage(AnalysisUsage &AU) const override {
3401 AU.addRequired<AssumptionCacheTracker>();
3402 AU.addRequired<DominatorTreeWrapperPass>();
3403 AU.addRequired<TargetLibraryInfoWrapperPass>();
3404 AU.addRequired<LoopInfoWrapperPass>();
3405 if (Impl.isMemDepEnabled())
3406 AU.addRequired<MemoryDependenceWrapperPass>();
3407 AU.addRequired<AAResultsWrapperPass>();
3408 AU.addPreserved<DominatorTreeWrapperPass>();
3409 AU.addPreserved<GlobalsAAWrapperPass>();
3410 AU.addPreserved<TargetLibraryInfoWrapperPass>();
3411 AU.addPreserved<LoopInfoWrapperPass>();
3412 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
3413 AU.addPreserved<MemorySSAWrapperPass>();
3414 if (Impl.isMemorySSAEnabled())
3415 AU.addRequired<MemorySSAWrapperPass>();
3416 }
3417
3418private:
3419 GVNPass Impl;
3420};
3421
3422char GVNLegacyPass::ID = 0;
3423
3424INITIALIZE_PASS_BEGIN(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
3425INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
3426INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
3427INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
3428INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
3429INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
3430INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
3431INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
3432INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
3433INITIALIZE_PASS_END(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
3434
3435// The public interface to this file...
3436FunctionPass *llvm::createGVNPass() { return new GVNLegacyPass(); }
3437