1//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs global value numbering to eliminate fully redundant
10// instructions. It also performs simple dead load elimination.
11//
12// Note that this pass does the value numbering itself; it does not use the
13// ValueNumbering analysis passes.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/Transforms/Scalar/GVN.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/DepthFirstIterator.h"
20#include "llvm/ADT/Hashing.h"
21#include "llvm/ADT/MapVector.h"
22#include "llvm/ADT/PostOrderIterator.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
25#include "llvm/ADT/SmallPtrSet.h"
26#include "llvm/ADT/SmallVector.h"
27#include "llvm/ADT/Statistic.h"
28#include "llvm/Analysis/AliasAnalysis.h"
29#include "llvm/Analysis/AssumeBundleQueries.h"
30#include "llvm/Analysis/AssumptionCache.h"
31#include "llvm/Analysis/CFG.h"
32#include "llvm/Analysis/DomTreeUpdater.h"
33#include "llvm/Analysis/GlobalsModRef.h"
34#include "llvm/Analysis/InstructionPrecedenceTracking.h"
35#include "llvm/Analysis/InstructionSimplify.h"
36#include "llvm/Analysis/Loads.h"
37#include "llvm/Analysis/LoopInfo.h"
38#include "llvm/Analysis/MemoryBuiltins.h"
39#include "llvm/Analysis/MemoryDependenceAnalysis.h"
40#include "llvm/Analysis/MemorySSA.h"
41#include "llvm/Analysis/MemorySSAUpdater.h"
42#include "llvm/Analysis/OptimizationRemarkEmitter.h"
43#include "llvm/Analysis/PHITransAddr.h"
44#include "llvm/Analysis/TargetLibraryInfo.h"
45#include "llvm/Analysis/ValueTracking.h"
46#include "llvm/IR/Attributes.h"
47#include "llvm/IR/BasicBlock.h"
48#include "llvm/IR/Constant.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DebugLoc.h"
51#include "llvm/IR/Dominators.h"
52#include "llvm/IR/Function.h"
53#include "llvm/IR/InstrTypes.h"
54#include "llvm/IR/Instruction.h"
55#include "llvm/IR/Instructions.h"
56#include "llvm/IR/IntrinsicInst.h"
57#include "llvm/IR/LLVMContext.h"
58#include "llvm/IR/Metadata.h"
59#include "llvm/IR/Module.h"
60#include "llvm/IR/PassManager.h"
61#include "llvm/IR/PatternMatch.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/Value.h"
65#include "llvm/InitializePasses.h"
66#include "llvm/Pass.h"
67#include "llvm/Support/Casting.h"
68#include "llvm/Support/CommandLine.h"
69#include "llvm/Support/Compiler.h"
70#include "llvm/Support/Debug.h"
71#include "llvm/Support/raw_ostream.h"
72#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
73#include "llvm/Transforms/Utils/BasicBlockUtils.h"
74#include "llvm/Transforms/Utils/Local.h"
75#include "llvm/Transforms/Utils/SSAUpdater.h"
76#include "llvm/Transforms/Utils/VNCoercion.h"
77#include <algorithm>
78#include <cassert>
79#include <cstdint>
80#include <optional>
81#include <utility>
82
83using namespace llvm;
84using namespace llvm::gvn;
85using namespace llvm::VNCoercion;
86using namespace PatternMatch;
87
88#define DEBUG_TYPE "gvn"
89
90STATISTIC(NumGVNInstr, "Number of instructions deleted");
91STATISTIC(NumGVNLoad, "Number of loads deleted");
92STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
93STATISTIC(NumGVNBlocks, "Number of blocks merged");
94STATISTIC(NumGVNSimpl, "Number of instructions simplified");
95STATISTIC(NumGVNEqProp, "Number of equalities propagated");
96STATISTIC(NumPRELoad, "Number of loads PRE'd");
97STATISTIC(NumPRELoopLoad, "Number of loop loads PRE'd");
98STATISTIC(NumPRELoadMoved2CEPred,
99 "Number of loads moved to predecessor of a critical edge in PRE");
100
101STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax,
102 "Number of blocks speculated as available in "
103 "IsValueFullyAvailableInBlock(), max");
104STATISTIC(MaxBBSpeculationCutoffReachedTimes,
105 "Number of times we we reached gvn-max-block-speculations cut-off "
106 "preventing further exploration");
107
108static cl::opt<bool> GVNEnableScalarPRE("enable-scalar-pre", cl::init(Val: true),
109 cl::Hidden);
110static cl::opt<bool> GVNEnableLoadPRE("enable-load-pre", cl::init(Val: true));
111static cl::opt<bool> GVNEnableLoadInLoopPRE("enable-load-in-loop-pre",
112 cl::init(Val: true));
113static cl::opt<bool>
114GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre",
115 cl::init(Val: false));
116static cl::opt<bool> GVNEnableMemDep("enable-gvn-memdep", cl::init(Val: true));
117static cl::opt<bool> GVNEnableMemorySSA("enable-gvn-memoryssa",
118 cl::init(Val: false));
119
120static cl::opt<unsigned> ScanUsersLimit(
121 "gvn-scan-users-limit", cl::Hidden, cl::init(Val: 100),
122 cl::desc("The number of memory accesses to scan in a block in reaching "
123 "memory values analysis (default = 100)"));
124
125static cl::opt<uint32_t> MaxNumDeps(
126 "gvn-max-num-deps", cl::Hidden, cl::init(Val: 100),
127 cl::desc("Max number of dependences to attempt Load PRE (default = 100)"));
128
129// This is based on IsValueFullyAvailableInBlockNumSpeculationsMax stat.
130static cl::opt<uint32_t> MaxBBSpeculations(
131 "gvn-max-block-speculations", cl::Hidden, cl::init(Val: 600),
132 cl::desc("Max number of blocks we're willing to speculate on (and recurse "
133 "into) when deducing if a value is fully available or not in GVN "
134 "(default = 600)"));
135
136static cl::opt<uint32_t> MaxNumVisitedInsts(
137 "gvn-max-num-visited-insts", cl::Hidden, cl::init(Val: 100),
138 cl::desc("Max number of visited instructions when trying to find "
139 "dominating value of select dependency (default = 100)"));
140
141static cl::opt<uint32_t> MaxNumInsnsPerBlock(
142 "gvn-max-num-insns", cl::Hidden, cl::init(Val: 100),
143 cl::desc("Max number of instructions to scan in each basic block in GVN "
144 "(default = 100)"));
145
146struct llvm::GVNPass::Expression {
147 uint32_t Opcode;
148 bool Commutative = false;
149 // The type is not necessarily the result type of the expression, it may be
150 // any additional type needed to disambiguate the expression.
151 Type *Ty = nullptr;
152 SmallVector<uint32_t, 4> VarArgs;
153
154 AttributeList Attrs;
155
156 Expression(uint32_t Op = ~2U) : Opcode(Op) {}
157
158 bool operator==(const Expression &Other) const {
159 if (Opcode != Other.Opcode)
160 return false;
161 if (Opcode == ~0U || Opcode == ~1U)
162 return true;
163 if (Ty != Other.Ty)
164 return false;
165 if (VarArgs != Other.VarArgs)
166 return false;
167 if ((!Attrs.isEmpty() || !Other.Attrs.isEmpty()) &&
168 !Attrs.intersectWith(C&: Ty->getContext(), Other: Other.Attrs).has_value())
169 return false;
170 return true;
171 }
172
173 friend hash_code hash_value(const Expression &Value) {
174 return hash_combine(args: Value.Opcode, args: Value.Ty,
175 args: hash_combine_range(R: Value.VarArgs));
176 }
177};
178
179template <> struct llvm::DenseMapInfo<GVNPass::Expression> {
180 static unsigned getHashValue(const GVNPass::Expression &E) {
181 using llvm::hash_value;
182
183 return static_cast<unsigned>(hash_value(Value: E));
184 }
185
186 static bool isEqual(const GVNPass::Expression &LHS,
187 const GVNPass::Expression &RHS) {
188 return LHS == RHS;
189 }
190};
191
192/// Represents a particular available value that we know how to materialize.
193/// Materialization of an AvailableValue never fails. An AvailableValue is
194/// implicitly associated with a rematerialization point which is the
195/// location of the instruction from which it was formed.
196struct llvm::gvn::AvailableValue {
197 enum class ValType {
198 SimpleVal, // A simple offsetted value that is accessed.
199 LoadVal, // A value produced by a load.
200 MemIntrin, // A memory intrinsic which is loaded from.
201 UndefVal, // A UndefValue representing a value from dead block (which
202 // is not yet physically removed from the CFG).
203 SelectVal, // A pointer select which is loaded from and for which the load
204 // can be replace by a value select.
205 };
206
207 /// Val - The value that is live out of the block.
208 Value *Val;
209 /// Kind of the live-out value.
210 ValType Kind;
211
212 /// Offset - The byte offset in Val that is interesting for the load query.
213 unsigned Offset = 0;
214 /// V1, V2 - The dominating non-clobbered values of SelectVal.
215 Value *V1 = nullptr, *V2 = nullptr;
216
217 static AvailableValue get(Value *V, unsigned Offset = 0) {
218 AvailableValue Res;
219 Res.Val = V;
220 Res.Kind = ValType::SimpleVal;
221 Res.Offset = Offset;
222 return Res;
223 }
224
225 static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset = 0) {
226 AvailableValue Res;
227 Res.Val = MI;
228 Res.Kind = ValType::MemIntrin;
229 Res.Offset = Offset;
230 return Res;
231 }
232
233 static AvailableValue getLoad(LoadInst *Load, unsigned Offset = 0) {
234 AvailableValue Res;
235 Res.Val = Load;
236 Res.Kind = ValType::LoadVal;
237 Res.Offset = Offset;
238 return Res;
239 }
240
241 static AvailableValue getUndef() {
242 AvailableValue Res;
243 Res.Val = nullptr;
244 Res.Kind = ValType::UndefVal;
245 Res.Offset = 0;
246 return Res;
247 }
248
249 static AvailableValue getSelect(Value *Cond, Value *V1, Value *V2) {
250 AvailableValue Res;
251 Res.Val = Cond;
252 Res.Kind = ValType::SelectVal;
253 Res.Offset = 0;
254 Res.V1 = V1;
255 Res.V2 = V2;
256 return Res;
257 }
258
259 bool isSimpleValue() const { return Kind == ValType::SimpleVal; }
260 bool isCoercedLoadValue() const { return Kind == ValType::LoadVal; }
261 bool isMemIntrinValue() const { return Kind == ValType::MemIntrin; }
262 bool isUndefValue() const { return Kind == ValType::UndefVal; }
263 bool isSelectValue() const { return Kind == ValType::SelectVal; }
264
265 Value *getSimpleValue() const {
266 assert(isSimpleValue() && "Wrong accessor");
267 return Val;
268 }
269
270 LoadInst *getCoercedLoadValue() const {
271 assert(isCoercedLoadValue() && "Wrong accessor");
272 return cast<LoadInst>(Val);
273 }
274
275 MemIntrinsic *getMemIntrinValue() const {
276 assert(isMemIntrinValue() && "Wrong accessor");
277 return cast<MemIntrinsic>(Val);
278 }
279
280 Value *getSelectCondition() const {
281 assert(isSelectValue() && "Wrong accessor");
282 return Val;
283 }
284
285 /// Emit code at the specified insertion point to adjust the value defined
286 /// here to the specified type. This handles various coercion cases.
287 Value *MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt) const;
288};
289
290/// Represents an AvailableValue which can be rematerialized at the end of
291/// the associated BasicBlock.
292struct llvm::gvn::AvailableValueInBlock {
293 /// BB - The basic block in question.
294 BasicBlock *BB = nullptr;
295
296 /// AV - The actual available value.
297 AvailableValue AV;
298
299 static AvailableValueInBlock get(BasicBlock *BB, AvailableValue &&AV) {
300 AvailableValueInBlock Res;
301 Res.BB = BB;
302 Res.AV = std::move(AV);
303 return Res;
304 }
305
306 static AvailableValueInBlock get(BasicBlock *BB, Value *V,
307 unsigned Offset = 0) {
308 return get(BB, AV: AvailableValue::get(V, Offset));
309 }
310
311 static AvailableValueInBlock getUndef(BasicBlock *BB) {
312 return get(BB, AV: AvailableValue::getUndef());
313 }
314
315 /// Emit code at the end of this block to adjust the value defined here to
316 /// the specified type. This handles various coercion cases.
317 Value *MaterializeAdjustedValue(LoadInst *Load) const {
318 return AV.MaterializeAdjustedValue(Load, InsertPt: BB->getTerminator());
319 }
320};
321
322//===----------------------------------------------------------------------===//
323// ValueTable Internal Functions
324//===----------------------------------------------------------------------===//
325
326GVNPass::Expression GVNPass::ValueTable::createExpr(Instruction *I) {
327 Expression E;
328 E.Ty = I->getType();
329 E.Opcode = I->getOpcode();
330 if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(Val: I)) {
331 // gc.relocate is 'special' call: its second and third operands are
332 // not real values, but indices into statepoint's argument list.
333 // Use the refered to values for purposes of identity.
334 E.VarArgs.push_back(Elt: lookupOrAdd(V: GCR->getOperand(i_nocapture: 0)));
335 E.VarArgs.push_back(Elt: lookupOrAdd(V: GCR->getBasePtr()));
336 E.VarArgs.push_back(Elt: lookupOrAdd(V: GCR->getDerivedPtr()));
337 } else {
338 for (Use &Op : I->operands())
339 E.VarArgs.push_back(Elt: lookupOrAdd(V: Op));
340 }
341 if (I->isCommutative()) {
342 // Ensure that commutative instructions that only differ by a permutation
343 // of their operands get the same value number by sorting the operand value
344 // numbers. Since commutative operands are the 1st two operands it is more
345 // efficient to sort by hand rather than using, say, std::sort.
346 assert(I->getNumOperands() >= 2 && "Unsupported commutative instruction!");
347 if (E.VarArgs[0] > E.VarArgs[1])
348 std::swap(a&: E.VarArgs[0], b&: E.VarArgs[1]);
349 E.Commutative = true;
350 }
351
352 if (auto *C = dyn_cast<CmpInst>(Val: I)) {
353 // Sort the operand value numbers so x<y and y>x get the same value number.
354 CmpInst::Predicate Predicate = C->getPredicate();
355 if (E.VarArgs[0] > E.VarArgs[1]) {
356 std::swap(a&: E.VarArgs[0], b&: E.VarArgs[1]);
357 Predicate = CmpInst::getSwappedPredicate(pred: Predicate);
358 }
359 E.Opcode = (C->getOpcode() << 8) | Predicate;
360 E.Commutative = true;
361 } else if (auto *IVI = dyn_cast<InsertValueInst>(Val: I)) {
362 E.VarArgs.append(in_start: IVI->idx_begin(), in_end: IVI->idx_end());
363 } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(Val: I)) {
364 ArrayRef<int> ShuffleMask = SVI->getShuffleMask();
365 E.VarArgs.append(in_start: ShuffleMask.begin(), in_end: ShuffleMask.end());
366 } else if (auto *CB = dyn_cast<CallBase>(Val: I)) {
367 E.Attrs = CB->getAttributes();
368 }
369
370 return E;
371}
372
373GVNPass::Expression GVNPass::ValueTable::createCmpExpr(
374 unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) {
375 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
376 "Not a comparison!");
377 Expression E;
378 E.Ty = CmpInst::makeCmpResultType(opnd_type: LHS->getType());
379 E.VarArgs.push_back(Elt: lookupOrAdd(V: LHS));
380 E.VarArgs.push_back(Elt: lookupOrAdd(V: RHS));
381
382 // Sort the operand value numbers so x<y and y>x get the same value number.
383 if (E.VarArgs[0] > E.VarArgs[1]) {
384 std::swap(a&: E.VarArgs[0], b&: E.VarArgs[1]);
385 Predicate = CmpInst::getSwappedPredicate(pred: Predicate);
386 }
387 E.Opcode = (Opcode << 8) | Predicate;
388 E.Commutative = true;
389 return E;
390}
391
392GVNPass::Expression
393GVNPass::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
394 assert(EI && "Not an ExtractValueInst?");
395 Expression E;
396 E.Ty = EI->getType();
397 E.Opcode = 0;
398
399 WithOverflowInst *WO = dyn_cast<WithOverflowInst>(Val: EI->getAggregateOperand());
400 if (WO != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
401 // EI is an extract from one of our with.overflow intrinsics. Synthesize
402 // a semantically equivalent expression instead of an extract value
403 // expression.
404 E.Opcode = WO->getBinaryOp();
405 E.VarArgs.push_back(Elt: lookupOrAdd(V: WO->getLHS()));
406 E.VarArgs.push_back(Elt: lookupOrAdd(V: WO->getRHS()));
407 return E;
408 }
409
410 // Not a recognised intrinsic. Fall back to producing an extract value
411 // expression.
412 E.Opcode = EI->getOpcode();
413 for (Use &Op : EI->operands())
414 E.VarArgs.push_back(Elt: lookupOrAdd(V: Op));
415
416 append_range(C&: E.VarArgs, R: EI->indices());
417
418 return E;
419}
420
421GVNPass::Expression GVNPass::ValueTable::createGEPExpr(GetElementPtrInst *GEP) {
422 Expression E;
423 Type *PtrTy = GEP->getType()->getScalarType();
424 const DataLayout &DL = GEP->getDataLayout();
425 unsigned BitWidth = DL.getIndexTypeSizeInBits(Ty: PtrTy);
426 SmallMapVector<Value *, APInt, 4> VariableOffsets;
427 APInt ConstantOffset(BitWidth, 0);
428 if (GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) {
429 // Convert into offset representation, to recognize equivalent address
430 // calculations that use different type encoding.
431 LLVMContext &Context = GEP->getContext();
432 E.Opcode = GEP->getOpcode();
433 E.Ty = nullptr;
434 E.VarArgs.push_back(Elt: lookupOrAdd(V: GEP->getPointerOperand()));
435 for (const auto &[V, Scale] : VariableOffsets) {
436 E.VarArgs.push_back(Elt: lookupOrAdd(V));
437 E.VarArgs.push_back(Elt: lookupOrAdd(V: ConstantInt::get(Context, V: Scale)));
438 }
439 if (!ConstantOffset.isZero())
440 E.VarArgs.push_back(
441 Elt: lookupOrAdd(V: ConstantInt::get(Context, V: ConstantOffset)));
442 } else {
443 // If converting to offset representation fails (for scalable vectors),
444 // fall back to type-based implementation.
445 E.Opcode = GEP->getOpcode();
446 E.Ty = GEP->getSourceElementType();
447 for (Use &Op : GEP->operands())
448 E.VarArgs.push_back(Elt: lookupOrAdd(V: Op));
449 }
450 return E;
451}
452
453//===----------------------------------------------------------------------===//
454// ValueTable External Functions
455//===----------------------------------------------------------------------===//
456
457GVNPass::ValueTable::ValueTable() = default;
458GVNPass::ValueTable::ValueTable(const ValueTable &) = default;
459GVNPass::ValueTable::ValueTable(ValueTable &&) = default;
460GVNPass::ValueTable::~ValueTable() = default;
461GVNPass::ValueTable &
462GVNPass::ValueTable::operator=(const GVNPass::ValueTable &Arg) = default;
463
464/// add - Insert a value into the table with a specified value number.
465void GVNPass::ValueTable::add(Value *V, uint32_t Num) {
466 ValueNumbering.insert(KV: std::make_pair(x&: V, y&: Num));
467 if (PHINode *PN = dyn_cast<PHINode>(Val: V))
468 NumberingPhi[Num] = PN;
469}
470
471/// Include the incoming memory state into the hash of the expression for the
472/// given instruction. If the incoming memory state is:
473/// * LiveOnEntry, add the value number of the entry block,
474/// * a MemoryPhi, add the value number of the basic block corresponding to that
475/// MemoryPhi,
476/// * a MemoryDef, add the value number of the memory setting instruction.
477void GVNPass::ValueTable::addMemoryStateToExp(Instruction *I, Expression &Exp) {
478 assert(MSSA && "addMemoryStateToExp should not be called without MemorySSA");
479 assert(MSSA->getMemoryAccess(I) && "Instruction does not access memory");
480 MemoryAccess *MA = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(I);
481 Exp.VarArgs.push_back(Elt: lookupOrAdd(MA));
482}
483
484uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) {
485 // FIXME: Currently the calls which may access the thread id may
486 // be considered as not accessing the memory. But this is
487 // problematic for coroutines, since coroutines may resume in a
488 // different thread. So we disable the optimization here for the
489 // correctness. However, it may block many other correct
490 // optimizations. Revert this one when we detect the memory
491 // accessing kind more precisely.
492 if (C->getFunction()->isPresplitCoroutine()) {
493 ValueNumbering[C] = NextValueNumber;
494 return NextValueNumber++;
495 }
496
497 // Do not combine convergent calls since they implicitly depend on the set of
498 // threads that is currently executing, and they might be in different basic
499 // blocks.
500 if (C->isConvergent()) {
501 ValueNumbering[C] = NextValueNumber;
502 return NextValueNumber++;
503 }
504
505 if (AA->doesNotAccessMemory(Call: C)) {
506 Expression Exp = createExpr(I: C);
507 uint32_t E = assignExpNewValueNum(Exp).first;
508 ValueNumbering[C] = E;
509 return E;
510 }
511
512 if (MD && AA->onlyReadsMemory(Call: C)) {
513 Expression Exp = createExpr(I: C);
514 auto [E, IsValNumNew] = assignExpNewValueNum(Exp);
515 if (IsValNumNew) {
516 ValueNumbering[C] = E;
517 return E;
518 }
519
520 MemDepResult LocalDep = MD->getDependency(QueryInst: C);
521
522 if (!LocalDep.isDef() && !LocalDep.isNonLocal()) {
523 ValueNumbering[C] = NextValueNumber;
524 return NextValueNumber++;
525 }
526
527 if (LocalDep.isDef()) {
528 // For masked load/store intrinsics, the local_dep may actually be
529 // a normal load or store instruction.
530 CallInst *LocalDepCall = dyn_cast<CallInst>(Val: LocalDep.getInst());
531
532 if (!LocalDepCall || LocalDepCall->arg_size() != C->arg_size()) {
533 ValueNumbering[C] = NextValueNumber;
534 return NextValueNumber++;
535 }
536
537 for (unsigned I = 0, E = C->arg_size(); I < E; ++I) {
538 uint32_t CVN = lookupOrAdd(V: C->getArgOperand(i: I));
539 uint32_t LocalDepCallVN = lookupOrAdd(V: LocalDepCall->getArgOperand(i: I));
540 if (CVN != LocalDepCallVN) {
541 ValueNumbering[C] = NextValueNumber;
542 return NextValueNumber++;
543 }
544 }
545
546 uint32_t V = lookupOrAdd(V: LocalDepCall);
547 ValueNumbering[C] = V;
548 return V;
549 }
550
551 // Non-local case.
552 const MemoryDependenceResults::NonLocalDepInfo &Deps =
553 MD->getNonLocalCallDependency(QueryCall: C);
554 // FIXME: Move the checking logic to MemDep!
555 CallInst *CDep = nullptr;
556
557 // Check to see if we have a single dominating call instruction that is
558 // identical to C.
559 for (const NonLocalDepEntry &I : Deps) {
560 if (I.getResult().isNonLocal())
561 continue;
562
563 // We don't handle non-definitions. If we already have a call, reject
564 // instruction dependencies.
565 if (!I.getResult().isDef() || CDep != nullptr) {
566 CDep = nullptr;
567 break;
568 }
569
570 CallInst *NonLocalDepCall = dyn_cast<CallInst>(Val: I.getResult().getInst());
571 // FIXME: All duplicated with non-local case.
572 if (NonLocalDepCall && DT->properlyDominates(A: I.getBB(), B: C->getParent())) {
573 CDep = NonLocalDepCall;
574 continue;
575 }
576
577 CDep = nullptr;
578 break;
579 }
580
581 if (!CDep) {
582 ValueNumbering[C] = NextValueNumber;
583 return NextValueNumber++;
584 }
585
586 if (CDep->arg_size() != C->arg_size()) {
587 ValueNumbering[C] = NextValueNumber;
588 return NextValueNumber++;
589 }
590 for (unsigned I = 0, E = C->arg_size(); I < E; ++I) {
591 uint32_t CVN = lookupOrAdd(V: C->getArgOperand(i: I));
592 uint32_t CDepVN = lookupOrAdd(V: CDep->getArgOperand(i: I));
593 if (CVN != CDepVN) {
594 ValueNumbering[C] = NextValueNumber;
595 return NextValueNumber++;
596 }
597 }
598
599 uint32_t V = lookupOrAdd(V: CDep);
600 ValueNumbering[C] = V;
601 return V;
602 }
603
604 if (MSSA && IsMSSAEnabled && AA->onlyReadsMemory(Call: C)) {
605 Expression Exp = createExpr(I: C);
606 addMemoryStateToExp(I: C, Exp);
607 auto [V, _] = assignExpNewValueNum(Exp);
608 ValueNumbering[C] = V;
609 return V;
610 }
611
612 ValueNumbering[C] = NextValueNumber;
613 return NextValueNumber++;
614}
615
616/// Returns the value number for the specified load or store instruction.
617uint32_t GVNPass::ValueTable::computeLoadStoreVN(Instruction *I) {
618 if (!MSSA || !IsMSSAEnabled) {
619 ValueNumbering[I] = NextValueNumber;
620 return NextValueNumber++;
621 }
622
623 Expression Exp;
624 Exp.Ty = I->getType();
625 Exp.Opcode = I->getOpcode();
626 for (Use &Op : I->operands())
627 Exp.VarArgs.push_back(Elt: lookupOrAdd(V: Op));
628 addMemoryStateToExp(I, Exp);
629
630 auto [V, _] = assignExpNewValueNum(Exp);
631 ValueNumbering[I] = V;
632 return V;
633}
634
635/// Returns true if a value number exists for the specified value.
636bool GVNPass::ValueTable::exists(Value *V) const {
637 return ValueNumbering.contains(Val: V);
638}
639
640uint32_t GVNPass::ValueTable::lookupOrAdd(MemoryAccess *MA) {
641 return MSSA->isLiveOnEntryDef(MA) || isa<MemoryPhi>(Val: MA)
642 ? lookupOrAdd(V: MA->getBlock())
643 : lookupOrAdd(V: cast<MemoryUseOrDef>(Val: MA)->getMemoryInst());
644}
645
646/// lookupOrAdd - Returns the value number for the specified value, assigning
647/// it a new number if it did not have one before.
648uint32_t GVNPass::ValueTable::lookupOrAdd(Value *V) {
649 auto VI = ValueNumbering.find(Val: V);
650 if (VI != ValueNumbering.end())
651 return VI->second;
652
653 auto *I = dyn_cast<Instruction>(Val: V);
654 if (!I) {
655 ValueNumbering[V] = NextValueNumber;
656 if (isa<BasicBlock>(Val: V))
657 NumberingBB[NextValueNumber] = cast<BasicBlock>(Val: V);
658 return NextValueNumber++;
659 }
660
661 Expression Exp;
662 switch (I->getOpcode()) {
663 case Instruction::Call:
664 return lookupOrAddCall(C: cast<CallInst>(Val: I));
665 case Instruction::FNeg:
666 case Instruction::Add:
667 case Instruction::FAdd:
668 case Instruction::Sub:
669 case Instruction::FSub:
670 case Instruction::Mul:
671 case Instruction::FMul:
672 case Instruction::UDiv:
673 case Instruction::SDiv:
674 case Instruction::FDiv:
675 case Instruction::URem:
676 case Instruction::SRem:
677 case Instruction::FRem:
678 case Instruction::Shl:
679 case Instruction::LShr:
680 case Instruction::AShr:
681 case Instruction::And:
682 case Instruction::Or:
683 case Instruction::Xor:
684 case Instruction::ICmp:
685 case Instruction::FCmp:
686 case Instruction::Trunc:
687 case Instruction::ZExt:
688 case Instruction::SExt:
689 case Instruction::FPToUI:
690 case Instruction::FPToSI:
691 case Instruction::UIToFP:
692 case Instruction::SIToFP:
693 case Instruction::FPTrunc:
694 case Instruction::FPExt:
695 case Instruction::PtrToInt:
696 case Instruction::PtrToAddr:
697 case Instruction::IntToPtr:
698 case Instruction::AddrSpaceCast:
699 case Instruction::BitCast:
700 case Instruction::Select:
701 case Instruction::Freeze:
702 case Instruction::ExtractElement:
703 case Instruction::InsertElement:
704 case Instruction::ShuffleVector:
705 case Instruction::InsertValue:
706 Exp = createExpr(I);
707 break;
708 case Instruction::GetElementPtr:
709 Exp = createGEPExpr(GEP: cast<GetElementPtrInst>(Val: I));
710 break;
711 case Instruction::ExtractValue:
712 Exp = createExtractvalueExpr(EI: cast<ExtractValueInst>(Val: I));
713 break;
714 case Instruction::PHI:
715 ValueNumbering[V] = NextValueNumber;
716 NumberingPhi[NextValueNumber] = cast<PHINode>(Val: V);
717 return NextValueNumber++;
718 case Instruction::Load:
719 case Instruction::Store:
720 return computeLoadStoreVN(I);
721 default:
722 ValueNumbering[V] = NextValueNumber;
723 return NextValueNumber++;
724 }
725
726 uint32_t E = assignExpNewValueNum(Exp).first;
727 ValueNumbering[V] = E;
728 return E;
729}
730
731/// Returns the value number of the specified value. Fails if
732/// the value has not yet been numbered.
733uint32_t GVNPass::ValueTable::lookup(Value *V, bool Verify) const {
734 auto VI = ValueNumbering.find(Val: V);
735 if (Verify) {
736 assert(VI != ValueNumbering.end() && "Value not numbered?");
737 return VI->second;
738 }
739 return (VI != ValueNumbering.end()) ? VI->second : 0;
740}
741
742/// Returns the value number of the given comparison,
743/// assigning it a new number if it did not have one before. Useful when
744/// we deduced the result of a comparison, but don't immediately have an
745/// instruction realizing that comparison to hand.
746uint32_t GVNPass::ValueTable::lookupOrAddCmp(unsigned Opcode,
747 CmpInst::Predicate Predicate,
748 Value *LHS, Value *RHS) {
749 Expression Exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
750 return assignExpNewValueNum(Exp).first;
751}
752
753/// Remove all entries from the ValueTable.
754void GVNPass::ValueTable::clear() {
755 ValueNumbering.clear();
756 ExpressionNumbering.clear();
757 NumberingPhi.clear();
758 NumberingBB.clear();
759 PhiTranslateTable.clear();
760 NextValueNumber = 1;
761 Expressions.clear();
762 ExprIdx.clear();
763 NextExprNumber = 0;
764}
765
766/// Remove a value from the value numbering.
767void GVNPass::ValueTable::erase(Value *V) {
768 uint32_t Num = ValueNumbering.lookup(Val: V);
769 ValueNumbering.erase(Val: V);
770 // If V is PHINode, V <--> value number is an one-to-one mapping.
771 if (isa<PHINode>(Val: V))
772 NumberingPhi.erase(Val: Num);
773 else if (isa<BasicBlock>(Val: V))
774 NumberingBB.erase(Val: Num);
775}
776
777/// verifyRemoved - Verify that the value is removed from all internal data
778/// structures.
779void GVNPass::ValueTable::verifyRemoved(const Value *V) const {
780 assert(!ValueNumbering.contains(V) &&
781 "Inst still occurs in value numbering map!");
782}
783
784//===----------------------------------------------------------------------===//
785// LeaderMap External Functions
786//===----------------------------------------------------------------------===//
787
788/// Push a new Value to the LeaderTable onto the list for its value number.
789void GVNPass::LeaderMap::insert(uint32_t N, Value *V, const BasicBlock *BB) {
790 const auto &[It, Inserted] = NumToLeaders.try_emplace(Key: N, Args&: V, Args&: BB, Args: nullptr);
791 if (!Inserted) {
792 // Key already exists: insert new node after the head.
793 auto *NewSlot = TableAllocator.Allocate<LeaderListNode>();
794 new (NewSlot) LeaderListNode(V, BB, It->second.Next);
795 It->second.Next = NewSlot;
796 }
797}
798
799/// Scan the list of values corresponding to a given
800/// value number, and remove the given instruction if encountered.
801void GVNPass::LeaderMap::erase(uint32_t N, Instruction *I,
802 const BasicBlock *BB) {
803 auto It = NumToLeaders.find(Val: N);
804 if (It == NumToLeaders.end())
805 return;
806
807 LeaderListNode *Prev = nullptr;
808 LeaderListNode *Curr = &It->second;
809
810 while (Curr && (Curr->Entry.Val != I || Curr->Entry.BB != BB)) {
811 Prev = Curr;
812 Curr = Curr->Next;
813 }
814
815 if (!Curr)
816 return;
817
818 if (Prev) {
819 // Non-head node: unlink and destroy.
820 Prev->Next = Curr->Next;
821 Curr->~LeaderListNode();
822 TableAllocator.Deallocate<LeaderListNode>(Ptr: Curr);
823 } else {
824 // Head node (stored by value in DenseMap).
825 if (!Curr->Next) {
826 // Only node; erase from map (DenseMap calls the destructor).
827 NumToLeaders.erase(I: It);
828 } else {
829 // Move second node's data into head, then destroy second node.
830 LeaderListNode *Next = Curr->Next;
831 Curr->Entry.Val = std::move(Next->Entry.Val);
832 Curr->Entry.BB = Next->Entry.BB;
833 Curr->Next = Next->Next;
834 Next->~LeaderListNode();
835 TableAllocator.Deallocate<LeaderListNode>(Ptr: Next);
836 }
837 }
838}
839
840//===----------------------------------------------------------------------===//
841// GVN Pass
842//===----------------------------------------------------------------------===//
843
844bool GVNPass::isScalarPREEnabled() const {
845 return Options.AllowScalarPRE.value_or(u&: GVNEnableScalarPRE);
846}
847
848bool GVNPass::isLoadPREEnabled() const {
849 return Options.AllowLoadPRE.value_or(u&: GVNEnableLoadPRE);
850}
851
852bool GVNPass::isLoadInLoopPREEnabled() const {
853 return Options.AllowLoadInLoopPRE.value_or(u&: GVNEnableLoadInLoopPRE);
854}
855
856bool GVNPass::isLoadPRESplitBackedgeEnabled() const {
857 return Options.AllowLoadPRESplitBackedge.value_or(
858 u&: GVNEnableSplitBackedgeInLoadPRE);
859}
860
861bool GVNPass::isMemDepEnabled() const {
862 return Options.AllowMemDep.value_or(u&: GVNEnableMemDep);
863}
864
865bool GVNPass::isMemorySSAEnabled() const {
866 return Options.AllowMemorySSA.value_or(u&: GVNEnableMemorySSA);
867}
868
869PreservedAnalyses GVNPass::run(Function &F, FunctionAnalysisManager &AM) {
870 // FIXME: The order of evaluation of these 'getResult' calls is very
871 // significant! Re-ordering these variables will cause GVN when run alone to
872 // be less effective! We should fix memdep and basic-aa to not exhibit this
873 // behavior, but until then don't change the order here.
874 auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F);
875 auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
876 auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F);
877 auto &AA = AM.getResult<AAManager>(IR&: F);
878 auto *MemDep =
879 isMemDepEnabled() ? &AM.getResult<MemoryDependenceAnalysis>(IR&: F) : nullptr;
880 auto &LI = AM.getResult<LoopAnalysis>(IR&: F);
881 auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(IR&: F);
882 if (isMemorySSAEnabled() && !MSSA) {
883 assert(!MemDep &&
884 "On-demand computation of MemSSA implies that MemDep is disabled!");
885 MSSA = &AM.getResult<MemorySSAAnalysis>(IR&: F);
886 }
887 auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
888 bool Changed = runImpl(F, RunAC&: AC, RunDT&: DT, RunTLI: TLI, RunAA&: AA, RunMD: MemDep, LI, ORE: &ORE,
889 MSSA: MSSA ? &MSSA->getMSSA() : nullptr);
890 if (!Changed)
891 return PreservedAnalyses::all();
892 PreservedAnalyses PA;
893 PA.preserve<DominatorTreeAnalysis>();
894 PA.preserve<TargetLibraryAnalysis>();
895 if (MSSA)
896 PA.preserve<MemorySSAAnalysis>();
897 PA.preserve<LoopAnalysis>();
898 return PA;
899}
900
901void GVNPass::printPipeline(
902 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
903 static_cast<PassInfoMixin<GVNPass> *>(this)->printPipeline(
904 OS, MapClassName2PassName);
905
906 OS << '<';
907 if (Options.AllowScalarPRE != std::nullopt)
908 OS << (*Options.AllowScalarPRE ? "" : "no-") << "scalar-pre;";
909 if (Options.AllowLoadPRE != std::nullopt)
910 OS << (*Options.AllowLoadPRE ? "" : "no-") << "load-pre;";
911 if (Options.AllowLoadPRESplitBackedge != std::nullopt)
912 OS << (*Options.AllowLoadPRESplitBackedge ? "" : "no-")
913 << "split-backedge-load-pre;";
914 if (Options.AllowMemDep != std::nullopt)
915 OS << (*Options.AllowMemDep ? "" : "no-") << "memdep;";
916 if (Options.AllowMemorySSA != std::nullopt)
917 OS << (*Options.AllowMemorySSA ? "" : "no-") << "memoryssa";
918 OS << '>';
919}
920
921void GVNPass::salvageAndRemoveInstruction(Instruction *I) {
922 salvageKnowledge(I, AC);
923 salvageDebugInfo(I&: *I);
924 removeInstruction(I);
925}
926
927#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
928LLVM_DUMP_METHOD void GVNPass::dump(DenseMap<uint32_t, Value *> &Map) const {
929 errs() << "{\n";
930 for (const auto &[Num, Exp] : Map) {
931 errs() << Num << "\n";
932 Exp->dump();
933 }
934 errs() << "}\n";
935}
936#endif
937
938enum class AvailabilityState : char {
939 /// We know the block *is not* fully available. This is a fixpoint.
940 Unavailable = 0,
941 /// We know the block *is* fully available. This is a fixpoint.
942 Available = 1,
943 /// We do not know whether the block is fully available or not,
944 /// but we are currently speculating that it will be.
945 /// If it would have turned out that the block was, in fact, not fully
946 /// available, this would have been cleaned up into an Unavailable.
947 SpeculativelyAvailable = 2,
948};
949
950/// Return true if we can prove that the value
951/// we're analyzing is fully available in the specified block. As we go, keep
952/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
953/// map is actually a tri-state map with the following values:
954/// 0) we know the block *is not* fully available.
955/// 1) we know the block *is* fully available.
956/// 2) we do not know whether the block is fully available or not, but we are
957/// currently speculating that it will be.
958static bool IsValueFullyAvailableInBlock(
959 BasicBlock *BB,
960 DenseMap<BasicBlock *, AvailabilityState> &FullyAvailableBlocks) {
961 SmallVector<BasicBlock *, 32> Worklist;
962 std::optional<BasicBlock *> UnavailableBB;
963
964 // The number of times we didn't find an entry for a block in a map and
965 // optimistically inserted an entry marking block as speculatively available.
966 unsigned NumNewNewSpeculativelyAvailableBBs = 0;
967
968#ifndef NDEBUG
969 SmallPtrSet<BasicBlock *, 32> NewSpeculativelyAvailableBBs;
970 SmallVector<BasicBlock *, 32> AvailableBBs;
971#endif
972
973 Worklist.emplace_back(Args&: BB);
974 while (!Worklist.empty()) {
975 BasicBlock *CurrBB = Worklist.pop_back_val(); // LoadFO - depth-first!
976 // Optimistically assume that the block is Speculatively Available and check
977 // to see if we already know about this block in one lookup.
978 std::pair<DenseMap<BasicBlock *, AvailabilityState>::iterator, bool> IV =
979 FullyAvailableBlocks.try_emplace(
980 Key: CurrBB, Args: AvailabilityState::SpeculativelyAvailable);
981 AvailabilityState &State = IV.first->second;
982
983 // Did the entry already exist for this block?
984 if (!IV.second) {
985 if (State == AvailabilityState::Unavailable) {
986 UnavailableBB = CurrBB;
987 break; // Backpropagate unavailability info.
988 }
989
990#ifndef NDEBUG
991 AvailableBBs.emplace_back(CurrBB);
992#endif
993 continue; // Don't recurse further, but continue processing worklist.
994 }
995
996 // No entry found for block.
997 ++NumNewNewSpeculativelyAvailableBBs;
998 bool OutOfBudget = NumNewNewSpeculativelyAvailableBBs > MaxBBSpeculations;
999
1000 // If we have exhausted our budget, mark this block as unavailable.
1001 // Also, if this block has no predecessors, the value isn't live-in here.
1002 if (OutOfBudget || pred_empty(BB: CurrBB)) {
1003 MaxBBSpeculationCutoffReachedTimes += (int)OutOfBudget;
1004 State = AvailabilityState::Unavailable;
1005 UnavailableBB = CurrBB;
1006 break; // Backpropagate unavailability info.
1007 }
1008
1009 // Tentatively consider this block as speculatively available.
1010#ifndef NDEBUG
1011 NewSpeculativelyAvailableBBs.insert(CurrBB);
1012#endif
1013 // And further recurse into block's predecessors, in depth-first order!
1014 Worklist.append(in_start: pred_begin(BB: CurrBB), in_end: pred_end(BB: CurrBB));
1015 }
1016
1017#if LLVM_ENABLE_STATS
1018 IsValueFullyAvailableInBlockNumSpeculationsMax.updateMax(
1019 NumNewNewSpeculativelyAvailableBBs);
1020#endif
1021
1022 // If the block isn't marked as fixpoint yet
1023 // (the Unavailable and Available states are fixpoints).
1024 auto MarkAsFixpointAndEnqueueSuccessors =
1025 [&](BasicBlock *BB, AvailabilityState FixpointState) {
1026 auto It = FullyAvailableBlocks.find(Val: BB);
1027 if (It == FullyAvailableBlocks.end())
1028 return; // Never queried this block, leave as-is.
1029 switch (AvailabilityState &State = It->second) {
1030 case AvailabilityState::Unavailable:
1031 case AvailabilityState::Available:
1032 return; // Don't backpropagate further, continue processing worklist.
1033 case AvailabilityState::SpeculativelyAvailable: // Fix it!
1034 State = FixpointState;
1035#ifndef NDEBUG
1036 assert(NewSpeculativelyAvailableBBs.erase(BB) &&
1037 "Found a speculatively available successor leftover?");
1038#endif
1039 // Queue successors for further processing.
1040 Worklist.append(in_start: succ_begin(BB), in_end: succ_end(BB));
1041 return;
1042 }
1043 };
1044
1045 if (UnavailableBB) {
1046 // Okay, we have encountered an unavailable block.
1047 // Mark speculatively available blocks reachable from UnavailableBB as
1048 // unavailable as well. Paths are terminated when they reach blocks not in
1049 // FullyAvailableBlocks or they are not marked as speculatively available.
1050 Worklist.clear();
1051 Worklist.append(in_start: succ_begin(BB: *UnavailableBB), in_end: succ_end(BB: *UnavailableBB));
1052 while (!Worklist.empty())
1053 MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
1054 AvailabilityState::Unavailable);
1055 }
1056
1057#ifndef NDEBUG
1058 Worklist.clear();
1059 for (BasicBlock *AvailableBB : AvailableBBs)
1060 Worklist.append(succ_begin(AvailableBB), succ_end(AvailableBB));
1061 while (!Worklist.empty())
1062 MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(),
1063 AvailabilityState::Available);
1064
1065 assert(NewSpeculativelyAvailableBBs.empty() &&
1066 "Must have fixed all the new speculatively available blocks.");
1067#endif
1068
1069 return !UnavailableBB;
1070}
1071
1072/// If the specified OldValue exists in ValuesPerBlock, replace its value with
1073/// NewValue.
1074static void replaceValuesPerBlockEntry(
1075 SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, Value *OldValue,
1076 Value *NewValue) {
1077 for (AvailableValueInBlock &V : ValuesPerBlock) {
1078 if (V.AV.Val == OldValue)
1079 V.AV.Val = NewValue;
1080 if (V.AV.isSelectValue()) {
1081 if (V.AV.V1 == OldValue)
1082 V.AV.V1 = NewValue;
1083 if (V.AV.V2 == OldValue)
1084 V.AV.V2 = NewValue;
1085 }
1086 }
1087}
1088
1089/// Given a set of loads specified by ValuesPerBlock,
1090/// construct SSA form, allowing us to eliminate Load. This returns the value
1091/// that should be used at Load's definition site.
1092static Value *
1093ConstructSSAForLoadSet(LoadInst *Load,
1094 SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
1095 GVNPass &GVN) {
1096 // Check for the fully redundant, dominating load case. In this case, we can
1097 // just use the dominating value directly.
1098 if (ValuesPerBlock.size() == 1 &&
1099 GVN.getDominatorTree().properlyDominates(A: ValuesPerBlock[0].BB,
1100 B: Load->getParent())) {
1101 assert(!ValuesPerBlock[0].AV.isUndefValue() &&
1102 "Dead BB dominate this block");
1103 return ValuesPerBlock[0].MaterializeAdjustedValue(Load);
1104 }
1105
1106 // Otherwise, we have to construct SSA form.
1107 SmallVector<PHINode*, 8> NewPHIs;
1108 SSAUpdater SSAUpdate(&NewPHIs);
1109 SSAUpdate.Initialize(Ty: Load->getType(), Name: Load->getName());
1110
1111 for (const AvailableValueInBlock &AV : ValuesPerBlock) {
1112 BasicBlock *BB = AV.BB;
1113
1114 if (AV.AV.isUndefValue())
1115 continue;
1116
1117 if (SSAUpdate.HasValueForBlock(BB))
1118 continue;
1119
1120 // If the value is the load that we will be eliminating, and the block it's
1121 // available in is the block that the load is in, then don't add it as
1122 // SSAUpdater will resolve the value to the relevant phi which may let it
1123 // avoid phi construction entirely if there's actually only one value.
1124 if (BB == Load->getParent() &&
1125 ((AV.AV.isSimpleValue() && AV.AV.getSimpleValue() == Load) ||
1126 (AV.AV.isCoercedLoadValue() && AV.AV.getCoercedLoadValue() == Load)))
1127 continue;
1128
1129 SSAUpdate.AddAvailableValue(BB, V: AV.MaterializeAdjustedValue(Load));
1130 }
1131
1132 // Perform PHI construction.
1133 return SSAUpdate.GetValueInMiddleOfBlock(BB: Load->getParent());
1134}
1135
1136Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
1137 Instruction *InsertPt) const {
1138 Value *Res;
1139 Type *LoadTy = Load->getType();
1140 const DataLayout &DL = Load->getDataLayout();
1141 if (isSimpleValue()) {
1142 Res = getSimpleValue();
1143 if (Res->getType() != LoadTy) {
1144 Res = getValueForLoad(SrcVal: Res, Offset, LoadTy, InsertPt, F: Load->getFunction());
1145
1146 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset
1147 << " " << *getSimpleValue() << '\n'
1148 << *Res << '\n'
1149 << "\n\n\n");
1150 }
1151 } else if (isCoercedLoadValue()) {
1152 LoadInst *CoercedLoad = getCoercedLoadValue();
1153 if (CoercedLoad->getType() == LoadTy && Offset == 0) {
1154 Res = CoercedLoad;
1155 combineMetadataForCSE(K: CoercedLoad, J: Load, DoesKMove: false);
1156 } else {
1157 Res = getValueForLoad(SrcVal: CoercedLoad, Offset, LoadTy, InsertPt,
1158 F: Load->getFunction());
1159 // We are adding a new user for this load, for which the original
1160 // metadata may not hold. Additionally, the new load may have a different
1161 // size and type, so their metadata cannot be combined in any
1162 // straightforward way.
1163 // Drop all metadata that is not known to cause immediate UB on violation,
1164 // unless the load has !noundef, in which case all metadata violations
1165 // will be promoted to UB.
1166 // !noalias and !alias.scope are kept: the load is not moved and still
1167 // accesses the same memory, and these are independent of the load type
1168 // and offset, so they remain valid for the coerced result.
1169 if (!CoercedLoad->hasMetadata(KindID: LLVMContext::MD_noundef))
1170 CoercedLoad->dropUnknownNonDebugMetadata(
1171 KnownIDs: {LLVMContext::MD_dereferenceable,
1172 LLVMContext::MD_dereferenceable_or_null,
1173 LLVMContext::MD_invariant_load, LLVMContext::MD_invariant_group,
1174 LLVMContext::MD_alias_scope, LLVMContext::MD_noalias});
1175 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset
1176 << " " << *getCoercedLoadValue() << '\n'
1177 << *Res << '\n'
1178 << "\n\n\n");
1179 }
1180 } else if (isMemIntrinValue()) {
1181 Res = getMemInstValueForLoad(SrcInst: getMemIntrinValue(), Offset, LoadTy,
1182 InsertPt, DL);
1183 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
1184 << " " << *getMemIntrinValue() << '\n'
1185 << *Res << '\n'
1186 << "\n\n\n");
1187 } else if (isSelectValue()) {
1188 // Introduce a new value select for a load from an eligible pointer select.
1189 Value *Cond = getSelectCondition();
1190 assert(V1 && V2 && "both value operands of the select must be present");
1191 Res = SelectInst::Create(C: Cond, S1: V1, S2: V2, NameStr: "", InsertBefore: InsertPt->getIterator());
1192 // We use the DebugLoc from the original load here, as this instruction
1193 // materializes the value that would previously have been loaded.
1194 cast<SelectInst>(Val: Res)->setDebugLoc(Load->getDebugLoc());
1195 } else {
1196 llvm_unreachable("Should not materialize value from dead block");
1197 }
1198 assert(Res && "failed to materialize?");
1199 return Res;
1200}
1201
1202static bool isLifetimeStart(const Instruction *Inst) {
1203 if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Val: Inst))
1204 return II->getIntrinsicID() == Intrinsic::lifetime_start;
1205 return false;
1206}
1207
1208/// Assuming To can be reached from both From and Between, does Between lie on
1209/// every path from From to To?
1210static bool liesBetween(const Instruction *From, Instruction *Between,
1211 const Instruction *To, const DominatorTree *DT) {
1212 if (From->getParent() == Between->getParent())
1213 return DT->dominates(Def: From, User: Between);
1214 SmallPtrSet<BasicBlock *, 1> Exclusion;
1215 Exclusion.insert(Ptr: Between->getParent());
1216 return !isPotentiallyReachable(From, To, ExclusionSet: &Exclusion, DT);
1217}
1218
1219static const Instruction *findMayClobberedPtrAccess(LoadInst *Load,
1220 const DominatorTree *DT) {
1221 Value *PtrOp = Load->getPointerOperand();
1222 if (!PtrOp->hasUseList())
1223 return nullptr;
1224
1225 Instruction *OtherAccess = nullptr;
1226
1227 for (auto *U : PtrOp->users()) {
1228 if (U != Load && (isa<LoadInst>(Val: U) || isa<StoreInst>(Val: U))) {
1229 auto *I = cast<Instruction>(Val: U);
1230 if (I->getFunction() == Load->getFunction() && DT->dominates(Def: I, User: Load)) {
1231 // Use the most immediately dominating value.
1232 if (OtherAccess) {
1233 if (DT->dominates(Def: OtherAccess, User: I))
1234 OtherAccess = I;
1235 else
1236 assert(U == OtherAccess || DT->dominates(I, OtherAccess));
1237 } else
1238 OtherAccess = I;
1239 }
1240 }
1241 }
1242
1243 if (OtherAccess)
1244 return OtherAccess;
1245
1246 // There is no dominating use, check if we can find a closest non-dominating
1247 // use that lies between any other potentially available use and Load.
1248 for (auto *U : PtrOp->users()) {
1249 if (U != Load && (isa<LoadInst>(Val: U) || isa<StoreInst>(Val: U))) {
1250 auto *I = cast<Instruction>(Val: U);
1251 if (I->getFunction() == Load->getFunction() &&
1252 isPotentiallyReachable(From: I, To: Load, ExclusionSet: nullptr, DT)) {
1253 if (OtherAccess) {
1254 if (liesBetween(From: OtherAccess, Between: I, To: Load, DT)) {
1255 OtherAccess = I;
1256 } else if (!liesBetween(From: I, Between: OtherAccess, To: Load, DT)) {
1257 // These uses are both partially available at Load were it not for
1258 // the clobber, but neither lies strictly after the other.
1259 OtherAccess = nullptr;
1260 break;
1261 } // else: keep current OtherAccess since it lies between U and
1262 // Load.
1263 } else {
1264 OtherAccess = I;
1265 }
1266 }
1267 }
1268 }
1269
1270 return OtherAccess;
1271}
1272
1273/// Try to locate the three instruction involved in a missed
1274/// load-elimination case that is due to an intervening store.
1275static void reportMayClobberedLoad(LoadInst *Load, Instruction *DepInst,
1276 const DominatorTree *DT,
1277 OptimizationRemarkEmitter *ORE) {
1278 using namespace ore;
1279
1280 OptimizationRemarkMissed R(DEBUG_TYPE, "LoadClobbered", Load);
1281 R << "load of type " << NV("Type", Load->getType()) << " not eliminated"
1282 << setExtraArgs();
1283
1284 const Instruction *OtherAccess = findMayClobberedPtrAccess(Load, DT);
1285 if (OtherAccess)
1286 R << " in favor of " << NV("OtherAccess", OtherAccess);
1287
1288 R << " because it is clobbered by " << NV("ClobberedBy", DepInst);
1289
1290 ORE->emit(OptDiag&: R);
1291}
1292
1293// Find a dominating value for Loc memory location in the extended basic block
1294// (chain of basic blocks with single predecessors) starting From instruction.
1295// Returns the value from a matching load or a simple store to the same pointer.
1296static Value *findDominatingValue(const MemoryLocation &Loc, Type *LoadTy,
1297 Instruction *From, AAResults *AA) {
1298 uint32_t NumVisitedInsts = 0;
1299 BasicBlock *FromBB = From->getParent();
1300 BatchAAResults BatchAA(*AA);
1301 for (BasicBlock *BB = FromBB; BB; BB = BB->getSinglePredecessor())
1302 for (auto *Inst = BB == FromBB ? From : BB->getTerminator();
1303 Inst != nullptr; Inst = Inst->getPrevNode()) {
1304 // Stop the search if limit is reached.
1305 if (++NumVisitedInsts > MaxNumVisitedInsts)
1306 return nullptr;
1307 if (isModSet(MRI: BatchAA.getModRefInfo(I: Inst, OptLoc: Loc))) {
1308 // A simple store to the exact location can forward its value.
1309 if (auto *SI = dyn_cast<StoreInst>(Val: Inst))
1310 if (SI->isSimple() && SI->getPointerOperand() == Loc.Ptr &&
1311 SI->getValueOperand()->getType() == LoadTy)
1312 return SI->getValueOperand();
1313 return nullptr;
1314 }
1315 if (auto *LI = dyn_cast<LoadInst>(Val: Inst))
1316 if (LI->getPointerOperand() == Loc.Ptr && LI->getType() == LoadTy)
1317 return LI;
1318 }
1319 return nullptr;
1320}
1321
1322std::optional<AvailableValue>
1323GVNPass::AnalyzeSelectAvailability(LoadInst *Load, Value *Cond, Value *TrueAddr,
1324 Value *FalseAddr, Instruction *From) {
1325 assert(TrueAddr->getType() == Load->getPointerOperandType() &&
1326 "Invalid address type of true side of select dependency");
1327 assert(FalseAddr->getType() == Load->getPointerOperandType() &&
1328 "Invalid address type of false side of select dependency");
1329 // We can convert a load through a select address into a select of the two
1330 // loaded values only if both sides have a dominating, non-clobbered value of
1331 // the right type in the extended basic block ending at From.
1332 auto Loc = MemoryLocation::get(LI: Load);
1333 Value *V1 = findDominatingValue(Loc: Loc.getWithNewPtr(NewPtr: TrueAddr), LoadTy: Load->getType(),
1334 From, AA: getAliasAnalysis());
1335 if (!V1)
1336 return std::nullopt;
1337 Value *V2 = findDominatingValue(Loc: Loc.getWithNewPtr(NewPtr: FalseAddr), LoadTy: Load->getType(),
1338 From, AA: getAliasAnalysis());
1339 if (!V2)
1340 return std::nullopt;
1341 return AvailableValue::getSelect(Cond, V1, V2);
1342}
1343
1344std::optional<AvailableValue>
1345GVNPass::AnalyzeLoadAvailability(LoadInst *Load, const ReachingMemVal &Dep,
1346 Value *Address) {
1347 assert(Load->isUnordered() && "rules below are incorrect for ordered access");
1348 assert((Dep.Kind == DepKind::Def || Dep.Kind == DepKind::Clobber) &&
1349 "expected a local dependence");
1350
1351 Instruction *DepInst = Dep.Inst;
1352
1353 const DataLayout &DL = Load->getDataLayout();
1354 if (Dep.Kind == DepKind::Clobber) {
1355 // If the dependence is to a store that writes to a superset of the bits
1356 // read by the load, we can extract the bits we need for the load from the
1357 // stored value.
1358 if (StoreInst *DepSI = dyn_cast<StoreInst>(Val: DepInst)) {
1359 // Can't forward from non-atomic to atomic without violating memory model.
1360 if (Address && Load->isAtomic() <= DepSI->isAtomic()) {
1361 int Offset =
1362 analyzeLoadFromClobberingStore(LoadTy: Load->getType(), LoadPtr: Address, DepSI, DL);
1363 if (Offset != -1)
1364 return AvailableValue::get(V: DepSI->getValueOperand(), Offset);
1365 }
1366 }
1367
1368 // Check to see if we have something like this:
1369 // load i32* P
1370 // load i8* (P+1)
1371 // if we have this, replace the later with an extraction from the former.
1372 if (LoadInst *DepLoad = dyn_cast<LoadInst>(Val: DepInst)) {
1373 // If this is a clobber and L is the first instruction in its block, then
1374 // we have the first instruction in the entry block.
1375 // Can't forward from non-atomic to atomic without violating memory model.
1376 if (DepLoad != Load && Address &&
1377 Load->isAtomic() <= DepLoad->isAtomic()) {
1378 Type *LoadType = Load->getType();
1379 int Offset = Dep.Offset;
1380
1381 if (!isMemorySSAEnabled()) {
1382 // If MD reported clobber, check it was nested.
1383 if (canCoerceMustAliasedValueToLoad(StoredVal: DepLoad, LoadTy: LoadType,
1384 F: DepLoad->getFunction())) {
1385 const auto ClobberOff = MD->getClobberOffset(DepInst: DepLoad);
1386 // GVN has no deal with a negative offset.
1387 Offset = (ClobberOff == std::nullopt || *ClobberOff < 0)
1388 ? -1
1389 : *ClobberOff;
1390 }
1391 } else {
1392 if (!canCoerceMustAliasedValueToLoad(StoredVal: DepLoad, LoadTy: LoadType,
1393 F: DepLoad->getFunction()) ||
1394 Offset < 0)
1395 Offset = -1;
1396 }
1397 if (Offset == -1)
1398 Offset =
1399 analyzeLoadFromClobberingLoad(LoadTy: LoadType, LoadPtr: Address, DepLI: DepLoad, DL);
1400 if (Offset != -1)
1401 return AvailableValue::getLoad(Load: DepLoad, Offset);
1402 }
1403 }
1404
1405 // If the clobbering value is a memset/memcpy/memmove, see if we can
1406 // forward a value on from it.
1407 if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Val: DepInst)) {
1408 if (Address && !Load->isAtomic()) {
1409 int Offset = analyzeLoadFromClobberingMemInst(LoadTy: Load->getType(), LoadPtr: Address,
1410 DepMI, DL);
1411 if (Offset != -1)
1412 return AvailableValue::getMI(MI: DepMI, Offset);
1413 }
1414 }
1415
1416 // Nothing known about this clobber, have to be conservative.
1417 LLVM_DEBUG(
1418 // fast print dep, using operator<< on instruction is too slow.
1419 dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
1420 dbgs() << " is clobbered by " << *DepInst << '\n';);
1421 if (ORE->allowExtraAnalysis(DEBUG_TYPE))
1422 reportMayClobberedLoad(Load, DepInst, DT, ORE);
1423
1424 return std::nullopt;
1425 }
1426 assert(Dep.Kind == DepKind::Def && "follows from above");
1427
1428 // Loading the alloca -> undef.
1429 // Loading immediately after lifetime begin -> undef.
1430 if (isa<AllocaInst>(Val: DepInst) || isLifetimeStart(Inst: DepInst))
1431 return AvailableValue::get(V: UndefValue::get(T: Load->getType()));
1432
1433 if (Constant *InitVal =
1434 getInitialValueOfAllocation(V: DepInst, TLI, Ty: Load->getType()))
1435 return AvailableValue::get(V: InitVal);
1436
1437 if (StoreInst *S = dyn_cast<StoreInst>(Val: DepInst)) {
1438 // Reject loads and stores that are to the same address but are of
1439 // different types if we have to. If the stored value is convertable to
1440 // the loaded value, we can reuse it.
1441 if (!canCoerceMustAliasedValueToLoad(StoredVal: S->getValueOperand(), LoadTy: Load->getType(),
1442 F: S->getFunction()))
1443 return std::nullopt;
1444
1445 // Can't forward from non-atomic to atomic without violating memory model.
1446 if (S->isAtomic() < Load->isAtomic())
1447 return std::nullopt;
1448
1449 return AvailableValue::get(V: S->getValueOperand());
1450 }
1451
1452 if (LoadInst *LD = dyn_cast<LoadInst>(Val: DepInst)) {
1453 // If the types mismatch and we can't handle it, reject reuse of the load.
1454 // If the stored value is larger or equal to the loaded value, we can reuse
1455 // it.
1456 if (!canCoerceMustAliasedValueToLoad(StoredVal: LD, LoadTy: Load->getType(),
1457 F: LD->getFunction()))
1458 return std::nullopt;
1459
1460 // Can't forward from non-atomic to atomic without violating memory model.
1461 if (LD->isAtomic() < Load->isAtomic())
1462 return std::nullopt;
1463
1464 return AvailableValue::getLoad(Load: LD);
1465 }
1466
1467 // Check if load with Addr dependent from select can be converted to select
1468 // between load values. There must be no instructions between the found
1469 // loads and DepInst that may clobber the loads.
1470 if (auto *Sel = dyn_cast<SelectInst>(Val: DepInst)) {
1471 assert(Sel->getType() == Load->getPointerOperandType());
1472 if (auto AV = AnalyzeSelectAvailability(Load, Cond: Sel->getCondition(),
1473 TrueAddr: Sel->getTrueValue(),
1474 FalseAddr: Sel->getFalseValue(), From: DepInst))
1475 return AV;
1476 return std::nullopt;
1477 }
1478
1479 // Unknown def - must be conservative.
1480 LLVM_DEBUG(
1481 // fast print dep, using operator<< on instruction is too slow.
1482 dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
1483 dbgs() << " has unknown def " << *DepInst << '\n';);
1484 return std::nullopt;
1485}
1486
1487void GVNPass::AnalyzeLoadAvailability(LoadInst *Load,
1488 SmallVectorImpl<ReachingMemVal> &Deps,
1489 AvailValInBlkVect &ValuesPerBlock,
1490 UnavailBlkVect &UnavailableBlocks) {
1491 // Filter out useless results (non-locals, etc). Keep track of the blocks
1492 // where we have a value available in repl, also keep track of whether we see
1493 // dependencies that produce an unknown value for the load (such as a call
1494 // that could potentially clobber the load).
1495 for (const auto &Dep : Deps) {
1496 BasicBlock *DepBB = Dep.Block;
1497
1498 if (DeadBlocks.count(key: DepBB)) {
1499 // Dead dependent mem-op disguise as a load evaluating the same value
1500 // as the load in question.
1501 ValuesPerBlock.push_back(Elt: AvailableValueInBlock::getUndef(BB: DepBB));
1502 continue;
1503 }
1504
1505 if (Dep.Kind == DepKind::Other) {
1506 UnavailableBlocks.push_back(Elt: DepBB);
1507 continue;
1508 }
1509
1510 // The load address is a select in this block: try to rematerialize the
1511 // load as a select of the two reaching values (one per side). The values
1512 // are searched for at the end of DepBB.
1513 if (Dep.Kind == DepKind::Select) {
1514 if (auto AV = AnalyzeSelectAvailability(
1515 Load, Cond: const_cast<Value *>(Dep.SelCond),
1516 TrueAddr: const_cast<Value *>(Dep.SelTrueAddr),
1517 FalseAddr: const_cast<Value *>(Dep.SelFalseAddr), From: DepBB->getTerminator())) {
1518 ValuesPerBlock.push_back(
1519 Elt: AvailableValueInBlock::get(BB: DepBB, AV: std::move(*AV)));
1520 } else {
1521 UnavailableBlocks.push_back(Elt: DepBB);
1522 }
1523 continue;
1524 }
1525
1526 // The address being loaded in this non-local block may not be the same as
1527 // the pointer operand of the load if PHI translation occurs. Make sure
1528 // to consider the right address.
1529 if (auto AV =
1530 AnalyzeLoadAvailability(Load, Dep, Address: const_cast<Value *>(Dep.Addr))) {
1531 // subtlety: because we know this was a non-local dependency, we know
1532 // it's safe to materialize anywhere between the instruction within
1533 // DepInfo and the end of it's block.
1534 ValuesPerBlock.push_back(
1535 Elt: AvailableValueInBlock::get(BB: DepBB, AV: std::move(*AV)));
1536 } else {
1537 UnavailableBlocks.push_back(Elt: DepBB);
1538 }
1539 }
1540
1541 assert(Deps.size() == ValuesPerBlock.size() + UnavailableBlocks.size() &&
1542 "post condition violation");
1543}
1544
1545/// Given the following code, v1 is partially available on some edges, but not
1546/// available on the edge from PredBB. This function tries to find if there is
1547/// another identical load in the other successor of PredBB.
1548///
1549/// v0 = load %addr
1550/// br %LoadBB
1551///
1552/// LoadBB:
1553/// v1 = load %addr
1554/// ...
1555///
1556/// PredBB:
1557/// ...
1558/// br %cond, label %LoadBB, label %SuccBB
1559///
1560/// SuccBB:
1561/// v2 = load %addr
1562/// ...
1563///
1564LoadInst *GVNPass::findLoadToHoistIntoPred(BasicBlock *Pred, BasicBlock *LoadBB,
1565 LoadInst *Load) {
1566 // For simplicity we handle a Pred has 2 successors only.
1567 auto *Term = Pred->getTerminator();
1568 if (Term->getNumSuccessors() != 2 || Term->isSpecialTerminator())
1569 return nullptr;
1570 auto *SuccBB = Term->getSuccessor(Idx: 0);
1571 if (SuccBB == LoadBB)
1572 SuccBB = Term->getSuccessor(Idx: 1);
1573 if (!SuccBB->getSinglePredecessor())
1574 return nullptr;
1575
1576 unsigned int NumInsts = MaxNumInsnsPerBlock;
1577 for (Instruction &Inst : *SuccBB) {
1578 if (Inst.isDebugOrPseudoInst())
1579 continue;
1580 if (--NumInsts == 0)
1581 return nullptr;
1582
1583 if (!Inst.isIdenticalTo(I: Load))
1584 continue;
1585
1586 bool HasLocalDep = true;
1587 if (!isMemorySSAEnabled()) {
1588 MemDepResult Dep = MD->getDependency(QueryInst: &Inst);
1589 HasLocalDep = !Dep.isNonLocal();
1590 } else {
1591 auto *MSSA = MSSAU->getMemorySSA();
1592 // Do not hoist if the identical load has ordering constraint.
1593 if (auto *MA = MSSA->getMemoryAccess(I: &Inst); MA && isa<MemoryUse>(Val: MA)) {
1594 auto *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
1595 HasLocalDep = Clobber->getBlock() == SuccBB;
1596 }
1597 }
1598
1599 // If an identical load doesn't depends on any local instructions, it can
1600 // be safely moved to PredBB.
1601 // Also check for the implicit control flow instructions. See the comments
1602 // in PerformLoadPRE for details.
1603 if (!HasLocalDep && !ICF->isDominatedByICFIFromSameBlock(Insn: &Inst))
1604 return cast<LoadInst>(Val: &Inst);
1605
1606 // Otherwise there is something in the same BB clobbers the memory, we can't
1607 // move this and later load to PredBB.
1608 return nullptr;
1609 }
1610
1611 return nullptr;
1612}
1613
1614void GVNPass::eliminatePartiallyRedundantLoad(
1615 LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
1616 MapVector<BasicBlock *, Value *> &AvailableLoads,
1617 MapVector<BasicBlock *, LoadInst *> *CriticalEdgePredAndLoad) {
1618 for (const auto &AvailableLoad : AvailableLoads) {
1619 BasicBlock *UnavailableBlock = AvailableLoad.first;
1620 Value *LoadPtr = AvailableLoad.second;
1621
1622 auto *NewLoad = new LoadInst(
1623 Load->getType(), LoadPtr, Load->getName() + ".pre", Load->isVolatile(),
1624 Load->getAlign(), Load->getOrdering(), Load->getSyncScopeID(),
1625 UnavailableBlock->getTerminator()->getIterator());
1626 NewLoad->setDebugLoc(Load->getDebugLoc());
1627 if (MSSAU) {
1628 auto *NewAccess = MSSAU->createMemoryAccessInBB(
1629 I: NewLoad, Definition: nullptr, BB: NewLoad->getParent(), Point: MemorySSA::BeforeTerminator);
1630 if (auto *NewDef = dyn_cast<MemoryDef>(Val: NewAccess))
1631 MSSAU->insertDef(Def: NewDef, /*RenameUses=*/true);
1632 else
1633 MSSAU->insertUse(Use: cast<MemoryUse>(Val: NewAccess), /*RenameUses=*/true);
1634 }
1635
1636 // Transfer the old load's AA tags to the new load.
1637 AAMDNodes Tags = Load->getAAMetadata();
1638 if (Tags)
1639 NewLoad->setAAMetadata(Tags);
1640
1641 if (auto *MD = Load->getMetadata(KindID: LLVMContext::MD_invariant_load))
1642 NewLoad->setMetadata(KindID: LLVMContext::MD_invariant_load, Node: MD);
1643 if (auto *InvGroupMD = Load->getMetadata(KindID: LLVMContext::MD_invariant_group))
1644 NewLoad->setMetadata(KindID: LLVMContext::MD_invariant_group, Node: InvGroupMD);
1645 if (auto *RangeMD = Load->getMetadata(KindID: LLVMContext::MD_range))
1646 NewLoad->setMetadata(KindID: LLVMContext::MD_range, Node: RangeMD);
1647 if (auto *NoFPClassMD = Load->getMetadata(KindID: LLVMContext::MD_nofpclass))
1648 NewLoad->setMetadata(KindID: LLVMContext::MD_nofpclass, Node: NoFPClassMD);
1649
1650 if (auto *AccessMD = Load->getMetadata(KindID: LLVMContext::MD_access_group))
1651 if (LI->getLoopFor(BB: Load->getParent()) == LI->getLoopFor(BB: UnavailableBlock))
1652 NewLoad->setMetadata(KindID: LLVMContext::MD_access_group, Node: AccessMD);
1653
1654 // We do not propagate the old load's debug location, because the new
1655 // load now lives in a different BB, and we want to avoid a jumpy line
1656 // table.
1657 // FIXME: How do we retain source locations without causing poor debugging
1658 // behavior?
1659
1660 // Add the newly created load.
1661 ValuesPerBlock.push_back(
1662 Elt: AvailableValueInBlock::get(BB: UnavailableBlock, V: NewLoad));
1663 if (MD)
1664 MD->invalidateCachedPointerInfo(Ptr: LoadPtr);
1665 LLVM_DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
1666
1667 // For PredBB in CriticalEdgePredAndLoad we need to replace the uses of old
1668 // load instruction with the new created load instruction.
1669 if (CriticalEdgePredAndLoad) {
1670 auto It = CriticalEdgePredAndLoad->find(Key: UnavailableBlock);
1671 if (It != CriticalEdgePredAndLoad->end()) {
1672 ++NumPRELoadMoved2CEPred;
1673 ICF->insertInstructionTo(Inst: NewLoad, BB: UnavailableBlock);
1674 LoadInst *OldLoad = It->second;
1675 combineMetadataForCSE(K: NewLoad, J: OldLoad, /*DoesKMove=*/true);
1676 OldLoad->replaceAllUsesWith(V: NewLoad);
1677 replaceValuesPerBlockEntry(ValuesPerBlock, OldValue: OldLoad, NewValue: NewLoad);
1678 if (uint32_t ValNo = VN.lookup(V: OldLoad, Verify: false))
1679 LeaderTable.erase(N: ValNo, I: OldLoad, BB: OldLoad->getParent());
1680 removeInstruction(I: OldLoad);
1681 }
1682 }
1683 }
1684
1685 // Perform PHI construction.
1686 Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, GVN&: *this);
1687 // ConstructSSAForLoadSet is responsible for combining metadata.
1688 ICF->removeUsersOf(Inst: Load);
1689 Load->replaceAllUsesWith(V);
1690 if (isa<PHINode>(Val: V))
1691 V->takeName(V: Load);
1692 if (Instruction *I = dyn_cast<Instruction>(Val: V))
1693 I->setDebugLoc(Load->getDebugLoc());
1694 if (MD && V->getType()->isPtrOrPtrVectorTy())
1695 MD->invalidateCachedPointerInfo(Ptr: V);
1696 ORE->emit(RemarkBuilder: [&]() {
1697 return OptimizationRemark(DEBUG_TYPE, "LoadPRE", Load)
1698 << "load eliminated by PRE";
1699 });
1700 salvageAndRemoveInstruction(I: Load);
1701}
1702
1703bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
1704 UnavailBlkVect &UnavailableBlocks) {
1705 // Okay, we have *some* definitions of the value. This means that the value
1706 // is available in some of our (transitive) predecessors. Lets think about
1707 // doing PRE of this load. This will involve inserting a new load into the
1708 // predecessor when it's not available. We could do this in general, but
1709 // prefer to not increase code size. As such, we only do this when we know
1710 // that we only have to insert *one* load (which means we're basically moving
1711 // the load, not inserting a new one).
1712
1713 SmallPtrSet<BasicBlock *, 4> Blockers(llvm::from_range, UnavailableBlocks);
1714
1715 // Let's find the first basic block with more than one predecessor. Walk
1716 // backwards through predecessors if needed.
1717 BasicBlock *LoadBB = Load->getParent();
1718 BasicBlock *TmpBB = LoadBB;
1719
1720 // Check that there is no implicit control flow instructions above our load in
1721 // its block. If there is an instruction that doesn't always pass the
1722 // execution to the following instruction, then moving through it may become
1723 // invalid. For example:
1724 //
1725 // int arr[LEN];
1726 // int index = ???;
1727 // ...
1728 // guard(0 <= index && index < LEN);
1729 // use(arr[index]);
1730 //
1731 // It is illegal to move the array access to any point above the guard,
1732 // because if the index is out of bounds we should deoptimize rather than
1733 // access the array.
1734 // Check that there is no guard in this block above our instruction.
1735 bool MustEnsureSafetyOfSpeculativeExecution =
1736 ICF->isDominatedByICFIFromSameBlock(Insn: Load);
1737
1738 while (TmpBB->getSinglePredecessor()) {
1739 TmpBB = TmpBB->getSinglePredecessor();
1740 if (TmpBB == LoadBB) // Infinite (unreachable) loop.
1741 return false;
1742 if (Blockers.count(Ptr: TmpBB))
1743 return false;
1744
1745 // If any of these blocks has more than one successor (i.e. if the edge we
1746 // just traversed was critical), then there are other paths through this
1747 // block along which the load may not be anticipated. Hoisting the load
1748 // above this block would be adding the load to execution paths along
1749 // which it was not previously executed.
1750 if (TmpBB->getTerminator()->getNumSuccessors() != 1)
1751 return false;
1752
1753 // Check that there is no implicit control flow in a block above.
1754 MustEnsureSafetyOfSpeculativeExecution =
1755 MustEnsureSafetyOfSpeculativeExecution || ICF->hasICF(BB: TmpBB);
1756 }
1757
1758 assert(TmpBB);
1759 LoadBB = TmpBB;
1760
1761 // Check to see how many predecessors have the loaded value fully
1762 // available.
1763 MapVector<BasicBlock *, Value *> PredLoads;
1764 DenseMap<BasicBlock *, AvailabilityState> FullyAvailableBlocks;
1765 for (const AvailableValueInBlock &AV : ValuesPerBlock)
1766 FullyAvailableBlocks[AV.BB] = AvailabilityState::Available;
1767 for (BasicBlock *UnavailableBB : UnavailableBlocks)
1768 FullyAvailableBlocks[UnavailableBB] = AvailabilityState::Unavailable;
1769
1770 // The edge from Pred to LoadBB is a critical edge will be splitted.
1771 SmallVector<BasicBlock *, 4> CriticalEdgePredSplit;
1772 // The edge from Pred to LoadBB is a critical edge, another successor of Pred
1773 // contains a load can be moved to Pred. This data structure maps the Pred to
1774 // the movable load.
1775 MapVector<BasicBlock *, LoadInst *> CriticalEdgePredAndLoad;
1776 for (BasicBlock *Pred : predecessors(BB: LoadBB)) {
1777 // If any predecessor block is an EH pad that does not allow non-PHI
1778 // instructions before the terminator, we can't PRE the load.
1779 if (Pred->getTerminator()->isEHPad()) {
1780 LLVM_DEBUG(
1781 dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '"
1782 << Pred->getName() << "': " << *Load << '\n');
1783 return false;
1784 }
1785
1786 if (IsValueFullyAvailableInBlock(BB: Pred, FullyAvailableBlocks)) {
1787 continue;
1788 }
1789
1790 if (Pred->getTerminator()->getNumSuccessors() != 1) {
1791 if (isa<IndirectBrInst>(Val: Pred->getTerminator())) {
1792 LLVM_DEBUG(
1793 dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
1794 << Pred->getName() << "': " << *Load << '\n');
1795 return false;
1796 }
1797
1798 if (LoadBB->isEHPad()) {
1799 LLVM_DEBUG(
1800 dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
1801 << Pred->getName() << "': " << *Load << '\n');
1802 return false;
1803 }
1804
1805 // Do not split backedge as it will break the canonical loop form.
1806 if (!isLoadPRESplitBackedgeEnabled())
1807 if (DT->dominates(A: LoadBB, B: Pred)) {
1808 LLVM_DEBUG(
1809 dbgs()
1810 << "COULD NOT PRE LOAD BECAUSE OF A BACKEDGE CRITICAL EDGE '"
1811 << Pred->getName() << "': " << *Load << '\n');
1812 return false;
1813 }
1814
1815 if (LoadInst *LI = findLoadToHoistIntoPred(Pred, LoadBB, Load))
1816 CriticalEdgePredAndLoad[Pred] = LI;
1817 else
1818 CriticalEdgePredSplit.push_back(Elt: Pred);
1819 } else {
1820 // Only add the predecessors that will not be split for now.
1821 PredLoads[Pred] = nullptr;
1822 }
1823 }
1824
1825 // Decide whether PRE is profitable for this load.
1826 unsigned NumInsertPreds = PredLoads.size() + CriticalEdgePredSplit.size();
1827 unsigned NumUnavailablePreds = NumInsertPreds +
1828 CriticalEdgePredAndLoad.size();
1829 assert(NumUnavailablePreds != 0 &&
1830 "Fully available value should already be eliminated!");
1831 (void)NumUnavailablePreds;
1832
1833 // If we need to insert new load in multiple predecessors, reject it.
1834 // FIXME: If we could restructure the CFG, we could make a common pred with
1835 // all the preds that don't have an available Load and insert a new load into
1836 // that one block.
1837 if (NumInsertPreds > 1)
1838 return false;
1839
1840 // Now we know where we will insert load. We must ensure that it is safe
1841 // to speculatively execute the load at that points.
1842 if (MustEnsureSafetyOfSpeculativeExecution) {
1843 if (CriticalEdgePredSplit.size())
1844 if (!isSafeToSpeculativelyExecute(I: Load, CtxI: &*LoadBB->getFirstNonPHIIt(), AC,
1845 DT))
1846 return false;
1847 for (auto &PL : PredLoads)
1848 if (!isSafeToSpeculativelyExecute(I: Load, CtxI: PL.first->getTerminator(), AC,
1849 DT))
1850 return false;
1851 for (auto &CEP : CriticalEdgePredAndLoad)
1852 if (!isSafeToSpeculativelyExecute(I: Load, CtxI: CEP.first->getTerminator(), AC,
1853 DT))
1854 return false;
1855 }
1856
1857 // Split critical edges, and update the unavailable predecessors accordingly.
1858 for (BasicBlock *OrigPred : CriticalEdgePredSplit) {
1859 BasicBlock *NewPred = splitCriticalEdges(Pred: OrigPred, Succ: LoadBB);
1860 assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!");
1861 PredLoads[NewPred] = nullptr;
1862 LLVM_DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
1863 << LoadBB->getName() << '\n');
1864 }
1865
1866 for (auto &CEP : CriticalEdgePredAndLoad)
1867 PredLoads[CEP.first] = nullptr;
1868
1869 // Check if the load can safely be moved to all the unavailable predecessors.
1870 bool CanDoPRE = true;
1871 const DataLayout &DL = Load->getDataLayout();
1872 SmallVector<Instruction*, 8> NewInsts;
1873 for (auto &PredLoad : PredLoads) {
1874 BasicBlock *UnavailablePred = PredLoad.first;
1875
1876 // Do PHI translation to get its value in the predecessor if necessary. The
1877 // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
1878 // We do the translation for each edge we skipped by going from Load's block
1879 // to LoadBB, otherwise we might miss pieces needing translation.
1880
1881 // If all preds have a single successor, then we know it is safe to insert
1882 // the load on the pred (?!?), so we can insert code to materialize the
1883 // pointer if it is not available.
1884 Value *LoadPtr = Load->getPointerOperand();
1885 BasicBlock *Cur = Load->getParent();
1886 while (Cur != LoadBB) {
1887 PHITransAddr Address(LoadPtr, DL, AC);
1888 LoadPtr = Address.translateWithInsertion(CurBB: Cur, PredBB: Cur->getSinglePredecessor(),
1889 DT: *DT, NewInsts);
1890 if (!LoadPtr) {
1891 CanDoPRE = false;
1892 break;
1893 }
1894 Cur = Cur->getSinglePredecessor();
1895 }
1896
1897 if (LoadPtr) {
1898 PHITransAddr Address(LoadPtr, DL, AC);
1899 LoadPtr = Address.translateWithInsertion(CurBB: LoadBB, PredBB: UnavailablePred, DT: *DT,
1900 NewInsts);
1901 }
1902 // If we couldn't find or insert a computation of this phi translated value,
1903 // we fail PRE.
1904 if (!LoadPtr) {
1905 LLVM_DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
1906 << *Load->getPointerOperand() << "\n");
1907 CanDoPRE = false;
1908 break;
1909 }
1910
1911 PredLoad.second = LoadPtr;
1912 }
1913
1914 if (!CanDoPRE) {
1915 while (!NewInsts.empty()) {
1916 // Erase instructions generated by the failed PHI translation before
1917 // trying to number them. PHI translation might insert instructions
1918 // in basic blocks other than the current one, and we delete them
1919 // directly, as salvageAndRemoveInstruction only allows removing from the
1920 // current basic block.
1921 NewInsts.pop_back_val()->eraseFromParent();
1922 }
1923 // HINT: Don't revert the edge-splitting as following transformation may
1924 // also need to split these critical edges.
1925 return !CriticalEdgePredSplit.empty();
1926 }
1927
1928 // Okay, we can eliminate this load by inserting a reload in the predecessor
1929 // and using PHI construction to get the value in the other predecessors, do
1930 // it.
1931 LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *Load << '\n');
1932 LLVM_DEBUG(if (!NewInsts.empty()) dbgs() << "INSERTED " << NewInsts.size()
1933 << " INSTS: " << *NewInsts.back()
1934 << '\n');
1935
1936 // Assign value numbers to the new instructions.
1937 for (Instruction *I : NewInsts) {
1938 // Instructions that have been inserted in predecessor(s) to materialize
1939 // the load address do not retain their original debug locations. Doing
1940 // so could lead to confusing (but correct) source attributions.
1941 I->updateLocationAfterHoist();
1942
1943 // FIXME: We really _ought_ to insert these value numbers into their
1944 // parent's availability map. However, in doing so, we risk getting into
1945 // ordering issues. If a block hasn't been processed yet, we would be
1946 // marking a value as AVAIL-IN, which isn't what we intend.
1947 VN.lookupOrAdd(V: I);
1948 }
1949
1950 eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, AvailableLoads&: PredLoads,
1951 CriticalEdgePredAndLoad: &CriticalEdgePredAndLoad);
1952 ++NumPRELoad;
1953 return true;
1954}
1955
1956bool GVNPass::performLoopLoadPRE(LoadInst *Load,
1957 AvailValInBlkVect &ValuesPerBlock,
1958 UnavailBlkVect &UnavailableBlocks) {
1959 const Loop *L = LI->getLoopFor(BB: Load->getParent());
1960 // TODO: Generalize to other loop blocks that dominate the latch.
1961 if (!L || L->getHeader() != Load->getParent())
1962 return false;
1963
1964 BasicBlock *Preheader = L->getLoopPreheader();
1965 BasicBlock *Latch = L->getLoopLatch();
1966 if (!Preheader || !Latch)
1967 return false;
1968
1969 Value *LoadPtr = Load->getPointerOperand();
1970 // Must be available in preheader.
1971 if (!L->isLoopInvariant(V: LoadPtr))
1972 return false;
1973
1974 // We plan to hoist the load to preheader without introducing a new fault.
1975 // In order to do it, we need to prove that we cannot side-exit the loop
1976 // once loop header is first entered before execution of the load.
1977 if (ICF->isDominatedByICFIFromSameBlock(Insn: Load))
1978 return false;
1979
1980 BasicBlock *LoopBlock = nullptr;
1981 for (auto *Blocker : UnavailableBlocks) {
1982 // Blockers from outside the loop are handled in preheader.
1983 if (!L->contains(BB: Blocker))
1984 continue;
1985
1986 // Only allow one loop block. Loop header is not less frequently executed
1987 // than each loop block, and likely it is much more frequently executed. But
1988 // in case of multiple loop blocks, we need extra information (such as block
1989 // frequency info) to understand whether it is profitable to PRE into
1990 // multiple loop blocks.
1991 if (LoopBlock)
1992 return false;
1993
1994 // Do not sink into inner loops. This may be non-profitable.
1995 if (L != LI->getLoopFor(BB: Blocker))
1996 return false;
1997
1998 // Blocks that dominate the latch execute on every single iteration, maybe
1999 // except the last one. So PREing into these blocks doesn't make much sense
2000 // in most cases. But the blocks that do not necessarily execute on each
2001 // iteration are sometimes much colder than the header, and this is when
2002 // PRE is potentially profitable.
2003 if (DT->dominates(A: Blocker, B: Latch))
2004 return false;
2005
2006 // Make sure that the terminator itself doesn't clobber.
2007 if (Blocker->getTerminator()->mayWriteToMemory())
2008 return false;
2009
2010 LoopBlock = Blocker;
2011 }
2012
2013 if (!LoopBlock)
2014 return false;
2015
2016 // Make sure the memory at this pointer cannot be freed, therefore we can
2017 // safely reload from it after clobber.
2018 if (LoadPtr->canBeFreed())
2019 return false;
2020
2021 // TODO: Support critical edge splitting if blocker has more than 1 successor.
2022 MapVector<BasicBlock *, Value *> AvailableLoads;
2023 AvailableLoads[LoopBlock] = LoadPtr;
2024 AvailableLoads[Preheader] = LoadPtr;
2025
2026 LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOOP LOAD: " << *Load << '\n');
2027 eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, AvailableLoads,
2028 /*CriticalEdgePredAndLoad*/ nullptr);
2029 ++NumPRELoopLoad;
2030 return true;
2031}
2032
2033static void reportLoadElim(LoadInst *Load, Value *AvailableValue,
2034 OptimizationRemarkEmitter *ORE) {
2035 using namespace ore;
2036
2037 ORE->emit(RemarkBuilder: [&]() {
2038 return OptimizationRemark(DEBUG_TYPE, "LoadElim", Load)
2039 << "load of type " << NV("Type", Load->getType()) << " eliminated"
2040 << setExtraArgs() << " in favor of "
2041 << NV("InfavorOfValue", AvailableValue);
2042 });
2043}
2044
2045/// Attempt to eliminate a load whose dependencies are
2046/// non-local by performing PHI construction.
2047bool GVNPass::processNonLocalLoad(LoadInst *Load) {
2048 // Non-local speculations are not allowed under asan.
2049 if (Load->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeAddress) ||
2050 Load->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeHWAddress))
2051 return false;
2052
2053 // Find the non-local dependencies of the load.
2054 LoadDepVect Deps;
2055 MD->getNonLocalPointerDependency(QueryInst: Load, Result&: Deps);
2056
2057 // If we had to process more than one hundred blocks to find the
2058 // dependencies, this load isn't worth worrying about. Optimizing
2059 // it will be too expensive.
2060 unsigned NumDeps = Deps.size();
2061 if (NumDeps > MaxNumDeps)
2062 return false;
2063
2064 SmallVector<ReachingMemVal, 64> MemVals;
2065 MemVals.reserve(N: Deps.size());
2066
2067 for (const NonLocalDepResult &Dep : Deps) {
2068 const auto &R = Dep.getResult();
2069 SelectAddr SelAddr = Dep.getAddress();
2070 BasicBlock *BB = Dep.getBB();
2071 Instruction *Inst = R.getInst();
2072 if (R.isSelect()) {
2073 auto [Cond, Addrs] = SelAddr.getSelectCondAndAddrs();
2074 MemVals.emplace_back(
2075 Args: ReachingMemVal::getSelect(BB, Cond, TrueAddr: Addrs.first, FalseAddr: Addrs.second));
2076 continue;
2077 }
2078 Value *Address = SelAddr.getAddr();
2079 if (R.isClobber())
2080 MemVals.emplace_back(Args: ReachingMemVal::getClobber(Addr: Address, Inst));
2081 else if (R.isDef())
2082 MemVals.emplace_back(Args: ReachingMemVal::getDef(Addr: Address, Inst));
2083 else
2084 MemVals.emplace_back(Args: ReachingMemVal::getUnknown(BB, Addr: Address, Inst));
2085 }
2086
2087 return processNonLocalLoad(L: Load, Deps&: MemVals);
2088}
2089
2090bool GVNPass::processNonLocalLoad(LoadInst *Load,
2091 SmallVectorImpl<ReachingMemVal> &Deps) {
2092 // If we had a phi translation failure, we'll have a single entry which is a
2093 // clobber in the current block. Reject this early.
2094 if (Deps.size() == 1 && Deps[0].Kind == DepKind::Other) {
2095 LLVM_DEBUG(dbgs() << "GVN: non-local load "; Load->printAsOperand(dbgs());
2096 dbgs() << " has unknown dependencies\n";);
2097 return false;
2098 }
2099
2100 bool Changed = false;
2101 // This is a limited form of scalar PRE for load indices. If this load follows
2102 // a GEP, see if we can PRE the indices before analyzing.
2103 if (isScalarPREEnabled()) {
2104 if (GetElementPtrInst *GEP =
2105 dyn_cast<GetElementPtrInst>(Val: Load->getOperand(i_nocapture: 0))) {
2106 for (Use &U : GEP->indices())
2107 if (Instruction *I = dyn_cast<Instruction>(Val: U.get()))
2108 Changed |= performScalarPRE(I);
2109 }
2110 }
2111
2112 // Step 1: Analyze the availability of the load.
2113 AvailValInBlkVect ValuesPerBlock;
2114 UnavailBlkVect UnavailableBlocks;
2115 AnalyzeLoadAvailability(Load, Deps, ValuesPerBlock, UnavailableBlocks);
2116
2117 // If we have no predecessors that produce a known value for this load, exit
2118 // early.
2119 if (ValuesPerBlock.empty())
2120 return Changed;
2121
2122 // Step 2: Eliminate fully redundancy.
2123 //
2124 // If all of the instructions we depend on produce a known value for this
2125 // load, then it is fully redundant and we can use PHI insertion to compute
2126 // its value. Insert PHIs and remove the fully redundant value now.
2127 if (UnavailableBlocks.empty()) {
2128 LLVM_DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *Load << '\n');
2129
2130 // Perform PHI construction.
2131 Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, GVN&: *this);
2132 // ConstructSSAForLoadSet is responsible for combining metadata.
2133 ICF->removeUsersOf(Inst: Load);
2134 Load->replaceAllUsesWith(V);
2135
2136 if (isa<PHINode>(Val: V))
2137 V->takeName(V: Load);
2138 if (Instruction *I = dyn_cast<Instruction>(Val: V))
2139 // If instruction I has debug info, then we should not update it.
2140 // Also, if I has a null DebugLoc, then it is still potentially incorrect
2141 // to propagate Load's DebugLoc because Load may not post-dominate I.
2142 if (Load->getDebugLoc() && Load->getParent() == I->getParent())
2143 I->setDebugLoc(Load->getDebugLoc());
2144 if (MD && V->getType()->isPtrOrPtrVectorTy())
2145 MD->invalidateCachedPointerInfo(Ptr: V);
2146 ++NumGVNLoad;
2147 reportLoadElim(Load, AvailableValue: V, ORE);
2148 salvageAndRemoveInstruction(I: Load);
2149 return true;
2150 }
2151
2152 // Step 3: Eliminate partial redundancy.
2153 if (!isLoadPREEnabled())
2154 return Changed;
2155 if (!isLoadInLoopPREEnabled() && LI->getLoopFor(BB: Load->getParent()))
2156 return Changed;
2157
2158 if (performLoopLoadPRE(Load, ValuesPerBlock, UnavailableBlocks) ||
2159 PerformLoadPRE(Load, ValuesPerBlock, UnavailableBlocks))
2160 return true;
2161
2162 return Changed;
2163}
2164
2165bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
2166 Value *V = IntrinsicI->getArgOperand(i: 0);
2167
2168 if (ConstantInt *Cond = dyn_cast<ConstantInt>(Val: V)) {
2169 if (Cond->isZero()) {
2170 Type *Int8Ty = Type::getInt8Ty(C&: V->getContext());
2171 Type *PtrTy = PointerType::get(C&: V->getContext(), AddressSpace: 0);
2172 // Insert a new store to null instruction before the load to indicate that
2173 // this code is not reachable. FIXME: We could insert unreachable
2174 // instruction directly because we can modify the CFG.
2175 auto *NewS =
2176 new StoreInst(PoisonValue::get(T: Int8Ty), Constant::getNullValue(Ty: PtrTy),
2177 IntrinsicI->getIterator());
2178 if (MSSAU) {
2179 const MemoryUseOrDef *FirstNonDom = nullptr;
2180 const auto *AL =
2181 MSSAU->getMemorySSA()->getBlockAccesses(BB: IntrinsicI->getParent());
2182
2183 // If there are accesses in the current basic block, find the first one
2184 // that does not come before NewS. The new memory access is inserted
2185 // after the found access or before the terminator if no such access is
2186 // found.
2187 if (AL) {
2188 for (const auto &Acc : *AL) {
2189 if (auto *Current = dyn_cast<MemoryUseOrDef>(Val: &Acc))
2190 if (!Current->getMemoryInst()->comesBefore(Other: NewS)) {
2191 FirstNonDom = Current;
2192 break;
2193 }
2194 }
2195 }
2196
2197 auto *NewDef =
2198 FirstNonDom ? MSSAU->createMemoryAccessBefore(
2199 I: NewS, Definition: nullptr,
2200 InsertPt: const_cast<MemoryUseOrDef *>(FirstNonDom))
2201 : MSSAU->createMemoryAccessInBB(
2202 I: NewS, Definition: nullptr,
2203 BB: NewS->getParent(), Point: MemorySSA::BeforeTerminator);
2204
2205 MSSAU->insertDef(Def: cast<MemoryDef>(Val: NewDef), /*RenameUses=*/false);
2206 }
2207 }
2208 if (isAssumeWithEmptyBundle(Assume: *IntrinsicI)) {
2209 salvageAndRemoveInstruction(I: IntrinsicI);
2210 return true;
2211 }
2212 return false;
2213 }
2214
2215 if (isa<Constant>(Val: V)) {
2216 // If it's not false, and constant, it must evaluate to true. This means our
2217 // assume is assume(true), and thus, pointless, and we don't want to do
2218 // anything more here.
2219 return false;
2220 }
2221
2222 Constant *True = ConstantInt::getTrue(Context&: V->getContext());
2223 return propagateEquality(LHS: V, RHS: True, Root: IntrinsicI);
2224}
2225
2226static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
2227 patchReplacementInstruction(I, Repl);
2228 I->replaceAllUsesWith(V: Repl);
2229}
2230
2231/// If a load has !invariant.group, try to find the most-dominating instruction
2232/// with the same metadata and equivalent pointer (modulo bitcasts and zero
2233/// GEPs). If one is found that dominates the load, its value can be reused.
2234static Instruction *findInvariantGroupValue(LoadInst *L, DominatorTree &DT) {
2235 Value *PointerOperand = L->getPointerOperand()->stripPointerCasts();
2236
2237 // It's not safe to walk the use list of a global value because function
2238 // passes aren't allowed to look outside their functions.
2239 // FIXME: this could be fixed by filtering instructions from outside of
2240 // current function.
2241 if (isa<Constant>(Val: PointerOperand))
2242 return nullptr;
2243
2244 // Queue to process all pointers that are equivalent to load operand.
2245 SmallVector<Value *, 8> PointerUsesQueue;
2246 PointerUsesQueue.push_back(Elt: PointerOperand);
2247
2248 Instruction *MostDominatingInstruction = L;
2249
2250 // FIXME: This loop is potentially O(n^2) due to repeated dominates checks.
2251 while (!PointerUsesQueue.empty()) {
2252 Value *Ptr = PointerUsesQueue.pop_back_val();
2253 assert(Ptr && !isa<GlobalValue>(Ptr) &&
2254 "Null or GlobalValue should not be inserted");
2255
2256 for (User *U : Ptr->users()) {
2257 auto *I = dyn_cast<Instruction>(Val: U);
2258 if (!I || I == L || !DT.dominates(Def: I, User: MostDominatingInstruction))
2259 continue;
2260
2261 // Add bitcasts and zero GEPs to queue.
2262 // TODO: Should drop bitcast?
2263 if (isa<BitCastInst>(Val: I) ||
2264 (isa<GetElementPtrInst>(Val: I) &&
2265 cast<GetElementPtrInst>(Val: I)->hasAllZeroIndices())) {
2266 PointerUsesQueue.push_back(Elt: I);
2267 continue;
2268 }
2269
2270 // If we hit a load/store with an invariant.group metadata and the same
2271 // pointer operand, we can assume that value pointed to by the pointer
2272 // operand didn't change.
2273 if (I->hasMetadata(KindID: LLVMContext::MD_invariant_group) &&
2274 Ptr == getLoadStorePointerOperand(V: I) && !I->isVolatile())
2275 MostDominatingInstruction = I;
2276 }
2277 }
2278
2279 return MostDominatingInstruction != L ? MostDominatingInstruction : nullptr;
2280}
2281
2282/// Return the memory location accessed by the (masked) load/store instruction
2283/// `I`, if the instruction could potentially provide a useful value for
2284/// eliminating the load.
2285static std::optional<MemoryLocation>
2286maybeLoadStoreLocation(Instruction *I, bool AllowStores,
2287 const TargetLibraryInfo *TLI) {
2288 if (auto *LI = dyn_cast<LoadInst>(Val: I))
2289 return MemoryLocation::get(LI);
2290
2291 if (auto *II = dyn_cast<IntrinsicInst>(Val: I)) {
2292 switch (II->getIntrinsicID()) {
2293 case Intrinsic::masked_load:
2294 return MemoryLocation::getForArgument(Call: II, ArgIdx: 0, TLI);
2295 case Intrinsic::masked_store:
2296 if (AllowStores)
2297 return MemoryLocation::getForArgument(Call: II, ArgIdx: 1, TLI);
2298 return std::nullopt;
2299 default:
2300 break;
2301 }
2302 }
2303
2304 if (!AllowStores)
2305 return std::nullopt;
2306
2307 if (auto *SI = dyn_cast<StoreInst>(Val: I))
2308 return MemoryLocation::get(SI);
2309 return std::nullopt;
2310}
2311
2312/// Scan the users of each MemoryAccess in `ClobbersList` that belong to `BB`,
2313/// looking for memory reads whose location aliases `Loc` and dominates our
2314/// load.
2315std::optional<GVNPass::ReachingMemVal> GVNPass::scanMemoryAccessesUsers(
2316 const MemoryLocation &Loc, bool IsInvariantLoad, BasicBlock *BB,
2317 const SmallVectorImpl<MemoryAccess *> &ClobbersList, MemorySSA &MSSA,
2318 BatchAAResults &AA, LoadInst *L) {
2319
2320 // Prefer a candidate that is closer to the load within the same block.
2321 auto UpdateChoice = [&](std::optional<ReachingMemVal> &Choice,
2322 AliasResult &AR, Instruction *Candidate) {
2323 if (!Choice) {
2324 if (AR == AliasResult::PartialAlias)
2325 Choice = ReachingMemVal::getClobber(Addr: Loc.Ptr, Inst: Candidate, Offset: AR.getOffset());
2326 else
2327 Choice = ReachingMemVal::getDef(Addr: Loc.Ptr, Inst: Candidate);
2328 return;
2329 }
2330 if (!MSSA.locallyDominates(A: MSSA.getMemoryAccess(I: Choice->Inst),
2331 B: MSSA.getMemoryAccess(I: Candidate)))
2332 return;
2333
2334 if (AR == AliasResult::PartialAlias) {
2335 Choice->Kind = DepKind::Clobber;
2336 Choice->Offset = AR.getOffset();
2337 } else {
2338 Choice->Kind = DepKind::Def;
2339 Choice->Offset = -1;
2340 }
2341
2342 Choice->Inst = Candidate;
2343 Choice->Block = Candidate->getParent();
2344 };
2345
2346 std::optional<ReachingMemVal> ReachingVal;
2347 for (MemoryAccess *MA : ClobbersList) {
2348 unsigned Scanned = 0;
2349 for (User *U : MA->users()) {
2350 if (++Scanned >= ScanUsersLimit)
2351 return ReachingMemVal::getUnknown(BB, Addr: Loc.Ptr);
2352
2353 auto *UseOrDef = dyn_cast<MemoryUseOrDef>(Val: U);
2354 if (!UseOrDef || UseOrDef->getBlock() != BB)
2355 continue;
2356
2357 Instruction *MemI = UseOrDef->getMemoryInst();
2358 if (MemI == L ||
2359 (L && !MSSA.locallyDominates(A: UseOrDef, B: MSSA.getMemoryAccess(I: L))))
2360 continue;
2361
2362 if (auto MaybeLoc = maybeLoadStoreLocation(I: MemI, AllowStores: IsInvariantLoad, TLI)) {
2363 AliasResult AR = AA.alias(LocA: *MaybeLoc, LocB: Loc);
2364 // If the locations do not certainly alias, we cannot possibly infer the
2365 // following load loads the same value.
2366 if (AR == AliasResult::NoAlias || AR == AliasResult::MayAlias)
2367 continue;
2368
2369 // Locations partially overlap, but neither is a subset of the other, or
2370 // the second location is before the first.
2371 if (AR == AliasResult::PartialAlias &&
2372 (!AR.hasOffset() || AR.getOffset() < 0))
2373 continue;
2374
2375 // Found candidate, the new load memory location and the given location
2376 // must alias: precise overlap, or subset with non-negative offset.
2377 UpdateChoice(ReachingVal, AR, MemI);
2378 }
2379 }
2380 if (ReachingVal)
2381 break;
2382 }
2383
2384 return ReachingVal;
2385}
2386
2387/// Check if a given MemoryAccess (usually a MemoryDef) actually modifies a
2388/// given location. Returns a ReachingMemVal describing the dependency.
2389std::optional<GVNPass::ReachingMemVal> GVNPass::accessMayModifyLocation(
2390 MemoryAccess *ClobberMA, const MemoryLocation &Loc, bool IsInvariantLoad,
2391 BasicBlock *BB, MemorySSA &MSSA, BatchAAResults &AA) {
2392 assert(ClobberMA->getBlock() == BB);
2393
2394 // If the clobbering access is the entry memory state, we cannot say anything
2395 // about the content of the memory, except when we are accessing a local
2396 // object, which can be turned later into producing `undef`.
2397 if (MSSA.isLiveOnEntryDef(MA: ClobberMA)) {
2398 if (auto *Alloc = dyn_cast<AllocaInst>(Val: getUnderlyingObject(V: Loc.Ptr)))
2399 if (Alloc->getParent() == BB)
2400 return ReachingMemVal::getDef(Addr: Loc.Ptr, Inst: const_cast<AllocaInst *>(Alloc));
2401 return ReachingMemVal::getUnknown(BB, Addr: Loc.Ptr);
2402 }
2403
2404 // Loads from "constant" memory can't be clobbered.
2405 if (IsInvariantLoad || AA.pointsToConstantMemory(Loc))
2406 return std::nullopt;
2407
2408 auto GetOrdering = [](const Instruction *I) {
2409 if (auto *L = dyn_cast<LoadInst>(Val: I))
2410 return L->getOrdering();
2411 return cast<StoreInst>(Val: I)->getOrdering();
2412 };
2413 Instruction *ClobberI = cast<MemoryDef>(Val: ClobberMA)->getMemoryInst();
2414
2415 // Check if the clobbering access is a load or a store that we can reuse.
2416 if (auto MaybeLoc = maybeLoadStoreLocation(I: ClobberI, AllowStores: true, TLI)) {
2417 AliasResult AR = AA.alias(LocA: *MaybeLoc, LocB: Loc);
2418 if (AR == AliasResult::MustAlias)
2419 return ReachingMemVal::getDef(Addr: Loc.Ptr, Inst: ClobberI);
2420
2421 if (AR == AliasResult::NoAlias) {
2422 // If the locations do not alias we may still be able to skip over the
2423 // clobbering instruction, even if it is atomic.
2424 // The original load is either non-atomic or unordered. We can reorder
2425 // these across non-atomic, unordered or monotonic loads or across any
2426 // store.
2427 if (!ClobberI->isAtomic() ||
2428 !isStrongerThan(AO: GetOrdering(ClobberI), Other: AtomicOrdering::Monotonic) ||
2429 isa<StoreInst>(Val: ClobberI))
2430 return std::nullopt;
2431 return ReachingMemVal::getClobber(Addr: Loc.Ptr, Inst: ClobberI);
2432 }
2433
2434 // Skip over volatile loads (the original load is non-volatile, non-atomic).
2435 if (!ClobberI->isAtomic() && isa<LoadInst>(Val: ClobberI))
2436 return std::nullopt;
2437
2438 if (AR == AliasResult::MayAlias ||
2439 (AR == AliasResult::PartialAlias &&
2440 (!AR.hasOffset() || AR.getOffset() < 0)))
2441 return ReachingMemVal::getClobber(Addr: Loc.Ptr, Inst: ClobberI);
2442
2443 // The only option left is a store of the superset of the required bits.
2444 assert(AR == AliasResult::PartialAlias && AR.hasOffset() &&
2445 AR.getOffset() > 0 &&
2446 "Must be the superset/partial overlap case with positive offset");
2447 return ReachingMemVal::getClobber(Addr: Loc.Ptr, Inst: ClobberI, Offset: AR.getOffset());
2448 }
2449
2450 if (auto *II = dyn_cast<IntrinsicInst>(Val: ClobberI)) {
2451 if (isa<DbgInfoIntrinsic>(Val: II))
2452 return std::nullopt;
2453 if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
2454 MemoryLocation IIObjLoc = MemoryLocation::getForArgument(Call: II, ArgIdx: 0, TLI);
2455 if (AA.isMustAlias(LocA: IIObjLoc, LocB: Loc))
2456 return ReachingMemVal::getDef(Addr: Loc.Ptr, Inst: ClobberI);
2457 return std::nullopt;
2458 }
2459 }
2460
2461 // If we are at a malloc-like function call, we can turn the load into `undef`
2462 // or zero.
2463 if (isNoAliasCall(V: ClobberI)) {
2464 const Value *Obj = getUnderlyingObject(V: Loc.Ptr);
2465 if (Obj == ClobberI || AA.isMustAlias(V1: ClobberI, V2: Loc.Ptr))
2466 return ReachingMemVal::getDef(Addr: Loc.Ptr, Inst: ClobberI);
2467 }
2468
2469 // Can reorder loads across a release fence.
2470 if (auto *FI = dyn_cast<FenceInst>(Val: ClobberI))
2471 if (FI->getOrdering() == AtomicOrdering::Release)
2472 return std::nullopt;
2473
2474 // See if the clobber instruction (e.g., a generic call) may modify the
2475 // location.
2476 ModRefInfo MR = AA.getModRefInfo(I: ClobberI, OptLoc: Loc);
2477 // If may modify the location, analyze deeper, to exclude accesses to
2478 // non-escaping local allocations.
2479 if (MR == ModRefInfo::NoModRef || MR == ModRefInfo::Ref)
2480 return std::nullopt;
2481
2482 // Conservatively assume the clobbering memory access may overwrite the
2483 // location.
2484 return ReachingMemVal::getClobber(Addr: Loc.Ptr, Inst: ClobberI);
2485}
2486
2487/// Collect the predecessors of block, while doing phi-translation of the memory
2488/// address and the memory clobber. Return false if the block should be marked
2489/// as clobbering the memory location in an unknown way.
2490bool GVNPass::collectPredecessors(BasicBlock *BB, const PHITransAddr &Addr,
2491 MemoryAccess *ClobberMA,
2492 DependencyBlockSet &Blocks,
2493 SmallVectorImpl<BasicBlock *> &Worklist) {
2494 if (Addr.needsPHITranslationFromBlock(BB) &&
2495 !Addr.isPotentiallyPHITranslatable())
2496 return false;
2497
2498 auto *MPhi =
2499 ClobberMA->getBlock() == BB ? dyn_cast<MemoryPhi>(Val: ClobberMA) : nullptr;
2500 SmallVector<std::pair<BasicBlock *, DependencyBlockInfo>, 8> Preds;
2501 for (BasicBlock *Pred : predecessors(BB)) {
2502 // Skip unreachable predecessors.
2503 if (!DT->isReachableFromEntry(A: Pred))
2504 continue;
2505
2506 // Skip already visited predecessors.
2507 if (llvm::any_of(Range&: Preds, P: [Pred](const auto &P) { return P.first == Pred; }))
2508 continue;
2509
2510 PHITransAddr TransAddr = Addr;
2511 if (TransAddr.needsPHITranslationFromBlock(BB))
2512 TransAddr.translateValue(CurBB: BB, PredBB: Pred, DT, MustDominate: false);
2513
2514 auto It = Blocks.find(Val: Pred);
2515 if (It != Blocks.end()) {
2516 // If we reach a visited block with a different address, set the
2517 // current block as clobbering the memory location in an unknown way
2518 // (by returning false).
2519 if (It->second.Addr.getAddr() != TransAddr.getAddr())
2520 return false;
2521 // Otherwise, just stop the traversal.
2522 continue;
2523 }
2524
2525 Preds.emplace_back(
2526 Args&: Pred, Args: DependencyBlockInfo(TransAddr,
2527 MPhi ? MPhi->getIncomingValueForBlock(BB: Pred)
2528 : ClobberMA));
2529 }
2530
2531 // We collected the predecessors and stored them in Preds. Now, populate the
2532 // worklist with the predecessors found, and cache the eventual translated
2533 // address for each block.
2534 for (auto &P : Preds) {
2535 [[maybe_unused]] auto It =
2536 Blocks.try_emplace(Key: P.first, Args: std::move(P.second)).first;
2537 Worklist.push_back(Elt: P.first);
2538 }
2539
2540 return true;
2541}
2542
2543/// Build a list of MemoryAccesses whose users could potentially alias the
2544/// memory location being queried. Starts from StartInfo's initial clobber,
2545/// walk the use-def chain to the final clobber. If the chain extends beyond
2546/// `BB`, continue into that block but only if it is in the previously collected
2547/// set.
2548void GVNPass::collectClobberList(SmallVectorImpl<MemoryAccess *> &Clobbers,
2549 BasicBlock *BB,
2550 const DependencyBlockInfo &StartInfo,
2551 const DependencyBlockSet &Blocks,
2552 MemorySSA &MSSA) {
2553 MemoryAccess *MA = StartInfo.InitialClobberMA;
2554 MemoryAccess *LastMA = StartInfo.ClobberMA;
2555
2556 for (;;) {
2557 while (MA != LastMA) {
2558 Clobbers.push_back(Elt: MA);
2559 MA = cast<MemoryUseOrDef>(Val: MA)->getDefiningAccess();
2560 }
2561 Clobbers.push_back(Elt: MA);
2562
2563 if (MSSA.isLiveOnEntryDef(MA) ||
2564 (MA->getBlock() == BB && !isa<MemoryPhi>(Val: MA)))
2565 break;
2566
2567 // If the final clobber in the current block is a MemoryPhi, go to the
2568 // immediate dominator; otherwise, just get to the block containing the
2569 // final clobber.
2570 if (MA->getBlock() == BB)
2571 BB = DT->getNode(BB)->getIDom()->getBlock();
2572 else
2573 BB = MA->getBlock();
2574
2575 auto It = Blocks.find(Val: BB);
2576 if (It == Blocks.end())
2577 break;
2578
2579 MA = It->second.InitialClobberMA;
2580 LastMA = It->second.ClobberMA;
2581 if (MA == Clobbers.back())
2582 Clobbers.pop_back();
2583 }
2584}
2585
2586/// Entrypoint for the MemorySSA-based redundant load elimination algorithm.
2587/// Given as input a load instruction, the function computes the set of reaching
2588/// memory values, one per predecessor path, that AnalyzeLoadAvailability can
2589/// later use to establish whether the load may be eliminated. A reaching value
2590/// may be of the following descriptor kind:
2591/// * Def: a precise instruction that produces the exact bits the load would
2592/// read (e.g., an equivalent load or a MustAlias store);
2593/// * Clobber: a write that clobbers a superset of the bits the load would read
2594/// (e.g., a memset over a larger region);
2595/// * Other: we know which block defines the memory location in some way, but
2596/// could not identify a precise instruction (e.g., memory already live at
2597/// function entry).
2598bool GVNPass::findReachingValuesForLoad(LoadInst *L,
2599 SmallVectorImpl<ReachingMemVal> &Values,
2600 MemorySSA &MSSA, AAResults &AAR) {
2601 EarliestEscapeAnalysis EA(*DT, LI);
2602 BatchAAResults AA(AAR, &EA);
2603 BasicBlock *StartBlock = L->getParent();
2604 bool IsInvariantLoad = L->hasMetadata(KindID: LLVMContext::MD_invariant_load);
2605 // TODO: Simplify later work by just getClobberingMemoryAccess().
2606 MemoryAccess *ClobberMA = MSSA.getMemoryAccess(I: L)->getDefiningAccess();
2607 const MemoryLocation Loc = MemoryLocation::get(LI: L);
2608
2609 // Fast path for load tagged with !invariant.group.
2610 if (L->hasMetadata(KindID: LLVMContext::MD_invariant_group)) {
2611 if (Instruction *G = findInvariantGroupValue(L, DT&: *DT)) {
2612 Values.emplace_back(
2613 Args: ReachingMemVal::getDef(Addr: getLoadStorePointerOperand(V: G), Inst: G));
2614 return true;
2615 }
2616 }
2617
2618 // Phase 1. First off, look for a local dependency to avoid having to
2619 // disambiguate between before the load and after the load of the starting
2620 // block (as the load may be visited from a backedge).
2621 do {
2622 // Scan users of the clobbering memory access.
2623 if (auto RMV = scanMemoryAccessesUsers(
2624 Loc, IsInvariantLoad, BB: StartBlock,
2625 ClobbersList: SmallVector<MemoryAccess *, 1>{ClobberMA}, MSSA, AA, L)) {
2626 Values.emplace_back(Args&: *RMV);
2627 return true;
2628 }
2629
2630 // Exit from here, and proceed visiting predecessors if the clobbering
2631 // access is non-local or is a MemoryPhi.
2632 if (ClobberMA->getBlock() != StartBlock || isa<MemoryPhi>(Val: ClobberMA))
2633 break;
2634
2635 // Check if the clobber actually aliases the load location.
2636 if (auto RMV = accessMayModifyLocation(ClobberMA, Loc, IsInvariantLoad,
2637 BB: StartBlock, MSSA, AA)) {
2638 Values.emplace_back(Args&: *RMV);
2639 return true;
2640 }
2641
2642 // It may happen that the clobbering memory access does not actually
2643 // clobber our load location, transition to its defining memory access.
2644 ClobberMA = cast<MemoryUseOrDef>(Val: ClobberMA)->getDefiningAccess();
2645 } while (ClobberMA->getBlock() == StartBlock);
2646
2647 // Non-local speculations are not allowed under ASan.
2648 if (L->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeAddress) ||
2649 L->getFunction()->hasFnAttribute(Kind: Attribute::SanitizeHWAddress))
2650 return false;
2651
2652 // Phase 2. Walk backwards through the CFG, collecting all the blocks that
2653 // contain an instruction that modifies the load memory location, or that lie
2654 // on a path between a clobbering block and our load. Start off by collecting
2655 // the predecessors of `StartBlock`. All the visited blocks are stored in a
2656 // the set `Blocks`. If possible, the memory address maintained for the block
2657 // visited does get phi-translated.
2658 DependencyBlockSet Blocks;
2659 SmallVector<BasicBlock *, 16> InitialWorklist;
2660 const DataLayout &DL = L->getModule()->getDataLayout();
2661 if (!collectPredecessors(BB: StartBlock,
2662 Addr: PHITransAddr(L->getPointerOperand(), DL, AC),
2663 ClobberMA, Blocks, Worklist&: InitialWorklist))
2664 return false;
2665
2666 // Do a bottom-up DFS.
2667 auto Worklist = InitialWorklist;
2668 while (!Worklist.empty()) {
2669 auto *BB = Worklist.pop_back_val();
2670 DependencyBlockInfo &Info = Blocks.find(Val: BB)->second;
2671
2672 // Phi-translation may have failed.
2673 if (!Info.Addr.getAddr())
2674 continue;
2675
2676 // If the clobbering memory access is in the current block and it indeed
2677 // clobbers our load location, record the dependency and do not visit the
2678 // predecessors of this block further, continue with the blocks in the
2679 // worklist.
2680 if (Info.ClobberMA->getBlock() == BB && !isa<MemoryPhi>(Val: Info.ClobberMA)) {
2681 if (auto RMV = accessMayModifyLocation(
2682 ClobberMA: Info.ClobberMA, Loc: Loc.getWithNewPtr(NewPtr: Info.Addr.getAddr()),
2683 IsInvariantLoad, BB, MSSA, AA)) {
2684 Info.MemVal = RMV;
2685 continue;
2686 }
2687 assert(!MSSA.isLiveOnEntryDef(Info.ClobberMA) &&
2688 "LiveOnEntry aliases everything");
2689
2690 // If, however, the clobbering memory access does not actually clobber
2691 // our load location, transition to its defining memory access, but
2692 // keep examining the same basic block.
2693 Info.ClobberMA =
2694 cast<MemoryUseOrDef>(Val: Info.ClobberMA)->getDefiningAccess();
2695 Worklist.emplace_back(Args&: BB);
2696 continue;
2697 }
2698
2699 // At this point we know the current block is "transparent", i.e. the memory
2700 // location is not modified when execution goes through this block.
2701 // Continue to its predecessors, unless a predecessor has already been
2702 // visited with a different address. We currently cannot represent such a
2703 // dependency.
2704 if (BB == StartBlock && Info.Addr.getAddr() != L->getPointerOperand()) {
2705 Info.ForceUnknown = true;
2706 continue;
2707 }
2708 if (BB != StartBlock &&
2709 !collectPredecessors(BB, Addr: Info.Addr, ClobberMA: Info.ClobberMA, Blocks, Worklist))
2710 Info.ForceUnknown = true;
2711 }
2712
2713 // Phase 3. We have collected all the blocks that either write a value to the
2714 // memory location of the load, or there exists a path to the load, along
2715 // which the memory location is not modified. Perform a second DFS to find
2716 // load-to-load dependencies; namely, look at the dominating memory reads,
2717 // that alias our load. These are the MemoryUses that are users of the
2718 // MemoryDefs we previously identified. If no memory read is encountered,
2719 // either confirm the clobbering write found before or set to unknown.
2720 Worklist = InitialWorklist;
2721 for (BasicBlock *BB : Worklist) {
2722 DependencyBlockInfo &Info = Blocks.find(Val: BB)->second;
2723 Info.Visited = true;
2724 }
2725
2726 SmallVector<MemoryAccess *> Clobbers;
2727 while (!Worklist.empty()) {
2728 auto *BB = Worklist.pop_back_val();
2729 DependencyBlockInfo &Info = Blocks.find(Val: BB)->second;
2730
2731 // If phi-translation failed, assume the memory location is modified in
2732 // unknown way.
2733 if (!Info.Addr.getAddr()) {
2734 Values.push_back(Elt: ReachingMemVal::getUnknown(BB, Addr: nullptr));
2735 continue;
2736 }
2737
2738 Clobbers.clear();
2739 collectClobberList(Clobbers, BB, StartInfo: Info, Blocks, MSSA);
2740 if (auto RMV =
2741 scanMemoryAccessesUsers(Loc: Loc.getWithNewPtr(NewPtr: Info.Addr.getAddr()),
2742 IsInvariantLoad, BB, ClobbersList: Clobbers, MSSA, AA)) {
2743 Values.push_back(Elt: *RMV);
2744 continue;
2745 }
2746
2747 // If no reusable memory use was found, and the current block is not
2748 // transparent, use the already established memory def.
2749 if (Info.MemVal) {
2750 Values.push_back(Elt: *Info.MemVal);
2751 continue;
2752 }
2753
2754 if (Info.ForceUnknown) {
2755 Values.push_back(Elt: ReachingMemVal::getUnknown(BB, Addr: Info.Addr.getAddr()));
2756 continue;
2757 }
2758
2759 // If the current block is transparent, continue to its predecessors.
2760 for (BasicBlock *Pred : predecessors(BB)) {
2761 auto It = Blocks.find(Val: Pred);
2762 if (It == Blocks.end())
2763 continue;
2764 DependencyBlockInfo &PredInfo = It->second;
2765 if (PredInfo.Visited)
2766 continue;
2767 PredInfo.Visited = true;
2768 Worklist.push_back(Elt: Pred);
2769 }
2770 }
2771
2772 return true;
2773}
2774
2775/// Attempt to eliminate a load, first by eliminating it
2776/// locally, and then attempting non-local elimination if that fails.
2777bool GVNPass::processLoad(LoadInst *L) {
2778 if (!MD && !isMemorySSAEnabled())
2779 return false;
2780
2781 // This code hasn't been audited for ordered or volatile memory access.
2782 if (!L->isUnordered())
2783 return false;
2784
2785 if (L->getType()->isTokenLikeTy())
2786 return false;
2787
2788 if (L->use_empty()) {
2789 salvageAndRemoveInstruction(I: L);
2790 return true;
2791 }
2792
2793 ReachingMemVal MemVal = ReachingMemVal::getUnknown(BB: nullptr, Addr: nullptr);
2794 if (!isMemorySSAEnabled()) {
2795 // ... to a pointer that has been loaded from before...
2796 MemDepResult Dep = MD->getDependency(QueryInst: L);
2797
2798 // If it is defined in another block, try harder.
2799 if (Dep.isNonLocal())
2800 return processNonLocalLoad(Load: L);
2801
2802 // Only handle the local case below.
2803 if (Dep.isDef())
2804 MemVal = ReachingMemVal::getDef(Addr: L->getPointerOperand(), Inst: Dep.getInst());
2805 else if (Dep.isClobber())
2806 MemVal =
2807 ReachingMemVal::getClobber(Addr: L->getPointerOperand(), Inst: Dep.getInst());
2808 } else {
2809 SmallVector<ReachingMemVal, 8> MemVals;
2810 if (!findReachingValuesForLoad(L, Values&: MemVals, MSSA&: *MSSAU->getMemorySSA(), AAR&: *AA))
2811 return false; // Too many dependencies.
2812 assert(MemVals.size() && "Expected at least an unknown value");
2813 if (MemVals.size() > 1 || MemVals[0].Block != L->getParent())
2814 return processNonLocalLoad(Load: L, Deps&: MemVals);
2815
2816 MemVal = MemVals[0];
2817 }
2818
2819 if (MemVal.Kind == DepKind::Other) {
2820 // This might be a NonFuncLocal or an Unknown.
2821 LLVM_DEBUG(
2822 // fast print dep, using operator<< on instruction is too slow.
2823 dbgs() << "GVN: load "; L->printAsOperand(dbgs());
2824 dbgs() << " has unknown dependence\n";);
2825 return false;
2826 }
2827
2828 auto AV = AnalyzeLoadAvailability(Load: L, Dep: MemVal, Address: L->getPointerOperand());
2829 if (!AV)
2830 return false;
2831
2832 Value *AvailableValue = AV->MaterializeAdjustedValue(Load: L, InsertPt: L);
2833
2834 // MaterializeAdjustedValue is responsible for combining metadata.
2835 ICF->removeUsersOf(Inst: L);
2836 L->replaceAllUsesWith(V: AvailableValue);
2837 if (MSSAU)
2838 MSSAU->removeMemoryAccess(I: L);
2839 ++NumGVNLoad;
2840 reportLoadElim(Load: L, AvailableValue, ORE);
2841 salvageAndRemoveInstruction(I: L);
2842 // Tell MDA to reexamine the reused pointer since we might have more
2843 // information after forwarding it.
2844 if (MD && AvailableValue->getType()->isPtrOrPtrVectorTy())
2845 MD->invalidateCachedPointerInfo(Ptr: AvailableValue);
2846 return true;
2847}
2848
2849// Attempt to process masked loads which have loaded from
2850// masked stores with the same mask
2851bool GVNPass::processMaskedLoad(IntrinsicInst *I) {
2852 if (!MD)
2853 return false;
2854 MemDepResult Dep = MD->getDependency(QueryInst: I);
2855 Instruction *DepInst = Dep.getInst();
2856 if (!DepInst || !Dep.isLocal() || !Dep.isDef())
2857 return false;
2858
2859 Value *Mask = I->getOperand(i_nocapture: 1);
2860 Value *Passthrough = I->getOperand(i_nocapture: 2);
2861 Value *StoreVal;
2862 if (!match(V: DepInst,
2863 P: m_MaskedStore(Op0: m_Value(V&: StoreVal), Op1: m_Value(), Op2: m_Specific(V: Mask))) ||
2864 StoreVal->getType() != I->getType())
2865 return false;
2866
2867 // Remove the load but generate a select for the passthrough
2868 Value *OpToForward = llvm::SelectInst::Create(C: Mask, S1: StoreVal, S2: Passthrough, NameStr: "",
2869 InsertBefore: I->getIterator());
2870
2871 ICF->removeUsersOf(Inst: I);
2872 I->replaceAllUsesWith(V: OpToForward);
2873 salvageAndRemoveInstruction(I);
2874 ++NumGVNLoad;
2875 return true;
2876}
2877
2878/// Return a pair the first field showing the value number of \p Exp and the
2879/// second field showing whether it is a value number newly created.
2880std::pair<uint32_t, bool>
2881GVNPass::ValueTable::assignExpNewValueNum(Expression &Exp) {
2882 uint32_t &E = ExpressionNumbering[Exp];
2883 bool CreateNewValNum = !E;
2884 if (CreateNewValNum) {
2885 Expressions.push_back(x: Exp);
2886 if (ExprIdx.size() < NextValueNumber + 1)
2887 ExprIdx.resize(new_size: NextValueNumber * 2);
2888 E = NextValueNumber;
2889 ExprIdx[NextValueNumber++] = NextExprNumber++;
2890 }
2891 return {E, CreateNewValNum};
2892}
2893
2894/// Return whether all the values related with the same \p num are
2895/// defined in \p BB.
2896bool GVNPass::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
2897 GVNPass &GVN) {
2898 return all_of(
2899 Range: GVN.LeaderTable.getLeaders(N: Num),
2900 P: [=](const LeaderMap::LeaderTableEntry &L) { return L.BB == BB; });
2901}
2902
2903/// Wrap phiTranslateImpl to provide caching functionality.
2904uint32_t GVNPass::ValueTable::phiTranslate(const BasicBlock *Pred,
2905 const BasicBlock *PhiBlock,
2906 uint32_t Num, GVNPass &GVN) {
2907 auto FindRes = PhiTranslateTable.find(Val: {Num, Pred});
2908 if (FindRes != PhiTranslateTable.end())
2909 return FindRes->second;
2910 uint32_t NewNum = phiTranslateImpl(BB: Pred, PhiBlock, Num, GVN);
2911 PhiTranslateTable.insert(KV: {{Num, Pred}, NewNum});
2912 return NewNum;
2913}
2914
2915// Return true if the value number \p Num and NewNum have equal value.
2916// Return false if the result is unknown.
2917bool GVNPass::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum,
2918 const BasicBlock *Pred,
2919 const BasicBlock *PhiBlock,
2920 GVNPass &GVN) {
2921 CallInst *Call = nullptr;
2922 auto Leaders = GVN.LeaderTable.getLeaders(N: Num);
2923 for (const auto &Entry : Leaders) {
2924 Call = dyn_cast<CallInst>(Val: &*Entry.Val);
2925 if (Call && Call->getParent() == PhiBlock)
2926 break;
2927 }
2928
2929 if (AA->doesNotAccessMemory(Call))
2930 return true;
2931
2932 if (!MD || !AA->onlyReadsMemory(Call))
2933 return false;
2934
2935 MemDepResult LocalDep = MD->getDependency(QueryInst: Call);
2936 if (!LocalDep.isNonLocal())
2937 return false;
2938
2939 const MemoryDependenceResults::NonLocalDepInfo &Deps =
2940 MD->getNonLocalCallDependency(QueryCall: Call);
2941
2942 // Check to see if the Call has no function local clobber.
2943 for (const NonLocalDepEntry &D : Deps) {
2944 if (D.getResult().isNonFuncLocal())
2945 return true;
2946 }
2947 return false;
2948}
2949
2950/// Translate value number \p Num using phis, so that it has the values of
2951/// the phis in BB.
2952uint32_t GVNPass::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
2953 const BasicBlock *PhiBlock,
2954 uint32_t Num, GVNPass &GVN) {
2955 // See if we can refine the value number by looking at the PN incoming value
2956 // for the given predecessor.
2957 if (PHINode *PN = NumberingPhi[Num]) {
2958 if (PN->getParent() != PhiBlock)
2959 return Num;
2960 for (unsigned I = 0; I != PN->getNumIncomingValues(); ++I) {
2961 if (PN->getIncomingBlock(i: I) != Pred)
2962 continue;
2963 if (uint32_t TransVal = lookup(V: PN->getIncomingValue(i: I), Verify: false))
2964 return TransVal;
2965 }
2966 return Num;
2967 }
2968
2969 if (BasicBlock *BB = NumberingBB[Num]) {
2970 assert(MSSA && "NumberingBB is non-empty only when using MemorySSA");
2971 // Value numbers of basic blocks are used to represent memory state in
2972 // load/store instructions and read-only function calls when said state is
2973 // set by a MemoryPhi.
2974 if (BB != PhiBlock)
2975 return Num;
2976 MemoryPhi *MPhi = MSSA->getMemoryAccess(BB);
2977 for (unsigned i = 0, N = MPhi->getNumIncomingValues(); i != N; ++i) {
2978 if (MPhi->getIncomingBlock(I: i) != Pred)
2979 continue;
2980 MemoryAccess *MA = MPhi->getIncomingValue(I: i);
2981 if (auto *PredPhi = dyn_cast<MemoryPhi>(Val: MA))
2982 return lookupOrAdd(V: PredPhi->getBlock());
2983 if (MSSA->isLiveOnEntryDef(MA))
2984 return lookupOrAdd(V: &BB->getParent()->getEntryBlock());
2985 return lookupOrAdd(V: cast<MemoryUseOrDef>(Val: MA)->getMemoryInst());
2986 }
2987 llvm_unreachable(
2988 "CFG/MemorySSA mismatch: predecessor not found among incoming blocks");
2989 }
2990
2991 // If there is any value related with Num is defined in a BB other than
2992 // PhiBlock, it cannot depend on a phi in PhiBlock without going through
2993 // a backedge. We can do an early exit in that case to save compile time.
2994 if (!areAllValsInBB(Num, BB: PhiBlock, GVN))
2995 return Num;
2996
2997 if (Num >= ExprIdx.size() || ExprIdx[Num] == 0)
2998 return Num;
2999 Expression Exp = Expressions[ExprIdx[Num]];
3000
3001 for (unsigned I = 0; I < Exp.VarArgs.size(); I++) {
3002 // For InsertValue and ExtractValue, some varargs are index numbers
3003 // instead of value numbers. Those index numbers should not be
3004 // translated.
3005 if ((I > 1 && Exp.Opcode == Instruction::InsertValue) ||
3006 (I > 0 && Exp.Opcode == Instruction::ExtractValue) ||
3007 (I > 1 && Exp.Opcode == Instruction::ShuffleVector))
3008 continue;
3009 Exp.VarArgs[I] = phiTranslate(Pred, PhiBlock, Num: Exp.VarArgs[I], GVN);
3010 }
3011
3012 if (Exp.Commutative) {
3013 assert(Exp.VarArgs.size() >= 2 && "Unsupported commutative instruction!");
3014 if (Exp.VarArgs[0] > Exp.VarArgs[1]) {
3015 std::swap(a&: Exp.VarArgs[0], b&: Exp.VarArgs[1]);
3016 uint32_t Opcode = Exp.Opcode >> 8;
3017 if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)
3018 Exp.Opcode = (Opcode << 8) |
3019 CmpInst::getSwappedPredicate(
3020 pred: static_cast<CmpInst::Predicate>(Exp.Opcode & 255));
3021 }
3022 }
3023
3024 if (uint32_t NewNum = ExpressionNumbering[Exp]) {
3025 if (Exp.Opcode == Instruction::Call && NewNum != Num)
3026 return areCallValsEqual(Num, NewNum, Pred, PhiBlock, GVN) ? NewNum : Num;
3027 return NewNum;
3028 }
3029 return Num;
3030}
3031
3032/// Erase stale entry from phiTranslate cache so phiTranslate can be computed
3033/// again.
3034void GVNPass::ValueTable::eraseTranslateCacheEntry(
3035 uint32_t Num, const BasicBlock &CurrBlock) {
3036 for (const BasicBlock *Pred : predecessors(BB: &CurrBlock))
3037 PhiTranslateTable.erase(Val: {Num, Pred});
3038}
3039
3040// In order to find a leader for a given value number at a
3041// specific basic block, we first obtain the list of all Values for that number,
3042// and then scan the list to find one whose block dominates the block in
3043// question. This is fast because dominator tree queries consist of only
3044// a few comparisons of DFS numbers.
3045Value *GVNPass::findLeader(const BasicBlock *BB, uint32_t Num) {
3046 auto Leaders = LeaderTable.getLeaders(N: Num);
3047 if (Leaders.empty())
3048 return nullptr;
3049
3050 Value *Val = nullptr;
3051 for (const auto &Entry : Leaders) {
3052 if (DT->dominates(A: Entry.BB, B: BB)) {
3053 Val = Entry.Val;
3054 if (isa<Constant>(Val))
3055 return Val;
3056 }
3057 }
3058
3059 return Val;
3060}
3061
3062/// There is an edge from 'Src' to 'Dst'. Return
3063/// true if every path from the entry block to 'Dst' passes via this edge. In
3064/// particular 'Dst' must not be reachable via another edge from 'Src'.
3065static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
3066 DominatorTree *DT) {
3067 // While in theory it is interesting to consider the case in which Dst has
3068 // more than one predecessor, because Dst might be part of a loop which is
3069 // only reachable from Src, in practice it is pointless since at the time
3070 // GVN runs all such loops have preheaders, which means that Dst will have
3071 // been changed to have only one predecessor, namely Src.
3072 const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
3073 assert((!Pred || Pred == E.getStart()) &&
3074 "No edge between these basic blocks!");
3075 return Pred != nullptr;
3076}
3077
3078void GVNPass::assignBlockRPONumber(Function &F) {
3079 BlockRPONumber.clear();
3080 uint32_t NextBlockNumber = 1;
3081 ReversePostOrderTraversal<Function *> RPOT(&F);
3082 for (BasicBlock *BB : RPOT)
3083 BlockRPONumber[BB] = NextBlockNumber++;
3084 InvalidBlockRPONumbers = false;
3085}
3086
3087/// The given values are known to be equal in every use
3088/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
3089/// 'RHS' everywhere in the scope. Returns whether a change was made.
3090/// The Root may either be a basic block edge (for conditions) or an
3091/// instruction (for assumes).
3092bool GVNPass::propagateEquality(
3093 Value *LHS, Value *RHS,
3094 const std::variant<BasicBlockEdge, Instruction *> &Root) {
3095 SmallVector<std::pair<Value*, Value*>, 4> Worklist;
3096 Worklist.push_back(Elt: std::make_pair(x&: LHS, y&: RHS));
3097 bool Changed = false;
3098 SmallVector<const BasicBlock *> DominatedBlocks;
3099 if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(ptr: &Root)) {
3100 // For speed, compute a conservative fast approximation to
3101 // DT->dominates(Root, Root.getEnd());
3102 if (isOnlyReachableViaThisEdge(E: *Edge, DT))
3103 DominatedBlocks.push_back(Elt: Edge->getEnd());
3104 } else {
3105 Instruction *I = std::get<Instruction *>(v: Root);
3106 for (const auto *Node : DT->getNode(BB: I->getParent())->children())
3107 DominatedBlocks.push_back(Elt: Node->getBlock());
3108 }
3109
3110 while (!Worklist.empty()) {
3111 std::pair<Value*, Value*> Item = Worklist.pop_back_val();
3112 LHS = Item.first; RHS = Item.second;
3113
3114 if (LHS == RHS)
3115 continue;
3116 assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");
3117
3118 // Don't try to propagate equalities between constants.
3119 if (isa<Constant>(Val: LHS) && isa<Constant>(Val: RHS))
3120 continue;
3121
3122 // Prefer a constant on the right-hand side, or an Argument if no constants.
3123 if (isa<Constant>(Val: LHS) || (isa<Argument>(Val: LHS) && !isa<Constant>(Val: RHS)))
3124 std::swap(a&: LHS, b&: RHS);
3125 assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!");
3126 const DataLayout &DL =
3127 isa<Argument>(Val: LHS)
3128 ? cast<Argument>(Val: LHS)->getParent()->getDataLayout()
3129 : cast<Instruction>(Val: LHS)->getDataLayout();
3130
3131 // If there is no obvious reason to prefer the left-hand side over the
3132 // right-hand side, ensure the longest lived term is on the right-hand side,
3133 // so the shortest lived term will be replaced by the longest lived.
3134 // This tends to expose more simplifications.
3135 uint32_t LVN = VN.lookupOrAdd(V: LHS);
3136 if ((isa<Argument>(Val: LHS) && isa<Argument>(Val: RHS)) ||
3137 (isa<Instruction>(Val: LHS) && isa<Instruction>(Val: RHS))) {
3138 // Move the 'oldest' value to the right-hand side, using the value number
3139 // as a proxy for age.
3140 uint32_t RVN = VN.lookupOrAdd(V: RHS);
3141 if (LVN < RVN) {
3142 std::swap(a&: LHS, b&: RHS);
3143 LVN = RVN;
3144 }
3145 }
3146
3147 // If value numbering later sees that an instruction in the scope is equal
3148 // to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve
3149 // the invariant that instructions only occur in the leader table for their
3150 // own value number (this is used by removeFromLeaderTable), do not do this
3151 // if RHS is an instruction (if an instruction in the scope is morphed into
3152 // LHS then it will be turned into RHS by the next GVN iteration anyway, so
3153 // using the leader table is about compiling faster, not optimizing better).
3154 // The leader table only tracks basic blocks, not edges. Only add to if we
3155 // have the simple case where the edge dominates the end.
3156 if (!isa<Instruction>(Val: RHS) && canReplacePointersIfEqual(From: LHS, To: RHS, DL))
3157 for (const BasicBlock *BB : DominatedBlocks)
3158 LeaderTable.insert(N: LVN, V: RHS, BB);
3159
3160 // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
3161 // LHS always has at least one use that is not dominated by Root, this will
3162 // never do anything if LHS has only one use.
3163 if (!LHS->hasOneUse()) {
3164 // Create a callback that captures the DL.
3165 auto CanReplacePointersCallBack = [&DL](const Use &U, const Value *To) {
3166 return canReplacePointersInUseIfEqual(U, To, DL);
3167 };
3168 unsigned NumReplacements;
3169 if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(ptr: &Root))
3170 NumReplacements = replaceDominatedUsesWithIf(
3171 From: LHS, To: RHS, DT&: *DT, Edge: *Edge, ShouldReplace: CanReplacePointersCallBack);
3172 else
3173 NumReplacements = replaceDominatedUsesWithIf(
3174 From: LHS, To: RHS, DT&: *DT, I: std::get<Instruction *>(v: Root),
3175 ShouldReplace: CanReplacePointersCallBack);
3176
3177 if (NumReplacements > 0) {
3178 Changed = true;
3179 NumGVNEqProp += NumReplacements;
3180 // Cached information for anything that uses LHS will be invalid.
3181 if (MD)
3182 MD->invalidateCachedPointerInfo(Ptr: LHS);
3183 }
3184 }
3185
3186 // Now try to deduce additional equalities from this one. For example, if
3187 // the known equality was "(A != B)" == "false" then it follows that A and B
3188 // are equal in the scope. Only boolean equalities with an explicit true or
3189 // false RHS are currently supported.
3190 if (!RHS->getType()->isIntegerTy(BitWidth: 1))
3191 // Not a boolean equality - bail out.
3192 continue;
3193 ConstantInt *CI = dyn_cast<ConstantInt>(Val: RHS);
3194 if (!CI)
3195 // RHS neither 'true' nor 'false' - bail out.
3196 continue;
3197 // Whether RHS equals 'true'. Otherwise it equals 'false'.
3198 bool IsKnownTrue = CI->isMinusOne();
3199 bool IsKnownFalse = !IsKnownTrue;
3200
3201 // If "A && B" is known true then both A and B are known true. If "A || B"
3202 // is known false then both A and B are known false.
3203 Value *A, *B;
3204 if ((IsKnownTrue && match(V: LHS, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B)))) ||
3205 (IsKnownFalse && match(V: LHS, P: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B))))) {
3206 Worklist.push_back(Elt: std::make_pair(x&: A, y&: RHS));
3207 Worklist.push_back(Elt: std::make_pair(x&: B, y&: RHS));
3208 continue;
3209 }
3210
3211 // If we are propagating an equality like "(A == B)" == "true" then also
3212 // propagate the equality A == B. When propagating a comparison such as
3213 // "(A >= B)" == "true", replace all instances of "A < B" with "false".
3214 if (CmpInst *Cmp = dyn_cast<CmpInst>(Val: LHS)) {
3215 Value *Op0 = Cmp->getOperand(i_nocapture: 0), *Op1 = Cmp->getOperand(i_nocapture: 1);
3216
3217 // If "A == B" is known true, or "A != B" is known false, then replace
3218 // A with B everywhere in the scope. For floating point operations, we
3219 // have to be careful since equality does not always imply equivalance.
3220 if (Cmp->isEquivalence(Invert: IsKnownFalse))
3221 Worklist.push_back(Elt: std::make_pair(x&: Op0, y&: Op1));
3222
3223 // If "A >= B" is known true, replace "A < B" with false everywhere.
3224 CmpInst::Predicate NotPred = Cmp->getInversePredicate();
3225 Constant *NotVal = ConstantInt::get(Ty: Cmp->getType(), V: IsKnownFalse);
3226 // Since we don't have the instruction "A < B" immediately to hand, work
3227 // out the value number that it would have and use that to find an
3228 // appropriate instruction (if any).
3229 uint32_t NextNum = VN.getNextUnusedValueNumber();
3230 uint32_t Num = VN.lookupOrAddCmp(Opcode: Cmp->getOpcode(), Predicate: NotPred, LHS: Op0, RHS: Op1);
3231 // If the number we were assigned was brand new then there is no point in
3232 // looking for an instruction realizing it: there cannot be one!
3233 if (Num < NextNum) {
3234 for (const auto &Entry : LeaderTable.getLeaders(N: Num)) {
3235 // Only look at leaders that either dominate the start of the edge,
3236 // or are dominated by the end. This check is not necessary for
3237 // correctness, it only discards cases for which the following
3238 // use replacement will not work anyway.
3239 if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(ptr: &Root)) {
3240 if (!DT->dominates(A: Entry.BB, B: Edge->getStart()) &&
3241 !DT->dominates(A: Edge->getEnd(), B: Entry.BB))
3242 continue;
3243 } else {
3244 auto *InstBB = std::get<Instruction *>(v: Root)->getParent();
3245 if (!DT->dominates(A: Entry.BB, B: InstBB) &&
3246 !DT->dominates(A: InstBB, B: Entry.BB))
3247 continue;
3248 }
3249
3250 Value *NotCmp = Entry.Val;
3251 if (NotCmp && isa<Instruction>(Val: NotCmp)) {
3252 unsigned NumReplacements;
3253 if (const BasicBlockEdge *Edge = std::get_if<BasicBlockEdge>(ptr: &Root))
3254 NumReplacements =
3255 replaceDominatedUsesWith(From: NotCmp, To: NotVal, DT&: *DT, Edge: *Edge);
3256 else
3257 NumReplacements = replaceDominatedUsesWith(
3258 From: NotCmp, To: NotVal, DT&: *DT, I: std::get<Instruction *>(v: Root));
3259 Changed |= NumReplacements > 0;
3260 NumGVNEqProp += NumReplacements;
3261 // Cached information for anything that uses NotCmp will be invalid.
3262 if (MD)
3263 MD->invalidateCachedPointerInfo(Ptr: NotCmp);
3264 }
3265 }
3266 }
3267 // Ensure that any instruction in scope that gets the "A < B" value number
3268 // is replaced with false.
3269 // The leader table only tracks basic blocks, not edges. Only add to if we
3270 // have the simple case where the edge dominates the end.
3271 for (const BasicBlock *BB : DominatedBlocks)
3272 LeaderTable.insert(N: Num, V: NotVal, BB);
3273
3274 continue;
3275 }
3276
3277 // Propagate equalities that results from truncation with no unsigned wrap
3278 // like (trunc nuw i64 %v to i1) == "true" or (trunc nuw i64 %v to i1) ==
3279 // "false"
3280 if (match(V: LHS, P: m_NUWTrunc(Op: m_Value(V&: A)))) {
3281 Worklist.emplace_back(Args&: A, Args: ConstantInt::get(Ty: A->getType(), V: IsKnownTrue));
3282 continue;
3283 }
3284
3285 if (match(V: LHS, P: m_Not(V: m_Value(V&: A)))) {
3286 Worklist.emplace_back(Args&: A, Args: ConstantInt::get(Ty: A->getType(), V: !IsKnownTrue));
3287 continue;
3288 }
3289 }
3290
3291 return Changed;
3292}
3293
3294/// When calculating availability, handle an instruction
3295/// by inserting it into the appropriate sets.
3296bool GVNPass::processInstruction(Instruction *I) {
3297 // If the instruction can be easily simplified then do so now in preference
3298 // to value numbering it. Value numbering often exposes redundancies, for
3299 // example if it determines that %y is equal to %x then the instruction
3300 // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
3301 const DataLayout &DL = I->getDataLayout();
3302 if (Value *V = simplifyInstruction(I, Q: {DL, TLI, DT, AC})) {
3303 bool Changed = false;
3304 if (!I->use_empty()) {
3305 // Simplification can cause a special instruction to become not special.
3306 // For example, devirtualization to a willreturn function.
3307 ICF->removeUsersOf(Inst: I);
3308 I->replaceAllUsesWith(V);
3309 Changed = true;
3310 }
3311 if (isInstructionTriviallyDead(I, TLI)) {
3312 salvageAndRemoveInstruction(I);
3313 Changed = true;
3314 }
3315 if (Changed) {
3316 if (MD && V->getType()->isPtrOrPtrVectorTy())
3317 MD->invalidateCachedPointerInfo(Ptr: V);
3318 ++NumGVNSimpl;
3319 return true;
3320 }
3321 }
3322
3323 if (auto *Assume = dyn_cast<AssumeInst>(Val: I))
3324 return processAssumeIntrinsic(IntrinsicI: Assume);
3325
3326 if (LoadInst *Load = dyn_cast<LoadInst>(Val: I)) {
3327 if (processLoad(L: Load))
3328 return true;
3329
3330 unsigned Num = VN.lookupOrAdd(V: Load);
3331 LeaderTable.insert(N: Num, V: Load, BB: Load->getParent());
3332 return false;
3333 }
3334
3335 if (match(V: I, P: m_Intrinsic<Intrinsic::masked_load>()) &&
3336 processMaskedLoad(I: cast<IntrinsicInst>(Val: I)))
3337 return true;
3338
3339 // For conditional branches, we can perform simple conditional propagation on
3340 // the condition value itself.
3341 if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: I)) {
3342 if (isa<Constant>(Val: BI->getCondition()))
3343 return processFoldableCondBr(BI);
3344
3345 Value *BranchCond = BI->getCondition();
3346 BasicBlock *TrueSucc = BI->getSuccessor(i: 0);
3347 BasicBlock *FalseSucc = BI->getSuccessor(i: 1);
3348 // Avoid multiple edges early.
3349 if (TrueSucc == FalseSucc)
3350 return false;
3351
3352 BasicBlock *Parent = BI->getParent();
3353 bool Changed = false;
3354
3355 Value *TrueVal = ConstantInt::getTrue(Context&: TrueSucc->getContext());
3356 BasicBlockEdge TrueE(Parent, TrueSucc);
3357 Changed |= propagateEquality(LHS: BranchCond, RHS: TrueVal, Root: TrueE);
3358
3359 Value *FalseVal = ConstantInt::getFalse(Context&: FalseSucc->getContext());
3360 BasicBlockEdge FalseE(Parent, FalseSucc);
3361 Changed |= propagateEquality(LHS: BranchCond, RHS: FalseVal, Root: FalseE);
3362
3363 return Changed;
3364 }
3365
3366 // For switches, propagate the case values into the case destinations.
3367 if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: I)) {
3368 Value *SwitchCond = SI->getCondition();
3369 BasicBlock *Parent = SI->getParent();
3370 bool Changed = false;
3371
3372 // Remember how many outgoing edges there are to every successor.
3373 SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
3374 for (BasicBlock *Succ : successors(BB: Parent))
3375 ++SwitchEdges[Succ];
3376
3377 for (const auto &Case : SI->cases()) {
3378 BasicBlock *Dst = Case.getCaseSuccessor();
3379 // If there is only a single edge, propagate the case value into it.
3380 if (SwitchEdges.lookup(Val: Dst) == 1) {
3381 BasicBlockEdge E(Parent, Dst);
3382 Changed |= propagateEquality(LHS: SwitchCond, RHS: Case.getCaseValue(), Root: E);
3383 }
3384 }
3385 return Changed;
3386 }
3387
3388 // Instructions with void type don't return a value, so there's
3389 // no point in trying to find redundancies in them.
3390 if (I->getType()->isVoidTy())
3391 return false;
3392
3393 uint32_t NextNum = VN.getNextUnusedValueNumber();
3394 unsigned Num = VN.lookupOrAdd(V: I);
3395
3396 // Allocations are always uniquely numbered, so we can save time and memory
3397 // by fast failing them.
3398 if (isa<AllocaInst>(Val: I) || I->isTerminator() || isa<PHINode>(Val: I)) {
3399 LeaderTable.insert(N: Num, V: I, BB: I->getParent());
3400 return false;
3401 }
3402
3403 // If the number we were assigned was a brand new VN, then we don't
3404 // need to do a lookup to see if the number already exists
3405 // somewhere in the domtree: it can't!
3406 if (Num >= NextNum) {
3407 LeaderTable.insert(N: Num, V: I, BB: I->getParent());
3408 return false;
3409 }
3410
3411 // Perform fast-path value-number based elimination of values inherited from
3412 // dominators.
3413 Value *Repl = findLeader(BB: I->getParent(), Num);
3414 if (!Repl) {
3415 // Failure, just remember this instance for future use.
3416 LeaderTable.insert(N: Num, V: I, BB: I->getParent());
3417 return false;
3418 }
3419
3420 if (Repl == I) {
3421 // If I was the result of a shortcut PRE, it might already be in the table
3422 // and the best replacement for itself. Nothing to do.
3423 return false;
3424 }
3425
3426 // Remove it!
3427 patchAndReplaceAllUsesWith(I, Repl);
3428 if (MD && Repl->getType()->isPtrOrPtrVectorTy())
3429 MD->invalidateCachedPointerInfo(Ptr: Repl);
3430 salvageAndRemoveInstruction(I);
3431 return true;
3432}
3433
3434/// runOnFunction - This is the main transformation entry point for a function.
3435bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
3436 const TargetLibraryInfo &RunTLI, AAResults &RunAA,
3437 MemoryDependenceResults *RunMD, LoopInfo &LI,
3438 OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) {
3439 AC = &RunAC;
3440 DT = &RunDT;
3441 VN.setDomTree(DT);
3442 TLI = &RunTLI;
3443 AA = &RunAA;
3444 VN.setAliasAnalysis(&RunAA);
3445 MD = RunMD;
3446 ImplicitControlFlowTracking ImplicitCFT;
3447 ICF = &ImplicitCFT;
3448 this->LI = &LI;
3449 VN.setMemDep(M: MD);
3450 // Propagate the MSSA-enabled flag so the value-numbering paths in
3451 // lookupOrAddCall() and computeLoadStoreVN(), which depends on whether
3452 // IsMSSAEnabled is turned on.
3453 VN.setMemorySSA(M: MSSA, MSSAEnabled: isMemorySSAEnabled());
3454 ORE = RunORE;
3455 InvalidBlockRPONumbers = true;
3456 MemorySSAUpdater Updater(MSSA);
3457 MSSAU = MSSA ? &Updater : nullptr;
3458
3459 bool Changed = false;
3460 bool ShouldContinue = true;
3461
3462 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
3463 // Merge unconditional branches, allowing PRE to catch more
3464 // optimization opportunities.
3465 for (BasicBlock &BB : make_early_inc_range(Range&: F)) {
3466 bool RemovedBlock = MergeBlockIntoPredecessor(BB: &BB, DTU: &DTU, LI: &LI, MSSAU, MemDep: MD);
3467 if (RemovedBlock)
3468 ++NumGVNBlocks;
3469
3470 Changed |= RemovedBlock;
3471 }
3472 DTU.flush();
3473
3474 unsigned Iteration = 0;
3475 while (ShouldContinue) {
3476 LLVM_DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
3477 (void) Iteration;
3478 ShouldContinue = iterateOnFunction(F);
3479 Changed |= ShouldContinue;
3480 ++Iteration;
3481 }
3482
3483 if (isScalarPREEnabled()) {
3484 // Fabricate val-num for dead-code in order to suppress assertion in
3485 // performPRE().
3486 assignValNumForDeadCode();
3487 bool PREChanged = true;
3488 while (PREChanged) {
3489 PREChanged = performPRE(F);
3490 Changed |= PREChanged;
3491 }
3492 }
3493
3494 // FIXME: Should perform GVN again after PRE does something. PRE can move
3495 // computations into blocks where they become fully redundant. Note that
3496 // we can't do this until PRE's critical edge splitting updates memdep.
3497 // Actually, when this happens, we should just fully integrate PRE into GVN.
3498
3499 cleanupGlobalSets();
3500 // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each
3501 // iteration.
3502 DeadBlocks.clear();
3503
3504 if (MSSA && VerifyMemorySSA)
3505 MSSA->verifyMemorySSA();
3506
3507 return Changed;
3508}
3509
3510bool GVNPass::processBlock(BasicBlock *BB) {
3511 if (DeadBlocks.count(key: BB))
3512 return false;
3513
3514 bool ChangedFunction = false;
3515
3516 // Since we may not have visited the input blocks of the phis, we can't
3517 // use our normal hash approach for phis. Instead, simply look for
3518 // obvious duplicates. The first pass of GVN will tend to create
3519 // identical phis, and the second or later passes can eliminate them.
3520 SmallPtrSet<PHINode *, 8> PHINodesToRemove;
3521 ChangedFunction |= EliminateDuplicatePHINodes(BB, ToRemove&: PHINodesToRemove);
3522 for (PHINode *PN : PHINodesToRemove) {
3523 removeInstruction(I: PN);
3524 }
3525 for (Instruction &Inst : make_early_inc_range(Range&: *BB))
3526 ChangedFunction |= processInstruction(I: &Inst);
3527 return ChangedFunction;
3528}
3529
3530// Instantiate an expression in a predecessor that lacked it.
3531bool GVNPass::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
3532 BasicBlock *Curr, unsigned int ValNo) {
3533 // Because we are going top-down through the block, all value numbers
3534 // will be available in the predecessor by the time we need them. Any
3535 // that weren't originally present will have been instantiated earlier
3536 // in this loop.
3537 bool Success = true;
3538 for (unsigned I = 0, E = Instr->getNumOperands(); I != E; ++I) {
3539 Value *Op = Instr->getOperand(i: I);
3540 if (isa<Argument>(Val: Op) || isa<Constant>(Val: Op) || isa<GlobalValue>(Val: Op))
3541 continue;
3542 // This could be a newly inserted instruction, in which case, we won't
3543 // find a value number, and should give up before we hurt ourselves.
3544 // FIXME: Rewrite the infrastructure to let it easier to value number
3545 // and process newly inserted instructions.
3546 if (!VN.exists(V: Op)) {
3547 Success = false;
3548 break;
3549 }
3550 uint32_t TValNo =
3551 VN.phiTranslate(Pred, PhiBlock: Curr, Num: VN.lookup(V: Op), GVN&: *this);
3552 if (Value *V = findLeader(BB: Pred, Num: TValNo)) {
3553 Instr->setOperand(i: I, Val: V);
3554 } else {
3555 Success = false;
3556 break;
3557 }
3558 }
3559
3560 // Fail out if we encounter an operand that is not available in
3561 // the PRE predecessor. This is typically because of loads which
3562 // are not value numbered precisely.
3563 if (!Success)
3564 return false;
3565
3566 Instr->insertBefore(InsertPos: Pred->getTerminator()->getIterator());
3567 Instr->setName(Instr->getName() + ".pre");
3568 Instr->setDebugLoc(Instr->getDebugLoc());
3569
3570 ICF->insertInstructionTo(Inst: Instr, BB: Pred);
3571
3572 unsigned Num = VN.lookupOrAdd(V: Instr);
3573 VN.add(V: Instr, Num);
3574
3575 // Update the availability map to include the new instruction.
3576 LeaderTable.insert(N: Num, V: Instr, BB: Pred);
3577 return true;
3578}
3579
3580bool GVNPass::performScalarPRE(Instruction *CurInst) {
3581 if (isa<AllocaInst>(Val: CurInst) || CurInst->isTerminator() ||
3582 isa<PHINode>(Val: CurInst) || CurInst->getType()->isVoidTy() ||
3583 CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
3584 CurInst->getType()->isTokenLikeTy())
3585 return false;
3586
3587 // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
3588 // sinking the compare again, and it would force the code generator to
3589 // move the i1 from processor flags or predicate registers into a general
3590 // purpose register.
3591 if (isa<CmpInst>(Val: CurInst))
3592 return false;
3593
3594 // Don't do PRE on GEPs. The inserted PHI would prevent CodeGenPrepare from
3595 // sinking the addressing mode computation back to its uses. Extending the
3596 // GEP's live range increases the register pressure, and therefore it can
3597 // introduce unnecessary spills.
3598 //
3599 // This doesn't prevent Load PRE. PHI translation will make the GEP available
3600 // to the load by moving it to the predecessor block if necessary.
3601 if (isa<GetElementPtrInst>(Val: CurInst))
3602 return false;
3603
3604 if (auto *CallB = dyn_cast<CallBase>(Val: CurInst)) {
3605 // We don't currently value number ANY inline asm calls.
3606 if (CallB->isInlineAsm())
3607 return false;
3608 }
3609
3610 uint32_t ValNo = VN.lookup(V: CurInst);
3611
3612 // Look for the predecessors for PRE opportunities. We're
3613 // only trying to solve the basic diamond case, where
3614 // a value is computed in the successor and one predecessor,
3615 // but not the other. We also explicitly disallow cases
3616 // where the successor is its own predecessor, because they're
3617 // more complicated to get right.
3618 unsigned NumWith = 0;
3619 unsigned NumWithout = 0;
3620 BasicBlock *PREPred = nullptr;
3621 BasicBlock *CurrentBlock = CurInst->getParent();
3622
3623 // Update the RPO numbers for this function.
3624 if (InvalidBlockRPONumbers)
3625 assignBlockRPONumber(F&: *CurrentBlock->getParent());
3626
3627 SmallVector<std::pair<Value *, BasicBlock *>, 8> PredMap;
3628 for (BasicBlock *P : predecessors(BB: CurrentBlock)) {
3629 // We're not interested in PRE where blocks with predecessors that are
3630 // not reachable.
3631 if (!DT->isReachableFromEntry(A: P)) {
3632 NumWithout = 2;
3633 break;
3634 }
3635 // It is not safe to do PRE when P->CurrentBlock is a loop backedge.
3636 assert(BlockRPONumber.count(P) && BlockRPONumber.count(CurrentBlock) &&
3637 "Invalid BlockRPONumber map.");
3638 if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock]) {
3639 NumWithout = 2;
3640 break;
3641 }
3642
3643 uint32_t TValNo = VN.phiTranslate(Pred: P, PhiBlock: CurrentBlock, Num: ValNo, GVN&: *this);
3644 Value *PredV = findLeader(BB: P, Num: TValNo);
3645 if (!PredV) {
3646 PredMap.push_back(Elt: std::make_pair(x: static_cast<Value *>(nullptr), y&: P));
3647 PREPred = P;
3648 ++NumWithout;
3649 } else if (PredV == CurInst) {
3650 // CurInst dominates this predecessor.
3651 NumWithout = 2;
3652 break;
3653 } else {
3654 PredMap.push_back(Elt: std::make_pair(x&: PredV, y&: P));
3655 ++NumWith;
3656 }
3657 }
3658
3659 // Don't do PRE when it might increase code size, i.e. when
3660 // we would need to insert instructions in more than one pred.
3661 if (NumWithout > 1 || NumWith == 0)
3662 return false;
3663
3664 // We may have a case where all predecessors have the instruction,
3665 // and we just need to insert a phi node. Otherwise, perform
3666 // insertion.
3667 Instruction *PREInstr = nullptr;
3668
3669 if (NumWithout != 0) {
3670 if (!isSafeToSpeculativelyExecute(I: CurInst)) {
3671 // It is only valid to insert a new instruction if the current instruction
3672 // is always executed. An instruction with implicit control flow could
3673 // prevent us from doing it. If we cannot speculate the execution, then
3674 // PRE should be prohibited.
3675 if (ICF->isDominatedByICFIFromSameBlock(Insn: CurInst))
3676 return false;
3677 }
3678
3679 // Don't do PRE across indirect branch.
3680 if (isa<IndirectBrInst>(Val: PREPred->getTerminator()))
3681 return false;
3682
3683 // We can't do PRE safely on a critical edge, so instead we schedule
3684 // the edge to be split and perform the PRE the next time we iterate
3685 // on the function.
3686 unsigned SuccNum = GetSuccessorNumber(BB: PREPred, Succ: CurrentBlock);
3687 if (isCriticalEdge(TI: PREPred->getTerminator(), SuccNum)) {
3688 ToSplit.push_back(Elt: std::make_pair(x: PREPred->getTerminator(), y&: SuccNum));
3689 return false;
3690 }
3691 // We need to insert somewhere, so let's give it a shot.
3692 PREInstr = CurInst->clone();
3693 if (!performScalarPREInsertion(Instr: PREInstr, Pred: PREPred, Curr: CurrentBlock, ValNo)) {
3694 // If we failed insertion, make sure we remove the instruction.
3695#ifndef NDEBUG
3696 verifyRemoved(PREInstr);
3697#endif
3698 PREInstr->deleteValue();
3699 return false;
3700 }
3701 }
3702
3703 // Either we should have filled in the PRE instruction, or we should
3704 // not have needed insertions.
3705 assert(PREInstr != nullptr || NumWithout == 0);
3706
3707 ++NumGVNPRE;
3708
3709 // Create a PHI to make the value available in this block.
3710 PHINode *Phi = PHINode::Create(Ty: CurInst->getType(), NumReservedValues: PredMap.size(),
3711 NameStr: CurInst->getName() + ".pre-phi");
3712 Phi->insertBefore(InsertPos: CurrentBlock->begin());
3713 for (auto &[V, BB] : PredMap) {
3714 if (V) {
3715 // If we use an existing value in this phi, we have to patch the original
3716 // value because the phi will be used to replace a later value.
3717 patchReplacementInstruction(I: CurInst, Repl: V);
3718 Phi->addIncoming(V, BB);
3719 } else
3720 Phi->addIncoming(V: PREInstr, BB: PREPred);
3721 }
3722
3723 VN.add(V: Phi, Num: ValNo);
3724 // After creating a new PHI for ValNo, the phi translate result for ValNo will
3725 // be changed, so erase the related stale entries in phi translate cache.
3726 VN.eraseTranslateCacheEntry(Num: ValNo, CurrBlock: *CurrentBlock);
3727 LeaderTable.insert(N: ValNo, V: Phi, BB: CurrentBlock);
3728 Phi->setDebugLoc(CurInst->getDebugLoc());
3729 CurInst->replaceAllUsesWith(V: Phi);
3730 if (MD && Phi->getType()->isPtrOrPtrVectorTy())
3731 MD->invalidateCachedPointerInfo(Ptr: Phi);
3732 LeaderTable.erase(N: ValNo, I: CurInst, BB: CurrentBlock);
3733
3734 LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
3735 removeInstruction(I: CurInst);
3736
3737 return true;
3738}
3739
3740/// Perform a purely local form of PRE that looks for diamond
3741/// control flow patterns and attempts to perform simple PRE at the join point.
3742bool GVNPass::performPRE(Function &F) {
3743 bool Changed = false;
3744 for (BasicBlock *CurrentBlock : depth_first(G: &F.getEntryBlock())) {
3745 // Nothing to PRE in the entry block.
3746 if (CurrentBlock == &F.getEntryBlock())
3747 continue;
3748
3749 // Don't perform PRE on an EH pad.
3750 if (CurrentBlock->isEHPad())
3751 continue;
3752
3753 for (BasicBlock::iterator BI = CurrentBlock->begin(),
3754 BE = CurrentBlock->end();
3755 BI != BE;) {
3756 Instruction *CurInst = &*BI++;
3757 Changed |= performScalarPRE(CurInst);
3758 }
3759 }
3760
3761 if (splitCriticalEdges())
3762 Changed = true;
3763
3764 return Changed;
3765}
3766
3767/// Split the critical edge connecting the given two blocks, and return
3768/// the block inserted to the critical edge.
3769BasicBlock *GVNPass::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
3770 // GVN does not require loop-simplify, do not try to preserve it if it is not
3771 // possible.
3772 BasicBlock *BB = SplitCriticalEdge(
3773 Src: Pred, Dst: Succ,
3774 Options: CriticalEdgeSplittingOptions(DT, LI, MSSAU).unsetPreserveLoopSimplify());
3775 if (BB) {
3776 if (MD)
3777 MD->invalidateCachedPredecessors();
3778 InvalidBlockRPONumbers = true;
3779 }
3780 return BB;
3781}
3782
3783/// Split critical edges found during the previous
3784/// iteration that may enable further optimization.
3785bool GVNPass::splitCriticalEdges() {
3786 if (ToSplit.empty())
3787 return false;
3788
3789 bool Changed = false;
3790 do {
3791 std::pair<Instruction *, unsigned> Edge = ToSplit.pop_back_val();
3792 Changed |= SplitCriticalEdge(TI: Edge.first, SuccNum: Edge.second,
3793 Options: CriticalEdgeSplittingOptions(DT, LI, MSSAU)) !=
3794 nullptr;
3795 } while (!ToSplit.empty());
3796 if (Changed) {
3797 if (MD)
3798 MD->invalidateCachedPredecessors();
3799 InvalidBlockRPONumbers = true;
3800 }
3801 return Changed;
3802}
3803
3804/// Executes one iteration of GVN.
3805bool GVNPass::iterateOnFunction(Function &F) {
3806 cleanupGlobalSets();
3807
3808 // Top-down walk of the dominator tree.
3809 bool Changed = false;
3810 // Needed for value numbering with phi construction to work.
3811 // RPOT walks the graph in its constructor and will not be invalidated during
3812 // processBlock.
3813 ReversePostOrderTraversal<Function *> RPOT(&F);
3814
3815 for (BasicBlock *BB : RPOT)
3816 Changed |= processBlock(BB);
3817
3818 return Changed;
3819}
3820
3821void GVNPass::cleanupGlobalSets() {
3822 VN.clear();
3823 LeaderTable.clear();
3824 BlockRPONumber.clear();
3825 ICF->clear();
3826 InvalidBlockRPONumbers = true;
3827}
3828
3829void GVNPass::removeInstruction(Instruction *I) {
3830 VN.erase(V: I);
3831 if (MD) MD->removeInstruction(InstToRemove: I);
3832 if (MSSAU)
3833 MSSAU->removeMemoryAccess(I);
3834#ifndef NDEBUG
3835 verifyRemoved(I);
3836#endif
3837 ICF->removeInstruction(Inst: I);
3838 I->eraseFromParent();
3839 ++NumGVNInstr;
3840}
3841
3842/// Verify that the specified instruction does not occur in our
3843/// internal data structures.
3844void GVNPass::verifyRemoved(const Instruction *Inst) const {
3845 VN.verifyRemoved(V: Inst);
3846}
3847
3848/// BB is declared dead, which implied other blocks become dead as well. This
3849/// function is to add all these blocks to "DeadBlocks". For the dead blocks'
3850/// live successors, update their phi nodes by replacing the operands
3851/// corresponding to dead blocks with UndefVal.
3852void GVNPass::addDeadBlock(BasicBlock *BB) {
3853 SmallVector<BasicBlock *, 4> NewDead;
3854 SmallSetVector<BasicBlock *, 4> DF;
3855
3856 NewDead.push_back(Elt: BB);
3857 while (!NewDead.empty()) {
3858 BasicBlock *D = NewDead.pop_back_val();
3859 if (DeadBlocks.count(key: D))
3860 continue;
3861
3862 // All blocks dominated by D are dead.
3863 SmallVector<BasicBlock *, 8> Dom;
3864 DT->getDescendants(R: D, Result&: Dom);
3865 DeadBlocks.insert_range(R&: Dom);
3866
3867 // Figure out the dominance-frontier(D).
3868 for (BasicBlock *B : Dom) {
3869 for (BasicBlock *S : successors(BB: B)) {
3870 if (DeadBlocks.count(key: S))
3871 continue;
3872
3873 bool AllPredDead = true;
3874 for (BasicBlock *P : predecessors(BB: S))
3875 if (!DeadBlocks.count(key: P)) {
3876 AllPredDead = false;
3877 break;
3878 }
3879
3880 if (!AllPredDead) {
3881 // S could be proved dead later on. That is why we don't update phi
3882 // operands at this moment.
3883 DF.insert(X: S);
3884 } else {
3885 // While S is not dominated by D, it is dead by now. This could take
3886 // place if S already have a dead predecessor before D is declared
3887 // dead.
3888 NewDead.push_back(Elt: S);
3889 }
3890 }
3891 }
3892 }
3893
3894 // For the dead blocks' live successors, update their phi nodes by replacing
3895 // the operands corresponding to dead blocks with UndefVal.
3896 for (BasicBlock *B : DF) {
3897 if (DeadBlocks.count(key: B))
3898 continue;
3899
3900 // First, split the critical edges. This might also create additional blocks
3901 // to preserve LoopSimplify form and adjust edges accordingly.
3902 SmallVector<BasicBlock *, 4> Preds(predecessors(BB: B));
3903 for (BasicBlock *P : Preds) {
3904 if (!DeadBlocks.count(key: P))
3905 continue;
3906
3907 if (is_contained(Range: successors(BB: P), Element: B) &&
3908 isCriticalEdge(TI: P->getTerminator(), Succ: B)) {
3909 if (BasicBlock *S = splitCriticalEdges(Pred: P, Succ: B))
3910 DeadBlocks.insert(X: P = S);
3911 }
3912 }
3913
3914 // Now poison the incoming values from the dead predecessors.
3915 for (BasicBlock *P : predecessors(BB: B)) {
3916 if (!DeadBlocks.count(key: P))
3917 continue;
3918 for (PHINode &Phi : B->phis()) {
3919 Phi.setIncomingValueForBlock(BB: P, V: PoisonValue::get(T: Phi.getType()));
3920 if (MD)
3921 MD->invalidateCachedPointerInfo(Ptr: &Phi);
3922 }
3923 }
3924 }
3925}
3926
3927// If the given branch is recognized as a foldable branch (i.e. conditional
3928// branch with constant condition), it will perform following analyses and
3929// transformation.
3930// 1) If the dead out-coming edge is a critical-edge, split it. Let
3931// R be the target of the dead out-coming edge.
3932// 1) Identify the set of dead blocks implied by the branch's dead outcoming
3933// edge. The result of this step will be {X| X is dominated by R}
3934// 2) Identify those blocks which haves at least one dead predecessor. The
3935// result of this step will be dominance-frontier(R).
3936// 3) Update the PHIs in DF(R) by replacing the operands corresponding to
3937// dead blocks with "UndefVal" in an hope these PHIs will optimized away.
3938//
3939// Return true iff *NEW* dead code are found.
3940bool GVNPass::processFoldableCondBr(CondBrInst *BI) {
3941 // If a branch has two identical successors, we cannot declare either dead.
3942 if (BI->getSuccessor(i: 0) == BI->getSuccessor(i: 1))
3943 return false;
3944
3945 ConstantInt *Cond = dyn_cast<ConstantInt>(Val: BI->getCondition());
3946 if (!Cond)
3947 return false;
3948
3949 BasicBlock *DeadRoot =
3950 Cond->getZExtValue() ? BI->getSuccessor(i: 1) : BI->getSuccessor(i: 0);
3951 if (DeadBlocks.count(key: DeadRoot))
3952 return false;
3953
3954 if (!DeadRoot->getSinglePredecessor())
3955 DeadRoot = splitCriticalEdges(Pred: BI->getParent(), Succ: DeadRoot);
3956
3957 addDeadBlock(BB: DeadRoot);
3958 return true;
3959}
3960
3961// performPRE() will trigger assert if it comes across an instruction without
3962// associated val-num. As it normally has far more live instructions than dead
3963// instructions, it makes more sense just to "fabricate" a val-number for the
3964// dead code than checking if instruction involved is dead or not.
3965void GVNPass::assignValNumForDeadCode() {
3966 for (BasicBlock *BB : DeadBlocks) {
3967 for (Instruction &Inst : *BB) {
3968 unsigned ValNum = VN.lookupOrAdd(V: &Inst);
3969 LeaderTable.insert(N: ValNum, V: &Inst, BB);
3970 }
3971 }
3972}
3973
3974class llvm::gvn::GVNLegacyPass : public FunctionPass {
3975public:
3976 static char ID; // Pass identification, replacement for typeid.
3977
3978 explicit GVNLegacyPass(bool MemDepAnalysis = GVNEnableMemDep,
3979 bool MemSSAAnalysis = GVNEnableMemorySSA,
3980 bool ScalarPRE = true)
3981 : FunctionPass(ID), Impl(GVNOptions()
3982 .setMemDep(MemDepAnalysis)
3983 .setMemorySSA(MemSSAAnalysis)
3984 .setScalarPRE(ScalarPRE)) {
3985 initializeGVNLegacyPassPass(*PassRegistry::getPassRegistry());
3986 }
3987
3988 bool runOnFunction(Function &F) override {
3989 if (skipFunction(F))
3990 return false;
3991
3992 auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
3993 if (Impl.isMemorySSAEnabled() && !MSSAWP)
3994 MSSAWP = &getAnalysis<MemorySSAWrapperPass>();
3995
3996 return Impl.runImpl(
3997 F, RunAC&: getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
3998 RunDT&: getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
3999 RunTLI: getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
4000 RunAA&: getAnalysis<AAResultsWrapperPass>().getAAResults(),
4001 RunMD: Impl.isMemDepEnabled()
4002 ? &getAnalysis<MemoryDependenceWrapperPass>().getMemDep()
4003 : nullptr,
4004 LI&: getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
4005 RunORE: &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(),
4006 MSSA: MSSAWP ? &MSSAWP->getMSSA() : nullptr);
4007 }
4008
4009 void getAnalysisUsage(AnalysisUsage &AU) const override {
4010 AU.addRequired<AssumptionCacheTracker>();
4011 AU.addRequired<DominatorTreeWrapperPass>();
4012 AU.addRequired<TargetLibraryInfoWrapperPass>();
4013 AU.addRequired<LoopInfoWrapperPass>();
4014 if (Impl.isMemDepEnabled())
4015 AU.addRequired<MemoryDependenceWrapperPass>();
4016 AU.addRequired<AAResultsWrapperPass>();
4017 AU.addPreserved<DominatorTreeWrapperPass>();
4018 AU.addPreserved<GlobalsAAWrapperPass>();
4019 AU.addPreserved<TargetLibraryInfoWrapperPass>();
4020 AU.addPreserved<LoopInfoWrapperPass>();
4021 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
4022 AU.addPreserved<MemorySSAWrapperPass>();
4023 if (Impl.isMemorySSAEnabled())
4024 AU.addRequired<MemorySSAWrapperPass>();
4025 }
4026
4027private:
4028 GVNPass Impl;
4029};
4030
4031char GVNLegacyPass::ID = 0;
4032
4033INITIALIZE_PASS_BEGIN(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
4034INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
4035INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
4036INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
4037INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
4038INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
4039INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
4040INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
4041INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
4042INITIALIZE_PASS_END(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
4043
4044// The public interface to this file...
4045FunctionPass *llvm::createGVNPass() { return new GVNLegacyPass(); }
4046FunctionPass *llvm::createGVNPass(bool ScalarPRE) {
4047 return new GVNLegacyPass(GVNEnableMemDep, GVNEnableMemorySSA, ScalarPRE);
4048}
4049