1 | //===- GVN.cpp - Eliminate redundant values and loads ---------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass performs global value numbering to eliminate fully redundant |
10 | // instructions. It also performs simple dead load elimination. |
11 | // |
12 | // Note that this pass does the value numbering itself; it does not use the |
13 | // ValueNumbering analysis passes. |
14 | // |
15 | //===----------------------------------------------------------------------===// |
16 | |
17 | #include "llvm/Transforms/Scalar/GVN.h" |
18 | #include "llvm/ADT/DenseMap.h" |
19 | #include "llvm/ADT/DepthFirstIterator.h" |
20 | #include "llvm/ADT/Hashing.h" |
21 | #include "llvm/ADT/MapVector.h" |
22 | #include "llvm/ADT/PostOrderIterator.h" |
23 | #include "llvm/ADT/STLExtras.h" |
24 | #include "llvm/ADT/SetVector.h" |
25 | #include "llvm/ADT/SmallPtrSet.h" |
26 | #include "llvm/ADT/SmallVector.h" |
27 | #include "llvm/ADT/Statistic.h" |
28 | #include "llvm/Analysis/AliasAnalysis.h" |
29 | #include "llvm/Analysis/AssumeBundleQueries.h" |
30 | #include "llvm/Analysis/AssumptionCache.h" |
31 | #include "llvm/Analysis/CFG.h" |
32 | #include "llvm/Analysis/DomTreeUpdater.h" |
33 | #include "llvm/Analysis/GlobalsModRef.h" |
34 | #include "llvm/Analysis/InstructionPrecedenceTracking.h" |
35 | #include "llvm/Analysis/InstructionSimplify.h" |
36 | #include "llvm/Analysis/Loads.h" |
37 | #include "llvm/Analysis/LoopInfo.h" |
38 | #include "llvm/Analysis/MemoryBuiltins.h" |
39 | #include "llvm/Analysis/MemoryDependenceAnalysis.h" |
40 | #include "llvm/Analysis/MemorySSA.h" |
41 | #include "llvm/Analysis/MemorySSAUpdater.h" |
42 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
43 | #include "llvm/Analysis/PHITransAddr.h" |
44 | #include "llvm/Analysis/TargetLibraryInfo.h" |
45 | #include "llvm/Analysis/ValueTracking.h" |
46 | #include "llvm/IR/Attributes.h" |
47 | #include "llvm/IR/BasicBlock.h" |
48 | #include "llvm/IR/Constant.h" |
49 | #include "llvm/IR/Constants.h" |
50 | #include "llvm/IR/DebugLoc.h" |
51 | #include "llvm/IR/Dominators.h" |
52 | #include "llvm/IR/Function.h" |
53 | #include "llvm/IR/InstrTypes.h" |
54 | #include "llvm/IR/Instruction.h" |
55 | #include "llvm/IR/Instructions.h" |
56 | #include "llvm/IR/IntrinsicInst.h" |
57 | #include "llvm/IR/LLVMContext.h" |
58 | #include "llvm/IR/Metadata.h" |
59 | #include "llvm/IR/Module.h" |
60 | #include "llvm/IR/PassManager.h" |
61 | #include "llvm/IR/PatternMatch.h" |
62 | #include "llvm/IR/Type.h" |
63 | #include "llvm/IR/Use.h" |
64 | #include "llvm/IR/Value.h" |
65 | #include "llvm/InitializePasses.h" |
66 | #include "llvm/Pass.h" |
67 | #include "llvm/Support/Casting.h" |
68 | #include "llvm/Support/CommandLine.h" |
69 | #include "llvm/Support/Compiler.h" |
70 | #include "llvm/Support/Debug.h" |
71 | #include "llvm/Support/raw_ostream.h" |
72 | #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" |
73 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
74 | #include "llvm/Transforms/Utils/Local.h" |
75 | #include "llvm/Transforms/Utils/SSAUpdater.h" |
76 | #include "llvm/Transforms/Utils/VNCoercion.h" |
77 | #include <algorithm> |
78 | #include <cassert> |
79 | #include <cstdint> |
80 | #include <optional> |
81 | #include <utility> |
82 | |
83 | using namespace llvm; |
84 | using namespace llvm::gvn; |
85 | using namespace llvm::VNCoercion; |
86 | using namespace PatternMatch; |
87 | |
88 | #define DEBUG_TYPE "gvn" |
89 | |
STATISTIC(NumGVNInstr, "Number of instructions deleted");
STATISTIC(NumGVNLoad, "Number of loads deleted");
STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
STATISTIC(NumGVNBlocks, "Number of blocks merged");
STATISTIC(NumGVNSimpl, "Number of instructions simplified");
STATISTIC(NumGVNEqProp, "Number of equalities propagated");
STATISTIC(NumPRELoad, "Number of loads PRE'd");
STATISTIC(NumPRELoopLoad, "Number of loop loads PRE'd");
STATISTIC(NumPRELoadMoved2CEPred,
          "Number of loads moved to predecessor of a critical edge in PRE");

STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax,
          "Number of blocks speculated as available in "
          "IsValueFullyAvailableInBlock(), max");
STATISTIC(MaxBBSpeculationCutoffReachedTimes,
          "Number of times we reached gvn-max-block-speculations cut-off "
          "preventing further exploration");
107 | |
static cl::opt<bool> GVNEnablePRE("enable-pre", cl::init(true), cl::Hidden);
static cl::opt<bool> GVNEnableLoadPRE("enable-load-pre", cl::init(true));
static cl::opt<bool> GVNEnableLoadInLoopPRE("enable-load-in-loop-pre",
                                            cl::init(true));
static cl::opt<bool>
    GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre",
                                    cl::init(false));
static cl::opt<bool> GVNEnableMemDep("enable-gvn-memdep", cl::init(true));
static cl::opt<bool> GVNEnableMemorySSA("enable-gvn-memoryssa",
                                        cl::init(false));

static cl::opt<uint32_t> MaxNumDeps(
    "gvn-max-num-deps", cl::Hidden, cl::init(100),
    cl::desc("Max number of dependences to attempt Load PRE (default = 100)"));

// This is based on IsValueFullyAvailableInBlockNumSpeculationsMax stat.
static cl::opt<uint32_t> MaxBBSpeculations(
    "gvn-max-block-speculations", cl::Hidden, cl::init(600),
    cl::desc("Max number of blocks we're willing to speculate on (and recurse "
             "into) when deducing if a value is fully available or not in GVN "
             "(default = 600)"));

static cl::opt<uint32_t> MaxNumVisitedInsts(
    "gvn-max-num-visited-insts", cl::Hidden, cl::init(100),
    cl::desc("Max number of visited instructions when trying to find "
             "dominating value of select dependency (default = 100)"));

static cl::opt<uint32_t> MaxNumInsnsPerBlock(
    "gvn-max-num-insns", cl::Hidden, cl::init(100),
    cl::desc("Max number of instructions to scan in each basic block in GVN "
             "(default = 100)"));
139 | |
140 | struct llvm::GVNPass::Expression { |
141 | uint32_t Opcode; |
142 | bool Commutative = false; |
  // The type is not necessarily the result type of the expression; it may be
  // any additional type needed to disambiguate the expression.
145 | Type *Ty = nullptr; |
146 | SmallVector<uint32_t, 4> VarArgs; |
147 | |
148 | AttributeList Attrs; |
149 | |
150 | Expression(uint32_t Op = ~2U) : Opcode(Op) {} |
151 | |
152 | bool operator==(const Expression &Other) const { |
153 | if (Opcode != Other.Opcode) |
154 | return false; |
155 | if (Opcode == ~0U || Opcode == ~1U) |
156 | return true; |
157 | if (Ty != Other.Ty) |
158 | return false; |
159 | if (VarArgs != Other.VarArgs) |
160 | return false; |
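    // Call attributes need not match exactly; two expressions are considered
    // equal as long as their attribute lists can be intersected into a valid
    // common set.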
    if ((!Attrs.isEmpty() || !Other.Attrs.isEmpty()) &&
        !Attrs.intersectWith(Ty->getContext(), Other.Attrs).has_value())
      return false;
164 | return true; |
165 | } |
166 | |
  friend hash_code hash_value(const Expression &Value) {
    return hash_combine(Value.Opcode, Value.Ty,
                        hash_combine_range(Value.VarArgs));
  }
171 | }; |
172 | |
173 | namespace llvm { |
174 | |
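// DenseMap traits for Expression. The sentinel opcodes ~0U and ~1U serve as
// the empty and tombstone keys; Expression::operator== treats these as equal
// on the opcode alone, so the map machinery never inspects their operands.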
175 | template <> struct DenseMapInfo<GVNPass::Expression> { |
176 | static inline GVNPass::Expression getEmptyKey() { return ~0U; } |
177 | static inline GVNPass::Expression getTombstoneKey() { return ~1U; } |
178 | |
179 | static unsigned getHashValue(const GVNPass::Expression &E) { |
180 | using llvm::hash_value; |
181 | |
182 | return static_cast<unsigned>(hash_value(Value: E)); |
183 | } |
184 | |
185 | static bool isEqual(const GVNPass::Expression &LHS, |
186 | const GVNPass::Expression &RHS) { |
187 | return LHS == RHS; |
188 | } |
189 | }; |
190 | |
191 | } // end namespace llvm |
192 | |
193 | /// Represents a particular available value that we know how to materialize. |
194 | /// Materialization of an AvailableValue never fails. An AvailableValue is |
195 | /// implicitly associated with a rematerialization point which is the |
196 | /// location of the instruction from which it was formed. |
197 | struct llvm::gvn::AvailableValue { |
198 | enum class ValType { |
199 | SimpleVal, // A simple offsetted value that is accessed. |
200 | LoadVal, // A value produced by a load. |
201 | MemIntrin, // A memory intrinsic which is loaded from. |
    UndefVal,  // An UndefValue representing a value from a dead block (which
               // is not yet physically removed from the CFG).
    SelectVal, // A pointer select which is loaded from and for which the load
               // can be replaced by a value select.
206 | }; |
207 | |
208 | /// Val - The value that is live out of the block. |
209 | Value *Val; |
210 | /// Kind of the live-out value. |
211 | ValType Kind; |
212 | |
213 | /// Offset - The byte offset in Val that is interesting for the load query. |
214 | unsigned Offset = 0; |
215 | /// V1, V2 - The dominating non-clobbered values of SelectVal. |
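  /// For example, a load from "select %c, %p, %q" may be rewritten as
  /// "select %c, V1, V2", where V1 and V2 are the values known to be stored
  /// at %p and %q, respectively.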
216 | Value *V1 = nullptr, *V2 = nullptr; |
217 | |
218 | static AvailableValue get(Value *V, unsigned Offset = 0) { |
219 | AvailableValue Res; |
220 | Res.Val = V; |
221 | Res.Kind = ValType::SimpleVal; |
222 | Res.Offset = Offset; |
223 | return Res; |
224 | } |
225 | |
226 | static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset = 0) { |
227 | AvailableValue Res; |
228 | Res.Val = MI; |
229 | Res.Kind = ValType::MemIntrin; |
230 | Res.Offset = Offset; |
231 | return Res; |
232 | } |
233 | |
234 | static AvailableValue getLoad(LoadInst *Load, unsigned Offset = 0) { |
235 | AvailableValue Res; |
236 | Res.Val = Load; |
237 | Res.Kind = ValType::LoadVal; |
238 | Res.Offset = Offset; |
239 | return Res; |
240 | } |
241 | |
242 | static AvailableValue getUndef() { |
243 | AvailableValue Res; |
244 | Res.Val = nullptr; |
245 | Res.Kind = ValType::UndefVal; |
246 | Res.Offset = 0; |
247 | return Res; |
248 | } |
249 | |
250 | static AvailableValue getSelect(SelectInst *Sel, Value *V1, Value *V2) { |
251 | AvailableValue Res; |
252 | Res.Val = Sel; |
253 | Res.Kind = ValType::SelectVal; |
254 | Res.Offset = 0; |
255 | Res.V1 = V1; |
256 | Res.V2 = V2; |
257 | return Res; |
258 | } |
259 | |
260 | bool isSimpleValue() const { return Kind == ValType::SimpleVal; } |
261 | bool isCoercedLoadValue() const { return Kind == ValType::LoadVal; } |
262 | bool isMemIntrinValue() const { return Kind == ValType::MemIntrin; } |
263 | bool isUndefValue() const { return Kind == ValType::UndefVal; } |
264 | bool isSelectValue() const { return Kind == ValType::SelectVal; } |
265 | |
266 | Value *getSimpleValue() const { |
267 | assert(isSimpleValue() && "Wrong accessor" ); |
268 | return Val; |
269 | } |
270 | |
271 | LoadInst *getCoercedLoadValue() const { |
272 | assert(isCoercedLoadValue() && "Wrong accessor" ); |
273 | return cast<LoadInst>(Val); |
274 | } |
275 | |
276 | MemIntrinsic *getMemIntrinValue() const { |
277 | assert(isMemIntrinValue() && "Wrong accessor" ); |
278 | return cast<MemIntrinsic>(Val); |
279 | } |
280 | |
281 | SelectInst *getSelectValue() const { |
282 | assert(isSelectValue() && "Wrong accessor" ); |
283 | return cast<SelectInst>(Val); |
284 | } |
285 | |
286 | /// Emit code at the specified insertion point to adjust the value defined |
287 | /// here to the specified type. This handles various coercion cases. |
288 | Value *MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt) const; |
289 | }; |
290 | |
291 | /// Represents an AvailableValue which can be rematerialized at the end of |
292 | /// the associated BasicBlock. |
293 | struct llvm::gvn::AvailableValueInBlock { |
294 | /// BB - The basic block in question. |
295 | BasicBlock *BB = nullptr; |
296 | |
297 | /// AV - The actual available value. |
298 | AvailableValue AV; |
299 | |
300 | static AvailableValueInBlock get(BasicBlock *BB, AvailableValue &&AV) { |
301 | AvailableValueInBlock Res; |
302 | Res.BB = BB; |
303 | Res.AV = std::move(AV); |
304 | return Res; |
305 | } |
306 | |
307 | static AvailableValueInBlock get(BasicBlock *BB, Value *V, |
308 | unsigned Offset = 0) { |
309 | return get(BB, AV: AvailableValue::get(V, Offset)); |
310 | } |
311 | |
312 | static AvailableValueInBlock getUndef(BasicBlock *BB) { |
313 | return get(BB, AV: AvailableValue::getUndef()); |
314 | } |
315 | |
316 | static AvailableValueInBlock getSelect(BasicBlock *BB, SelectInst *Sel, |
317 | Value *V1, Value *V2) { |
318 | return get(BB, AV: AvailableValue::getSelect(Sel, V1, V2)); |
319 | } |
320 | |
321 | /// Emit code at the end of this block to adjust the value defined here to |
322 | /// the specified type. This handles various coercion cases. |
323 | Value *MaterializeAdjustedValue(LoadInst *Load) const { |
324 | return AV.MaterializeAdjustedValue(Load, InsertPt: BB->getTerminator()); |
325 | } |
326 | }; |
327 | |
328 | //===----------------------------------------------------------------------===// |
329 | // ValueTable Internal Functions |
330 | //===----------------------------------------------------------------------===// |
331 | |
332 | GVNPass::Expression GVNPass::ValueTable::createExpr(Instruction *I) { |
333 | Expression E; |
334 | E.Ty = I->getType(); |
335 | E.Opcode = I->getOpcode(); |
  if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(I)) {
    // gc.relocate is a 'special' call: its second and third operands are
    // not real values, but indices into the statepoint's argument list.
    // Use the referred-to values for purposes of identity.
    E.VarArgs.push_back(lookupOrAdd(GCR->getOperand(0)));
    E.VarArgs.push_back(lookupOrAdd(GCR->getBasePtr()));
    E.VarArgs.push_back(lookupOrAdd(GCR->getDerivedPtr()));
  } else {
    for (Use &Op : I->operands())
      E.VarArgs.push_back(lookupOrAdd(Op));
  }
  if (I->isCommutative()) {
    // Ensure that commutative instructions that only differ by a permutation
    // of their operands get the same value number by sorting the operand
    // value numbers. Since the commutative operands are the first two
    // operands, it is more efficient to sort by hand rather than using, say,
    // std::sort.
    assert(I->getNumOperands() >= 2 && "Unsupported commutative instruction!");
    if (E.VarArgs[0] > E.VarArgs[1])
      std::swap(E.VarArgs[0], E.VarArgs[1]);
    E.Commutative = true;
  }
357 | |
358 | if (auto *C = dyn_cast<CmpInst>(Val: I)) { |
359 | // Sort the operand value numbers so x<y and y>x get the same value number. |
360 | CmpInst::Predicate Predicate = C->getPredicate(); |
361 | if (E.VarArgs[0] > E.VarArgs[1]) { |
362 | std::swap(a&: E.VarArgs[0], b&: E.VarArgs[1]); |
363 | Predicate = CmpInst::getSwappedPredicate(pred: Predicate); |
364 | } |
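    // Pack the (possibly swapped) predicate into the low byte of the opcode
    // so that, e.g., "icmp slt %x, %y" and "icmp sgt %y, %x" yield identical
    // expressions.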
365 | E.Opcode = (C->getOpcode() << 8) | Predicate; |
366 | E.Commutative = true; |
367 | } else if (auto *IVI = dyn_cast<InsertValueInst>(Val: I)) { |
368 | E.VarArgs.append(in_start: IVI->idx_begin(), in_end: IVI->idx_end()); |
369 | } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(Val: I)) { |
370 | ArrayRef<int> ShuffleMask = SVI->getShuffleMask(); |
371 | E.VarArgs.append(in_start: ShuffleMask.begin(), in_end: ShuffleMask.end()); |
372 | } else if (auto *CB = dyn_cast<CallBase>(Val: I)) { |
373 | E.Attrs = CB->getAttributes(); |
374 | } |
375 | |
376 | return E; |
377 | } |
378 | |
379 | GVNPass::Expression GVNPass::ValueTable::createCmpExpr( |
380 | unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) { |
381 | assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && |
382 | "Not a comparison!" ); |
383 | Expression E; |
384 | E.Ty = CmpInst::makeCmpResultType(opnd_type: LHS->getType()); |
385 | E.VarArgs.push_back(Elt: lookupOrAdd(V: LHS)); |
386 | E.VarArgs.push_back(Elt: lookupOrAdd(V: RHS)); |
387 | |
388 | // Sort the operand value numbers so x<y and y>x get the same value number. |
389 | if (E.VarArgs[0] > E.VarArgs[1]) { |
390 | std::swap(a&: E.VarArgs[0], b&: E.VarArgs[1]); |
391 | Predicate = CmpInst::getSwappedPredicate(pred: Predicate); |
392 | } |
393 | E.Opcode = (Opcode << 8) | Predicate; |
394 | E.Commutative = true; |
395 | return E; |
396 | } |
397 | |
GVNPass::Expression
GVNPass::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
  assert(EI && "Not an ExtractValueInst?");
401 | Expression E; |
402 | E.Ty = EI->getType(); |
403 | E.Opcode = 0; |
404 | |
405 | WithOverflowInst *WO = dyn_cast<WithOverflowInst>(Val: EI->getAggregateOperand()); |
406 | if (WO != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) { |
407 | // EI is an extract from one of our with.overflow intrinsics. Synthesize |
408 | // a semantically equivalent expression instead of an extract value |
409 | // expression. |
410 | E.Opcode = WO->getBinaryOp(); |
411 | E.VarArgs.push_back(Elt: lookupOrAdd(V: WO->getLHS())); |
412 | E.VarArgs.push_back(Elt: lookupOrAdd(V: WO->getRHS())); |
413 | return E; |
414 | } |
415 | |
416 | // Not a recognised intrinsic. Fall back to producing an extract value |
417 | // expression. |
418 | E.Opcode = EI->getOpcode(); |
419 | for (Use &Op : EI->operands()) |
420 | E.VarArgs.push_back(Elt: lookupOrAdd(V: Op)); |
421 | |
422 | append_range(C&: E.VarArgs, R: EI->indices()); |
423 | |
424 | return E; |
425 | } |
426 | |
427 | GVNPass::Expression GVNPass::ValueTable::createGEPExpr(GetElementPtrInst *GEP) { |
428 | Expression E; |
429 | Type *PtrTy = GEP->getType()->getScalarType(); |
430 | const DataLayout &DL = GEP->getDataLayout(); |
431 | unsigned BitWidth = DL.getIndexTypeSizeInBits(Ty: PtrTy); |
432 | SmallMapVector<Value *, APInt, 4> VariableOffsets; |
433 | APInt ConstantOffset(BitWidth, 0); |
434 | if (GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) { |
435 | // Convert into offset representation, to recognize equivalent address |
436 | // calculations that use different type encoding. |
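    // For example, "getelementptr i32, ptr %p, i64 1" and
    // "getelementptr i8, ptr %p, i64 4" both reduce to {%p, +4} (assuming a
    // 4-byte i32) and therefore receive the same value number.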
437 | LLVMContext &Context = GEP->getContext(); |
438 | E.Opcode = GEP->getOpcode(); |
439 | E.Ty = nullptr; |
440 | E.VarArgs.push_back(Elt: lookupOrAdd(V: GEP->getPointerOperand())); |
441 | for (const auto &[V, Scale] : VariableOffsets) { |
442 | E.VarArgs.push_back(Elt: lookupOrAdd(V)); |
443 | E.VarArgs.push_back(Elt: lookupOrAdd(V: ConstantInt::get(Context, V: Scale))); |
444 | } |
445 | if (!ConstantOffset.isZero()) |
446 | E.VarArgs.push_back( |
447 | Elt: lookupOrAdd(V: ConstantInt::get(Context, V: ConstantOffset))); |
448 | } else { |
449 | // If converting to offset representation fails (for scalable vectors), |
450 | // fall back to type-based implementation. |
451 | E.Opcode = GEP->getOpcode(); |
452 | E.Ty = GEP->getSourceElementType(); |
453 | for (Use &Op : GEP->operands()) |
454 | E.VarArgs.push_back(Elt: lookupOrAdd(V: Op)); |
455 | } |
456 | return E; |
457 | } |
458 | |
459 | //===----------------------------------------------------------------------===// |
460 | // ValueTable External Functions |
461 | //===----------------------------------------------------------------------===// |
462 | |
463 | GVNPass::ValueTable::ValueTable() = default; |
464 | GVNPass::ValueTable::ValueTable(const ValueTable &) = default; |
465 | GVNPass::ValueTable::ValueTable(ValueTable &&) = default; |
466 | GVNPass::ValueTable::~ValueTable() = default; |
467 | GVNPass::ValueTable & |
468 | GVNPass::ValueTable::operator=(const GVNPass::ValueTable &Arg) = default; |
469 | |
470 | /// add - Insert a value into the table with a specified value number. |
471 | void GVNPass::ValueTable::add(Value *V, uint32_t Num) { |
472 | ValueNumbering.insert(KV: std::make_pair(x&: V, y&: Num)); |
473 | if (PHINode *PN = dyn_cast<PHINode>(Val: V)) |
474 | NumberingPhi[Num] = PN; |
475 | } |
476 | |
477 | /// Include the incoming memory state into the hash of the expression for the |
478 | /// given instruction. If the incoming memory state is: |
479 | /// * LiveOnEntry, add the value number of the entry block, |
480 | /// * a MemoryPhi, add the value number of the basic block corresponding to that |
481 | /// MemoryPhi, |
482 | /// * a MemoryDef, add the value number of the memory setting instruction. |
483 | void GVNPass::ValueTable::addMemoryStateToExp(Instruction *I, Expression &Exp) { |
484 | assert(MSSA && "addMemoryStateToExp should not be called without MemorySSA" ); |
485 | assert(MSSA->getMemoryAccess(I) && "Instruction does not access memory" ); |
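  // Use the skip-self walker: it returns the clobber above I itself, so a
  // store is keyed on its incoming memory state rather than on its own
  // MemoryDef.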
486 | MemoryAccess *MA = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(I); |
487 | Exp.VarArgs.push_back(Elt: lookupOrAdd(MA)); |
488 | } |
489 | |
490 | uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) { |
  // FIXME: Currently, calls that may access the thread ID may be considered
  // as not accessing memory. This is problematic for coroutines, since
  // coroutines may resume in a different thread. So we disable the
  // optimization here for correctness. However, it may block many other
  // correct optimizations. Revert this once we can detect the memory
  // accessing kind more precisely.
498 | if (C->getFunction()->isPresplitCoroutine()) { |
499 | ValueNumbering[C] = NextValueNumber; |
500 | return NextValueNumber++; |
501 | } |
502 | |
503 | // Do not combine convergent calls since they implicitly depend on the set of |
504 | // threads that is currently executing, and they might be in different basic |
505 | // blocks. |
506 | if (C->isConvergent()) { |
507 | ValueNumbering[C] = NextValueNumber; |
508 | return NextValueNumber++; |
509 | } |
510 | |
511 | if (AA->doesNotAccessMemory(Call: C)) { |
512 | Expression Exp = createExpr(I: C); |
513 | uint32_t E = assignExpNewValueNum(Exp).first; |
514 | ValueNumbering[C] = E; |
515 | return E; |
516 | } |
517 | |
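  // With MemDep available, a read-only call can additionally be CSE'd against
  // an identical call that provably dominates it with no intervening write to
  // the memory it reads.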
518 | if (MD && AA->onlyReadsMemory(Call: C)) { |
519 | Expression Exp = createExpr(I: C); |
520 | auto [E, IsValNumNew] = assignExpNewValueNum(Exp); |
521 | if (IsValNumNew) { |
522 | ValueNumbering[C] = E; |
523 | return E; |
524 | } |
525 | |
526 | MemDepResult LocalDep = MD->getDependency(QueryInst: C); |
527 | |
528 | if (!LocalDep.isDef() && !LocalDep.isNonLocal()) { |
529 | ValueNumbering[C] = NextValueNumber; |
530 | return NextValueNumber++; |
531 | } |
532 | |
533 | if (LocalDep.isDef()) { |
534 | // For masked load/store intrinsics, the local_dep may actually be |
535 | // a normal load or store instruction. |
536 | CallInst *LocalDepCall = dyn_cast<CallInst>(Val: LocalDep.getInst()); |
537 | |
538 | if (!LocalDepCall || LocalDepCall->arg_size() != C->arg_size()) { |
539 | ValueNumbering[C] = NextValueNumber; |
540 | return NextValueNumber++; |
541 | } |
542 | |
543 | for (unsigned I = 0, E = C->arg_size(); I < E; ++I) { |
544 | uint32_t CVN = lookupOrAdd(V: C->getArgOperand(i: I)); |
545 | uint32_t LocalDepCallVN = lookupOrAdd(V: LocalDepCall->getArgOperand(i: I)); |
546 | if (CVN != LocalDepCallVN) { |
547 | ValueNumbering[C] = NextValueNumber; |
548 | return NextValueNumber++; |
549 | } |
550 | } |
551 | |
552 | uint32_t V = lookupOrAdd(V: LocalDepCall); |
553 | ValueNumbering[C] = V; |
554 | return V; |
555 | } |
556 | |
557 | // Non-local case. |
558 | const MemoryDependenceResults::NonLocalDepInfo &Deps = |
559 | MD->getNonLocalCallDependency(QueryCall: C); |
560 | // FIXME: Move the checking logic to MemDep! |
561 | CallInst *CDep = nullptr; |
562 | |
563 | // Check to see if we have a single dominating call instruction that is |
564 | // identical to C. |
565 | for (const NonLocalDepEntry &I : Deps) { |
566 | if (I.getResult().isNonLocal()) |
567 | continue; |
568 | |
569 | // We don't handle non-definitions. If we already have a call, reject |
570 | // instruction dependencies. |
571 | if (!I.getResult().isDef() || CDep != nullptr) { |
572 | CDep = nullptr; |
573 | break; |
574 | } |
575 | |
576 | CallInst *NonLocalDepCall = dyn_cast<CallInst>(Val: I.getResult().getInst()); |
577 | // FIXME: All duplicated with non-local case. |
578 | if (NonLocalDepCall && DT->properlyDominates(A: I.getBB(), B: C->getParent())) { |
579 | CDep = NonLocalDepCall; |
580 | continue; |
581 | } |
582 | |
583 | CDep = nullptr; |
584 | break; |
585 | } |
586 | |
587 | if (!CDep) { |
588 | ValueNumbering[C] = NextValueNumber; |
589 | return NextValueNumber++; |
590 | } |
591 | |
592 | if (CDep->arg_size() != C->arg_size()) { |
593 | ValueNumbering[C] = NextValueNumber; |
594 | return NextValueNumber++; |
595 | } |
596 | for (unsigned I = 0, E = C->arg_size(); I < E; ++I) { |
597 | uint32_t CVN = lookupOrAdd(V: C->getArgOperand(i: I)); |
598 | uint32_t CDepVN = lookupOrAdd(V: CDep->getArgOperand(i: I)); |
599 | if (CVN != CDepVN) { |
600 | ValueNumbering[C] = NextValueNumber; |
601 | return NextValueNumber++; |
602 | } |
603 | } |
604 | |
605 | uint32_t V = lookupOrAdd(V: CDep); |
606 | ValueNumbering[C] = V; |
607 | return V; |
608 | } |
609 | |
610 | if (MSSA && IsMSSAEnabled && AA->onlyReadsMemory(Call: C)) { |
611 | Expression Exp = createExpr(I: C); |
612 | addMemoryStateToExp(I: C, Exp); |
613 | auto [V, _] = assignExpNewValueNum(Exp); |
614 | ValueNumbering[C] = V; |
615 | return V; |
616 | } |
617 | |
618 | ValueNumbering[C] = NextValueNumber; |
619 | return NextValueNumber++; |
620 | } |
621 | |
622 | /// Returns the value number for the specified load or store instruction. |
623 | uint32_t GVNPass::ValueTable::computeLoadStoreVN(Instruction *I) { |
624 | if (!MSSA || !IsMSSAEnabled) { |
625 | ValueNumbering[I] = NextValueNumber; |
626 | return NextValueNumber++; |
627 | } |
628 | |
629 | Expression Exp; |
630 | Exp.Ty = I->getType(); |
631 | Exp.Opcode = I->getOpcode(); |
632 | for (Use &Op : I->operands()) |
633 | Exp.VarArgs.push_back(Elt: lookupOrAdd(V: Op)); |
634 | addMemoryStateToExp(I, Exp); |
635 | |
636 | auto [V, _] = assignExpNewValueNum(Exp); |
637 | ValueNumbering[I] = V; |
638 | return V; |
639 | } |
640 | |
641 | /// Returns true if a value number exists for the specified value. |
642 | bool GVNPass::ValueTable::exists(Value *V) const { |
643 | return ValueNumbering.contains(Val: V); |
644 | } |
645 | |
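/// Returns the value number for a MemoryAccess: LiveOnEntry and MemoryPhis
/// are numbered via their basic block, while MemoryUses and MemoryDefs are
/// numbered via the memory instruction they are attached to.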
646 | uint32_t GVNPass::ValueTable::lookupOrAdd(MemoryAccess *MA) { |
647 | return MSSA->isLiveOnEntryDef(MA) || isa<MemoryPhi>(Val: MA) |
648 | ? lookupOrAdd(V: MA->getBlock()) |
649 | : lookupOrAdd(V: cast<MemoryUseOrDef>(Val: MA)->getMemoryInst()); |
650 | } |
651 | |
652 | /// lookupOrAdd - Returns the value number for the specified value, assigning |
653 | /// it a new number if it did not have one before. |
654 | uint32_t GVNPass::ValueTable::lookupOrAdd(Value *V) { |
655 | DenseMap<Value *, uint32_t>::iterator VI = ValueNumbering.find(Val: V); |
656 | if (VI != ValueNumbering.end()) |
657 | return VI->second; |
658 | |
659 | auto *I = dyn_cast<Instruction>(Val: V); |
660 | if (!I) { |
661 | ValueNumbering[V] = NextValueNumber; |
662 | if (isa<BasicBlock>(Val: V)) |
663 | NumberingBB[NextValueNumber] = cast<BasicBlock>(Val: V); |
664 | return NextValueNumber++; |
665 | } |
666 | |
667 | Expression Exp; |
668 | switch (I->getOpcode()) { |
669 | case Instruction::Call: |
670 | return lookupOrAddCall(C: cast<CallInst>(Val: I)); |
671 | case Instruction::FNeg: |
672 | case Instruction::Add: |
673 | case Instruction::FAdd: |
674 | case Instruction::Sub: |
675 | case Instruction::FSub: |
676 | case Instruction::Mul: |
677 | case Instruction::FMul: |
678 | case Instruction::UDiv: |
679 | case Instruction::SDiv: |
680 | case Instruction::FDiv: |
681 | case Instruction::URem: |
682 | case Instruction::SRem: |
683 | case Instruction::FRem: |
684 | case Instruction::Shl: |
685 | case Instruction::LShr: |
686 | case Instruction::AShr: |
687 | case Instruction::And: |
688 | case Instruction::Or: |
689 | case Instruction::Xor: |
690 | case Instruction::ICmp: |
691 | case Instruction::FCmp: |
692 | case Instruction::Trunc: |
693 | case Instruction::ZExt: |
694 | case Instruction::SExt: |
695 | case Instruction::FPToUI: |
696 | case Instruction::FPToSI: |
697 | case Instruction::UIToFP: |
698 | case Instruction::SIToFP: |
699 | case Instruction::FPTrunc: |
700 | case Instruction::FPExt: |
701 | case Instruction::PtrToInt: |
702 | case Instruction::IntToPtr: |
703 | case Instruction::AddrSpaceCast: |
704 | case Instruction::BitCast: |
705 | case Instruction::Select: |
706 | case Instruction::Freeze: |
707 | case Instruction::ExtractElement: |
708 | case Instruction::InsertElement: |
709 | case Instruction::ShuffleVector: |
710 | case Instruction::InsertValue: |
711 | Exp = createExpr(I); |
712 | break; |
713 | case Instruction::GetElementPtr: |
714 | Exp = createGEPExpr(GEP: cast<GetElementPtrInst>(Val: I)); |
715 | break; |
716 | case Instruction::ExtractValue: |
717 | Exp = createExtractvalueExpr(EI: cast<ExtractValueInst>(Val: I)); |
718 | break; |
719 | case Instruction::PHI: |
720 | ValueNumbering[V] = NextValueNumber; |
721 | NumberingPhi[NextValueNumber] = cast<PHINode>(Val: V); |
722 | return NextValueNumber++; |
723 | case Instruction::Load: |
724 | case Instruction::Store: |
725 | return computeLoadStoreVN(I); |
726 | default: |
727 | ValueNumbering[V] = NextValueNumber; |
728 | return NextValueNumber++; |
729 | } |
730 | |
731 | uint32_t E = assignExpNewValueNum(Exp).first; |
732 | ValueNumbering[V] = E; |
733 | return E; |
734 | } |
735 | |
736 | /// Returns the value number of the specified value. Fails if |
737 | /// the value has not yet been numbered. |
738 | uint32_t GVNPass::ValueTable::lookup(Value *V, bool Verify) const { |
739 | DenseMap<Value *, uint32_t>::const_iterator VI = ValueNumbering.find(Val: V); |
740 | if (Verify) { |
741 | assert(VI != ValueNumbering.end() && "Value not numbered?" ); |
742 | return VI->second; |
743 | } |
744 | return (VI != ValueNumbering.end()) ? VI->second : 0; |
745 | } |
746 | |
747 | /// Returns the value number of the given comparison, |
748 | /// assigning it a new number if it did not have one before. Useful when |
749 | /// we deduced the result of a comparison, but don't immediately have an |
750 | /// instruction realizing that comparison to hand. |
751 | uint32_t GVNPass::ValueTable::lookupOrAddCmp(unsigned Opcode, |
752 | CmpInst::Predicate Predicate, |
753 | Value *LHS, Value *RHS) { |
754 | Expression Exp = createCmpExpr(Opcode, Predicate, LHS, RHS); |
755 | return assignExpNewValueNum(Exp).first; |
756 | } |
757 | |
758 | /// Remove all entries from the ValueTable. |
759 | void GVNPass::ValueTable::clear() { |
760 | ValueNumbering.clear(); |
761 | ExpressionNumbering.clear(); |
762 | NumberingPhi.clear(); |
763 | NumberingBB.clear(); |
764 | PhiTranslateTable.clear(); |
765 | NextValueNumber = 1; |
766 | Expressions.clear(); |
767 | ExprIdx.clear(); |
768 | NextExprNumber = 0; |
769 | } |
770 | |
/// Remove a value from the value numbering.
void GVNPass::ValueTable::erase(Value *V) {
  uint32_t Num = ValueNumbering.lookup(V);
  ValueNumbering.erase(V);
  // If V is a PHINode, V <--> value number is a one-to-one mapping.
  if (isa<PHINode>(V))
    NumberingPhi.erase(Num);
  else if (isa<BasicBlock>(V))
    NumberingBB.erase(Num);
}
781 | |
782 | /// verifyRemoved - Verify that the value is removed from all internal data |
783 | /// structures. |
784 | void GVNPass::ValueTable::verifyRemoved(const Value *V) const { |
785 | assert(!ValueNumbering.contains(V) && |
786 | "Inst still occurs in value numbering map!" ); |
787 | } |
788 | |
789 | //===----------------------------------------------------------------------===// |
790 | // LeaderMap External Functions |
791 | //===----------------------------------------------------------------------===// |
792 | |
793 | /// Push a new Value to the LeaderTable onto the list for its value number. |
794 | void GVNPass::LeaderMap::insert(uint32_t N, Value *V, const BasicBlock *BB) { |
795 | LeaderListNode &Curr = NumToLeaders[N]; |
796 | if (!Curr.Entry.Val) { |
797 | Curr.Entry.Val = V; |
798 | Curr.Entry.BB = BB; |
799 | return; |
800 | } |
801 | |
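  // The inline head entry is already occupied; chain a new node, allocated
  // from the pass-lifetime bump allocator, in right after the head.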
802 | LeaderListNode *Node = TableAllocator.Allocate<LeaderListNode>(); |
803 | Node->Entry.Val = V; |
804 | Node->Entry.BB = BB; |
805 | Node->Next = Curr.Next; |
806 | Curr.Next = Node; |
807 | } |
808 | |
809 | /// Scan the list of values corresponding to a given |
810 | /// value number, and remove the given instruction if encountered. |
811 | void GVNPass::LeaderMap::erase(uint32_t N, Instruction *I, |
812 | const BasicBlock *BB) { |
813 | LeaderListNode *Prev = nullptr; |
814 | LeaderListNode *Curr = &NumToLeaders[N]; |
815 | |
816 | while (Curr && (Curr->Entry.Val != I || Curr->Entry.BB != BB)) { |
817 | Prev = Curr; |
818 | Curr = Curr->Next; |
819 | } |
820 | |
821 | if (!Curr) |
822 | return; |
823 | |
824 | if (Prev) { |
825 | Prev->Next = Curr->Next; |
826 | } else { |
827 | if (!Curr->Next) { |
828 | Curr->Entry.Val = nullptr; |
829 | Curr->Entry.BB = nullptr; |
830 | } else { |
831 | LeaderListNode *Next = Curr->Next; |
832 | Curr->Entry.Val = Next->Entry.Val; |
833 | Curr->Entry.BB = Next->Entry.BB; |
834 | Curr->Next = Next->Next; |
835 | } |
836 | } |
837 | } |
838 | |
839 | void GVNPass::LeaderMap::verifyRemoved(const Value *V) const { |
840 | // Walk through the value number scope to make sure the instruction isn't |
841 | // ferreted away in it. |
842 | for (const auto &I : NumToLeaders) { |
843 | (void)I; |
844 | assert(I.second.Entry.Val != V && "Inst still in value numbering scope!" ); |
845 | assert( |
846 | std::none_of(leader_iterator(&I.second), leader_iterator(nullptr), |
847 | [=](const LeaderTableEntry &E) { return E.Val == V; }) && |
848 | "Inst still in value numbering scope!" ); |
849 | } |
850 | } |
851 | |
852 | //===----------------------------------------------------------------------===// |
853 | // GVN Pass |
854 | //===----------------------------------------------------------------------===// |
855 | |
856 | bool GVNPass::isPREEnabled() const { |
857 | return Options.AllowPRE.value_or(u&: GVNEnablePRE); |
858 | } |
859 | |
860 | bool GVNPass::isLoadPREEnabled() const { |
861 | return Options.AllowLoadPRE.value_or(u&: GVNEnableLoadPRE); |
862 | } |
863 | |
864 | bool GVNPass::isLoadInLoopPREEnabled() const { |
865 | return Options.AllowLoadInLoopPRE.value_or(u&: GVNEnableLoadInLoopPRE); |
866 | } |
867 | |
868 | bool GVNPass::isLoadPRESplitBackedgeEnabled() const { |
869 | return Options.AllowLoadPRESplitBackedge.value_or( |
870 | u&: GVNEnableSplitBackedgeInLoadPRE); |
871 | } |
872 | |
873 | bool GVNPass::isMemDepEnabled() const { |
874 | return Options.AllowMemDep.value_or(u&: GVNEnableMemDep); |
875 | } |
876 | |
877 | bool GVNPass::isMemorySSAEnabled() const { |
878 | return Options.AllowMemorySSA.value_or(u&: GVNEnableMemorySSA); |
879 | } |
880 | |
881 | PreservedAnalyses GVNPass::run(Function &F, FunctionAnalysisManager &AM) { |
882 | // FIXME: The order of evaluation of these 'getResult' calls is very |
883 | // significant! Re-ordering these variables will cause GVN when run alone to |
884 | // be less effective! We should fix memdep and basic-aa to not exhibit this |
885 | // behavior, but until then don't change the order here. |
886 | auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F); |
887 | auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F); |
888 | auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F); |
889 | auto &AA = AM.getResult<AAManager>(IR&: F); |
890 | auto *MemDep = |
891 | isMemDepEnabled() ? &AM.getResult<MemoryDependenceAnalysis>(IR&: F) : nullptr; |
892 | auto &LI = AM.getResult<LoopAnalysis>(IR&: F); |
893 | auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(IR&: F); |
894 | if (isMemorySSAEnabled() && !MSSA) { |
895 | assert(!MemDep && |
896 | "On-demand computation of MemSSA implies that MemDep is disabled!" ); |
897 | MSSA = &AM.getResult<MemorySSAAnalysis>(IR&: F); |
898 | } |
899 | auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F); |
900 | bool Changed = runImpl(F, RunAC&: AC, RunDT&: DT, RunTLI: TLI, RunAA&: AA, RunMD: MemDep, LI, ORE: &ORE, |
901 | MSSA: MSSA ? &MSSA->getMSSA() : nullptr); |
902 | if (!Changed) |
903 | return PreservedAnalyses::all(); |
904 | PreservedAnalyses PA; |
905 | PA.preserve<DominatorTreeAnalysis>(); |
906 | PA.preserve<TargetLibraryAnalysis>(); |
907 | if (MSSA) |
908 | PA.preserve<MemorySSAAnalysis>(); |
909 | PA.preserve<LoopAnalysis>(); |
910 | return PA; |
911 | } |
912 | |
913 | void GVNPass::printPipeline( |
914 | raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { |
915 | static_cast<PassInfoMixin<GVNPass> *>(this)->printPipeline( |
916 | OS, MapClassName2PassName); |
917 | |
918 | OS << '<'; |
919 | if (Options.AllowPRE != std::nullopt) |
920 | OS << (*Options.AllowPRE ? "" : "no-" ) << "pre;" ; |
921 | if (Options.AllowLoadPRE != std::nullopt) |
922 | OS << (*Options.AllowLoadPRE ? "" : "no-" ) << "load-pre;" ; |
923 | if (Options.AllowLoadPRESplitBackedge != std::nullopt) |
924 | OS << (*Options.AllowLoadPRESplitBackedge ? "" : "no-" ) |
925 | << "split-backedge-load-pre;" ; |
926 | if (Options.AllowMemDep != std::nullopt) |
927 | OS << (*Options.AllowMemDep ? "" : "no-" ) << "memdep;" ; |
928 | if (Options.AllowMemorySSA != std::nullopt) |
929 | OS << (*Options.AllowMemorySSA ? "" : "no-" ) << "memoryssa" ; |
930 | OS << '>'; |
931 | } |
932 | |
933 | void GVNPass::salvageAndRemoveInstruction(Instruction *I) { |
934 | salvageKnowledge(I, AC); |
935 | salvageDebugInfo(I&: *I); |
936 | removeInstruction(I); |
937 | } |
938 | |
939 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
940 | LLVM_DUMP_METHOD void GVNPass::dump(DenseMap<uint32_t, Value *> &Map) const { |
941 | errs() << "{\n" ; |
942 | for (const auto &[Num, Exp] : Map) { |
943 | errs() << Num << "\n" ; |
944 | Exp->dump(); |
945 | } |
946 | errs() << "}\n" ; |
947 | } |
948 | #endif |
949 | |
950 | enum class AvailabilityState : char { |
951 | /// We know the block *is not* fully available. This is a fixpoint. |
952 | Unavailable = 0, |
953 | /// We know the block *is* fully available. This is a fixpoint. |
954 | Available = 1, |
955 | /// We do not know whether the block is fully available or not, |
956 | /// but we are currently speculating that it will be. |
957 | /// If it would have turned out that the block was, in fact, not fully |
958 | /// available, this would have been cleaned up into an Unavailable. |
959 | SpeculativelyAvailable = 2, |
960 | }; |
961 | |
962 | /// Return true if we can prove that the value |
963 | /// we're analyzing is fully available in the specified block. As we go, keep |
964 | /// track of which blocks we know are fully alive in FullyAvailableBlocks. This |
965 | /// map is actually a tri-state map with the following values: |
966 | /// 0) we know the block *is not* fully available. |
967 | /// 1) we know the block *is* fully available. |
968 | /// 2) we do not know whether the block is fully available or not, but we are |
969 | /// currently speculating that it will be. |
970 | static bool IsValueFullyAvailableInBlock( |
971 | BasicBlock *BB, |
972 | DenseMap<BasicBlock *, AvailabilityState> &FullyAvailableBlocks) { |
973 | SmallVector<BasicBlock *, 32> Worklist; |
974 | std::optional<BasicBlock *> UnavailableBB; |
975 | |
976 | // The number of times we didn't find an entry for a block in a map and |
977 | // optimistically inserted an entry marking block as speculatively available. |
978 | unsigned NumNewNewSpeculativelyAvailableBBs = 0; |
979 | |
980 | #ifndef NDEBUG |
981 | SmallSet<BasicBlock *, 32> NewSpeculativelyAvailableBBs; |
982 | SmallVector<BasicBlock *, 32> AvailableBBs; |
983 | #endif |
984 | |
  Worklist.emplace_back(BB);
  while (!Worklist.empty()) {
    BasicBlock *CurrBB = Worklist.pop_back_val(); // LIFO - depth-first!
    // Optimistically assume that the block is Speculatively Available and
    // check to see if we already know about this block in one lookup.
    std::pair<DenseMap<BasicBlock *, AvailabilityState>::iterator, bool> IV =
        FullyAvailableBlocks.try_emplace(
            CurrBB, AvailabilityState::SpeculativelyAvailable);
    AvailabilityState &State = IV.first->second;
994 | |
995 | // Did the entry already exist for this block? |
996 | if (!IV.second) { |
997 | if (State == AvailabilityState::Unavailable) { |
998 | UnavailableBB = CurrBB; |
999 | break; // Backpropagate unavailability info. |
1000 | } |
1001 | |
1002 | #ifndef NDEBUG |
1003 | AvailableBBs.emplace_back(CurrBB); |
1004 | #endif |
1005 | continue; // Don't recurse further, but continue processing worklist. |
1006 | } |
1007 | |
1008 | // No entry found for block. |
1009 | ++NumNewNewSpeculativelyAvailableBBs; |
1010 | bool OutOfBudget = NumNewNewSpeculativelyAvailableBBs > MaxBBSpeculations; |
1011 | |
1012 | // If we have exhausted our budget, mark this block as unavailable. |
1013 | // Also, if this block has no predecessors, the value isn't live-in here. |
1014 | if (OutOfBudget || pred_empty(BB: CurrBB)) { |
1015 | MaxBBSpeculationCutoffReachedTimes += (int)OutOfBudget; |
1016 | State = AvailabilityState::Unavailable; |
1017 | UnavailableBB = CurrBB; |
1018 | break; // Backpropagate unavailability info. |
1019 | } |
1020 | |
1021 | // Tentatively consider this block as speculatively available. |
1022 | #ifndef NDEBUG |
1023 | NewSpeculativelyAvailableBBs.insert(CurrBB); |
1024 | #endif |
1025 | // And further recurse into block's predecessors, in depth-first order! |
1026 | Worklist.append(in_start: pred_begin(BB: CurrBB), in_end: pred_end(BB: CurrBB)); |
1027 | } |
1028 | |
1029 | #if LLVM_ENABLE_STATS |
1030 | IsValueFullyAvailableInBlockNumSpeculationsMax.updateMax( |
1031 | NumNewNewSpeculativelyAvailableBBs); |
1032 | #endif |
1033 | |
  // Mark the given block as a fixpoint if it isn't marked as one yet
  // (the Unavailable and Available states are fixpoints), and enqueue its
  // successors for further processing.
  auto MarkAsFixpointAndEnqueueSuccessors =
      [&](BasicBlock *BB, AvailabilityState FixpointState) {
1038 | auto It = FullyAvailableBlocks.find(Val: BB); |
1039 | if (It == FullyAvailableBlocks.end()) |
1040 | return; // Never queried this block, leave as-is. |
1041 | switch (AvailabilityState &State = It->second) { |
1042 | case AvailabilityState::Unavailable: |
1043 | case AvailabilityState::Available: |
1044 | return; // Don't backpropagate further, continue processing worklist. |
1045 | case AvailabilityState::SpeculativelyAvailable: // Fix it! |
1046 | State = FixpointState; |
1047 | #ifndef NDEBUG |
1048 | assert(NewSpeculativelyAvailableBBs.erase(BB) && |
1049 | "Found a speculatively available successor leftover?" ); |
1050 | #endif |
1051 | // Queue successors for further processing. |
1052 | Worklist.append(in_start: succ_begin(BB), in_end: succ_end(BB)); |
1053 | return; |
1054 | } |
1055 | }; |
1056 | |
1057 | if (UnavailableBB) { |
1058 | // Okay, we have encountered an unavailable block. |
1059 | // Mark speculatively available blocks reachable from UnavailableBB as |
1060 | // unavailable as well. Paths are terminated when they reach blocks not in |
1061 | // FullyAvailableBlocks or they are not marked as speculatively available. |
1062 | Worklist.clear(); |
1063 | Worklist.append(in_start: succ_begin(BB: *UnavailableBB), in_end: succ_end(BB: *UnavailableBB)); |
1064 | while (!Worklist.empty()) |
1065 | MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(), |
1066 | AvailabilityState::Unavailable); |
1067 | } |
1068 | |
1069 | #ifndef NDEBUG |
1070 | Worklist.clear(); |
1071 | for (BasicBlock *AvailableBB : AvailableBBs) |
1072 | Worklist.append(succ_begin(AvailableBB), succ_end(AvailableBB)); |
1073 | while (!Worklist.empty()) |
1074 | MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(), |
1075 | AvailabilityState::Available); |
1076 | |
1077 | assert(NewSpeculativelyAvailableBBs.empty() && |
1078 | "Must have fixed all the new speculatively available blocks." ); |
1079 | #endif |
1080 | |
1081 | return !UnavailableBB; |
1082 | } |
1083 | |
1084 | /// If the specified OldValue exists in ValuesPerBlock, replace its value with |
1085 | /// NewValue. |
1086 | static void replaceValuesPerBlockEntry( |
1087 | SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, Value *OldValue, |
1088 | Value *NewValue) { |
1089 | for (AvailableValueInBlock &V : ValuesPerBlock) { |
1090 | if (V.AV.Val == OldValue) |
1091 | V.AV.Val = NewValue; |
1092 | if (V.AV.isSelectValue()) { |
1093 | if (V.AV.V1 == OldValue) |
1094 | V.AV.V1 = NewValue; |
1095 | if (V.AV.V2 == OldValue) |
1096 | V.AV.V2 = NewValue; |
1097 | } |
1098 | } |
1099 | } |
1100 | |
1101 | /// Given a set of loads specified by ValuesPerBlock, |
1102 | /// construct SSA form, allowing us to eliminate Load. This returns the value |
1103 | /// that should be used at Load's definition site. |
1104 | static Value * |
1105 | ConstructSSAForLoadSet(LoadInst *Load, |
1106 | SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, |
1107 | GVNPass &GVN) { |
1108 | // Check for the fully redundant, dominating load case. In this case, we can |
1109 | // just use the dominating value directly. |
1110 | if (ValuesPerBlock.size() == 1 && |
1111 | GVN.getDominatorTree().properlyDominates(A: ValuesPerBlock[0].BB, |
1112 | B: Load->getParent())) { |
    assert(!ValuesPerBlock[0].AV.isUndefValue() &&
           "Dead BB dominates this block");
1115 | return ValuesPerBlock[0].MaterializeAdjustedValue(Load); |
1116 | } |
1117 | |
1118 | // Otherwise, we have to construct SSA form. |
1119 | SmallVector<PHINode*, 8> NewPHIs; |
1120 | SSAUpdater SSAUpdate(&NewPHIs); |
1121 | SSAUpdate.Initialize(Ty: Load->getType(), Name: Load->getName()); |
1122 | |
1123 | for (const AvailableValueInBlock &AV : ValuesPerBlock) { |
1124 | BasicBlock *BB = AV.BB; |
1125 | |
1126 | if (AV.AV.isUndefValue()) |
1127 | continue; |
1128 | |
1129 | if (SSAUpdate.HasValueForBlock(BB)) |
1130 | continue; |
1131 | |
1132 | // If the value is the load that we will be eliminating, and the block it's |
1133 | // available in is the block that the load is in, then don't add it as |
1134 | // SSAUpdater will resolve the value to the relevant phi which may let it |
1135 | // avoid phi construction entirely if there's actually only one value. |
1136 | if (BB == Load->getParent() && |
1137 | ((AV.AV.isSimpleValue() && AV.AV.getSimpleValue() == Load) || |
1138 | (AV.AV.isCoercedLoadValue() && AV.AV.getCoercedLoadValue() == Load))) |
1139 | continue; |
1140 | |
1141 | SSAUpdate.AddAvailableValue(BB, V: AV.MaterializeAdjustedValue(Load)); |
1142 | } |
1143 | |
1144 | // Perform PHI construction. |
1145 | return SSAUpdate.GetValueInMiddleOfBlock(BB: Load->getParent()); |
1146 | } |
1147 | |
1148 | Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load, |
1149 | Instruction *InsertPt) const { |
1150 | Value *Res; |
1151 | Type *LoadTy = Load->getType(); |
1152 | const DataLayout &DL = Load->getDataLayout(); |
1153 | if (isSimpleValue()) { |
1154 | Res = getSimpleValue(); |
1155 | if (Res->getType() != LoadTy) { |
1156 | Res = getValueForLoad(SrcVal: Res, Offset, LoadTy, InsertPt, F: Load->getFunction()); |
1157 | |
1158 | LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset |
1159 | << " " << *getSimpleValue() << '\n' |
1160 | << *Res << '\n' |
1161 | << "\n\n\n" ); |
1162 | } |
1163 | } else if (isCoercedLoadValue()) { |
1164 | LoadInst *CoercedLoad = getCoercedLoadValue(); |
1165 | if (CoercedLoad->getType() == LoadTy && Offset == 0) { |
1166 | Res = CoercedLoad; |
1167 | combineMetadataForCSE(K: CoercedLoad, J: Load, DoesKMove: false); |
1168 | } else { |
1169 | Res = getValueForLoad(SrcVal: CoercedLoad, Offset, LoadTy, InsertPt, |
1170 | F: Load->getFunction()); |
1171 | // We are adding a new user for this load, for which the original |
1172 | // metadata may not hold. Additionally, the new load may have a different |
1173 | // size and type, so their metadata cannot be combined in any |
1174 | // straightforward way. |
1175 | // Drop all metadata that is not known to cause immediate UB on violation, |
1176 | // unless the load has !noundef, in which case all metadata violations |
1177 | // will be promoted to UB. |
1178 | // TODO: We can combine noalias/alias.scope metadata here, because it is |
1179 | // independent of the load type. |
1180 | if (!CoercedLoad->hasMetadata(KindID: LLVMContext::MD_noundef)) |
1181 | CoercedLoad->dropUnknownNonDebugMetadata( |
1182 | KnownIDs: {LLVMContext::MD_dereferenceable, |
1183 | LLVMContext::MD_dereferenceable_or_null, |
1184 | LLVMContext::MD_invariant_load, LLVMContext::MD_invariant_group}); |
1185 | LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset |
1186 | << " " << *getCoercedLoadValue() << '\n' |
1187 | << *Res << '\n' |
1188 | << "\n\n\n" ); |
1189 | } |
1190 | } else if (isMemIntrinValue()) { |
1191 | Res = getMemInstValueForLoad(SrcInst: getMemIntrinValue(), Offset, LoadTy, |
1192 | InsertPt, DL); |
1193 | LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset |
1194 | << " " << *getMemIntrinValue() << '\n' |
1195 | << *Res << '\n' |
1196 | << "\n\n\n" ); |
1197 | } else if (isSelectValue()) { |
1198 | // Introduce a new value select for a load from an eligible pointer select. |
1199 | SelectInst *Sel = getSelectValue(); |
1200 | assert(V1 && V2 && "both value operands of the select must be present" ); |
1201 | Res = |
1202 | SelectInst::Create(C: Sel->getCondition(), S1: V1, S2: V2, NameStr: "" , InsertBefore: Sel->getIterator()); |
1203 | // We use the DebugLoc from the original load here, as this instruction |
1204 | // materializes the value that would previously have been loaded. |
1205 | cast<SelectInst>(Val: Res)->setDebugLoc(Load->getDebugLoc()); |
1206 | } else { |
1207 | llvm_unreachable("Should not materialize value from dead block" ); |
1208 | } |
1209 | assert(Res && "failed to materialize?" ); |
1210 | return Res; |
1211 | } |
1212 | |
1213 | static bool isLifetimeStart(const Instruction *Inst) { |
1214 | if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Val: Inst)) |
1215 | return II->getIntrinsicID() == Intrinsic::lifetime_start; |
1216 | return false; |
1217 | } |
1218 | |
1219 | /// Assuming To can be reached from both From and Between, does Between lie on |
1220 | /// every path from From to To? |
1221 | static bool liesBetween(const Instruction *From, Instruction *Between, |
1222 | const Instruction *To, const DominatorTree *DT) { |
1223 | if (From->getParent() == Between->getParent()) |
1224 | return DT->dominates(Def: From, User: Between); |
1225 | SmallSet<BasicBlock *, 1> Exclusion; |
1226 | Exclusion.insert(Ptr: Between->getParent()); |
1227 | return !isPotentiallyReachable(From, To, ExclusionSet: &Exclusion, DT); |
1228 | } |
1229 | |
1230 | static const Instruction *findMayClobberedPtrAccess(LoadInst *Load, |
1231 | const DominatorTree *DT) { |
1232 | Value *PtrOp = Load->getPointerOperand(); |
1233 | if (!PtrOp->hasUseList()) |
1234 | return nullptr; |
1235 | |
1236 | Instruction *OtherAccess = nullptr; |
1237 | |
1238 | for (auto *U : PtrOp->users()) { |
1239 | if (U != Load && (isa<LoadInst>(Val: U) || isa<StoreInst>(Val: U))) { |
1240 | auto *I = cast<Instruction>(Val: U); |
1241 | if (I->getFunction() == Load->getFunction() && DT->dominates(Def: I, User: Load)) { |
1242 | // Use the most immediately dominating value. |
1243 | if (OtherAccess) { |
1244 | if (DT->dominates(Def: OtherAccess, User: I)) |
1245 | OtherAccess = I; |
1246 | else |
1247 | assert(U == OtherAccess || DT->dominates(I, OtherAccess)); |
1248 | } else |
1249 | OtherAccess = I; |
1250 | } |
1251 | } |
1252 | } |
1253 | |
1254 | if (OtherAccess) |
1255 | return OtherAccess; |
1256 | |
1257 | // There is no dominating use, check if we can find a closest non-dominating |
1258 | // use that lies between any other potentially available use and Load. |
1259 | for (auto *U : PtrOp->users()) { |
1260 | if (U != Load && (isa<LoadInst>(Val: U) || isa<StoreInst>(Val: U))) { |
1261 | auto *I = cast<Instruction>(Val: U); |
1262 | if (I->getFunction() == Load->getFunction() && |
1263 | isPotentiallyReachable(From: I, To: Load, ExclusionSet: nullptr, DT)) { |
1264 | if (OtherAccess) { |
1265 | if (liesBetween(From: OtherAccess, Between: I, To: Load, DT)) { |
1266 | OtherAccess = I; |
1267 | } else if (!liesBetween(From: I, Between: OtherAccess, To: Load, DT)) { |
1268 | // These uses are both partially available at Load were it not for |
1269 | // the clobber, but neither lies strictly after the other. |
1270 | OtherAccess = nullptr; |
1271 | break; |
1272 | } // else: keep current OtherAccess since it lies between U and |
1273 | // Load. |
1274 | } else { |
1275 | OtherAccess = I; |
1276 | } |
1277 | } |
1278 | } |
1279 | } |
1280 | |
1281 | return OtherAccess; |
1282 | } |
1283 | |
/// Try to locate the three instructions involved in a missed
/// load-elimination case that is due to an intervening store.
static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo,
                                   const DominatorTree *DT,
                                   OptimizationRemarkEmitter *ORE) {
1289 | using namespace ore; |
1290 | |
1291 | OptimizationRemarkMissed R(DEBUG_TYPE, "LoadClobbered" , Load); |
1292 | R << "load of type " << NV("Type" , Load->getType()) << " not eliminated" |
1293 | << setExtraArgs(); |
1294 | |
1295 | const Instruction *OtherAccess = findMayClobberedPtrAccess(Load, DT); |
1296 | if (OtherAccess) |
1297 | R << " in favor of " << NV("OtherAccess" , OtherAccess); |
1298 | |
1299 | R << " because it is clobbered by " << NV("ClobberedBy" , DepInfo.getInst()); |
1300 | |
1301 | ORE->emit(OptDiag&: R); |
1302 | } |
1303 | |
1304 | // Find non-clobbered value for Loc memory location in extended basic block |
1305 | // (chain of basic blocks with single predecessors) starting From instruction. |
1306 | static Value *findDominatingValue(const MemoryLocation &Loc, Type *LoadTy, |
1307 | Instruction *From, AAResults *AA) { |
1308 | uint32_t NumVisitedInsts = 0; |
1309 | BasicBlock *FromBB = From->getParent(); |
1310 | BatchAAResults BatchAA(*AA); |
1311 | for (BasicBlock *BB = FromBB; BB; BB = BB->getSinglePredecessor()) |
1312 | for (auto *Inst = BB == FromBB ? From : BB->getTerminator(); |
1313 | Inst != nullptr; Inst = Inst->getPrevNonDebugInstruction()) { |
1314 | // Stop the search if limit is reached. |
1315 | if (++NumVisitedInsts > MaxNumVisitedInsts) |
1316 | return nullptr; |
1317 | if (isModSet(MRI: BatchAA.getModRefInfo(I: Inst, OptLoc: Loc))) |
1318 | return nullptr; |
1319 | if (auto *LI = dyn_cast<LoadInst>(Val: Inst)) |
1320 | if (LI->getPointerOperand() == Loc.Ptr && LI->getType() == LoadTy) |
1321 | return LI; |
1322 | } |
1323 | return nullptr; |
1324 | } |
1325 | |
1326 | std::optional<AvailableValue> |
1327 | GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo, |
1328 | Value *Address) { |
1329 | assert(Load->isUnordered() && "rules below are incorrect for ordered access" ); |
1330 | assert(DepInfo.isLocal() && "expected a local dependence" ); |
1331 | |
1332 | Instruction *DepInst = DepInfo.getInst(); |
1333 | |
1334 | const DataLayout &DL = Load->getDataLayout(); |
1335 | if (DepInfo.isClobber()) { |
1336 | // If the dependence is to a store that writes to a superset of the bits |
1337 | // read by the load, we can extract the bits we need for the load from the |
1338 | // stored value. |
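    // For example, "store i32 %v, ptr %p" followed by "load i16, ptr %p" can
    // reuse the relevant bits of %v instead of re-reading memory.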
1339 | if (StoreInst *DepSI = dyn_cast<StoreInst>(Val: DepInst)) { |
1340 | // Can't forward from non-atomic to atomic without violating memory model. |
1341 | if (Address && Load->isAtomic() <= DepSI->isAtomic()) { |
1342 | int Offset = |
1343 | analyzeLoadFromClobberingStore(LoadTy: Load->getType(), LoadPtr: Address, DepSI, DL); |
1344 | if (Offset != -1) |
1345 | return AvailableValue::get(V: DepSI->getValueOperand(), Offset); |
1346 | } |
1347 | } |
1348 | |
1349 | // Check to see if we have something like this: |
1350 | // load i32* P |
1351 | // load i8* (P+1) |
1352 | // if we have this, replace the later with an extraction from the former. |
1353 | if (LoadInst *DepLoad = dyn_cast<LoadInst>(Val: DepInst)) { |
      // If this is a clobber and Load is the first instruction in its block,
      // then we have the first instruction in the entry block.
      // Can't forward from non-atomic to atomic without violating the memory
      // model.
1357 | if (DepLoad != Load && Address && |
1358 | Load->isAtomic() <= DepLoad->isAtomic()) { |
1359 | Type *LoadType = Load->getType(); |
1360 | int Offset = -1; |
1361 | |
1362 | // If MD reported clobber, check it was nested. |
1363 | if (DepInfo.isClobber() && |
1364 | canCoerceMustAliasedValueToLoad(StoredVal: DepLoad, LoadTy: LoadType, |
1365 | F: DepLoad->getFunction())) { |
1366 | const auto ClobberOff = MD->getClobberOffset(DepInst: DepLoad); |
// GVN cannot handle a negative offset.
1368 | Offset = (ClobberOff == std::nullopt || *ClobberOff < 0) |
1369 | ? -1 |
1370 | : *ClobberOff; |
1371 | } |
1372 | if (Offset == -1) |
1373 | Offset = |
1374 | analyzeLoadFromClobberingLoad(LoadTy: LoadType, LoadPtr: Address, DepLI: DepLoad, DL); |
1375 | if (Offset != -1) |
1376 | return AvailableValue::getLoad(Load: DepLoad, Offset); |
1377 | } |
1378 | } |
1379 | |
1380 | // If the clobbering value is a memset/memcpy/memmove, see if we can |
1381 | // forward a value on from it. |
1382 | if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Val: DepInst)) { |
1383 | if (Address && !Load->isAtomic()) { |
1384 | int Offset = analyzeLoadFromClobberingMemInst(LoadTy: Load->getType(), LoadPtr: Address, |
1385 | DepMI, DL); |
1386 | if (Offset != -1) |
1387 | return AvailableValue::getMI(MI: DepMI, Offset); |
1388 | } |
1389 | } |
1390 | |
1391 | // Nothing known about this clobber, have to be conservative. |
1392 | LLVM_DEBUG( |
1393 | // fast print dep, using operator<< on instruction is too slow. |
1394 | dbgs() << "GVN: load " ; Load->printAsOperand(dbgs()); |
1395 | dbgs() << " is clobbered by " << *DepInst << '\n';); |
1396 | if (ORE->allowExtraAnalysis(DEBUG_TYPE)) |
1397 | reportMayClobberedLoad(Load, DepInfo, DT, ORE); |
1398 | |
1399 | return std::nullopt; |
1400 | } |
1401 | assert(DepInfo.isDef() && "follows from above" ); |
1402 | |
1403 | // Loading the alloca -> undef. |
1404 | // Loading immediately after lifetime begin -> undef. |
1405 | if (isa<AllocaInst>(Val: DepInst) || isLifetimeStart(Inst: DepInst)) |
1406 | return AvailableValue::get(V: UndefValue::get(T: Load->getType())); |
1407 | |
1408 | if (Constant *InitVal = |
1409 | getInitialValueOfAllocation(V: DepInst, TLI, Ty: Load->getType())) |
1410 | return AvailableValue::get(V: InitVal); |
1411 | |
1412 | if (StoreInst *S = dyn_cast<StoreInst>(Val: DepInst)) { |
1413 | // Reject loads and stores that are to the same address but are of |
// different types if we have to. If the stored value is convertible to
1415 | // the loaded value, we can reuse it. |
1416 | if (!canCoerceMustAliasedValueToLoad(StoredVal: S->getValueOperand(), LoadTy: Load->getType(), |
1417 | F: S->getFunction())) |
1418 | return std::nullopt; |
1419 | |
1420 | // Can't forward from non-atomic to atomic without violating memory model. |
1421 | if (S->isAtomic() < Load->isAtomic()) |
1422 | return std::nullopt; |
1423 | |
1424 | return AvailableValue::get(V: S->getValueOperand()); |
1425 | } |
1426 | |
1427 | if (LoadInst *LD = dyn_cast<LoadInst>(Val: DepInst)) { |
1428 | // If the types mismatch and we can't handle it, reject reuse of the load. |
1429 | // If the stored value is larger or equal to the loaded value, we can reuse |
1430 | // it. |
1431 | if (!canCoerceMustAliasedValueToLoad(StoredVal: LD, LoadTy: Load->getType(), |
1432 | F: LD->getFunction())) |
1433 | return std::nullopt; |
1434 | |
1435 | // Can't forward from non-atomic to atomic without violating memory model. |
1436 | if (LD->isAtomic() < Load->isAtomic()) |
1437 | return std::nullopt; |
1438 | |
1439 | return AvailableValue::getLoad(Load: LD); |
1440 | } |
1441 | |
// Check whether a load whose address depends on a select can be converted
// into a select between load values. There must be no instructions between
// the found loads and DepInst that may clobber the loads.
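// For example (illustrative IR; names are hypothetical):
//   %v1 = load i32, ptr %a
//   %v2 = load i32, ptr %b
//   %ptr = select i1 %c, ptr %a, ptr %b
//   %v = load i32, ptr %ptr
// can be rewritten so that %v becomes "select i1 %c, i32 %v1, i32 %v2",
// provided nothing between %v1/%v2 and the select may clobber those loads.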
1445 | if (auto *Sel = dyn_cast<SelectInst>(Val: DepInst)) { |
1446 | assert(Sel->getType() == Load->getPointerOperandType()); |
1447 | auto Loc = MemoryLocation::get(LI: Load); |
1448 | Value *V1 = |
1449 | findDominatingValue(Loc: Loc.getWithNewPtr(NewPtr: Sel->getTrueValue()), |
1450 | LoadTy: Load->getType(), From: DepInst, AA: getAliasAnalysis()); |
1451 | if (!V1) |
1452 | return std::nullopt; |
1453 | Value *V2 = |
1454 | findDominatingValue(Loc: Loc.getWithNewPtr(NewPtr: Sel->getFalseValue()), |
1455 | LoadTy: Load->getType(), From: DepInst, AA: getAliasAnalysis()); |
1456 | if (!V2) |
1457 | return std::nullopt; |
1458 | return AvailableValue::getSelect(Sel, V1, V2); |
1459 | } |
1460 | |
1461 | // Unknown def - must be conservative. |
1462 | LLVM_DEBUG( |
1463 | // fast print dep, using operator<< on instruction is too slow. |
1464 | dbgs() << "GVN: load " ; Load->printAsOperand(dbgs()); |
1465 | dbgs() << " has unknown def " << *DepInst << '\n';); |
1466 | return std::nullopt; |
1467 | } |
1468 | |
1469 | void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps, |
1470 | AvailValInBlkVect &ValuesPerBlock, |
1471 | UnavailBlkVect &UnavailableBlocks) { |
1472 | // Filter out useless results (non-locals, etc). Keep track of the blocks |
1473 | // where we have a value available in repl, also keep track of whether we see |
1474 | // dependencies that produce an unknown value for the load (such as a call |
1475 | // that could potentially clobber the load). |
1476 | for (const auto &Dep : Deps) { |
1477 | BasicBlock *DepBB = Dep.getBB(); |
1478 | MemDepResult DepInfo = Dep.getResult(); |
1479 | |
1480 | if (DeadBlocks.count(key: DepBB)) { |
// A dead dependent mem-op can be treated as a load that evaluates the same
// value as the load in question.
1483 | ValuesPerBlock.push_back(Elt: AvailableValueInBlock::getUndef(BB: DepBB)); |
1484 | continue; |
1485 | } |
1486 | |
1487 | if (!DepInfo.isLocal()) { |
1488 | UnavailableBlocks.push_back(Elt: DepBB); |
1489 | continue; |
1490 | } |
1491 | |
1492 | // The address being loaded in this non-local block may not be the same as |
1493 | // the pointer operand of the load if PHI translation occurs. Make sure |
1494 | // to consider the right address. |
1495 | if (auto AV = AnalyzeLoadAvailability(Load, DepInfo, Address: Dep.getAddress())) { |
// Subtlety: because we know this was a non-local dependency, we know
// it's safe to materialize anywhere between the instruction within
// DepInfo and the end of its block.
1499 | ValuesPerBlock.push_back( |
1500 | Elt: AvailableValueInBlock::get(BB: DepBB, AV: std::move(*AV))); |
1501 | } else { |
1502 | UnavailableBlocks.push_back(Elt: DepBB); |
1503 | } |
1504 | } |
1505 | |
1506 | assert(Deps.size() == ValuesPerBlock.size() + UnavailableBlocks.size() && |
1507 | "post condition violation" ); |
1508 | } |
1509 | |
1510 | /// Given the following code, v1 is partially available on some edges, but not |
1511 | /// available on the edge from PredBB. This function tries to find if there is |
1512 | /// another identical load in the other successor of PredBB. |
1513 | /// |
1514 | /// v0 = load %addr |
1515 | /// br %LoadBB |
1516 | /// |
1517 | /// LoadBB: |
1518 | /// v1 = load %addr |
1519 | /// ... |
1520 | /// |
1521 | /// PredBB: |
1522 | /// ... |
1523 | /// br %cond, label %LoadBB, label %SuccBB |
1524 | /// |
1525 | /// SuccBB: |
1526 | /// v2 = load %addr |
1527 | /// ... |
1528 | /// |
1529 | LoadInst *GVNPass::findLoadToHoistIntoPred(BasicBlock *Pred, BasicBlock *LoadBB, |
1530 | LoadInst *Load) { |
// For simplicity we handle only the case where Pred has exactly 2 successors.
1532 | auto *Term = Pred->getTerminator(); |
1533 | if (Term->getNumSuccessors() != 2 || Term->isSpecialTerminator()) |
1534 | return nullptr; |
1535 | auto *SuccBB = Term->getSuccessor(Idx: 0); |
1536 | if (SuccBB == LoadBB) |
1537 | SuccBB = Term->getSuccessor(Idx: 1); |
1538 | if (!SuccBB->getSinglePredecessor()) |
1539 | return nullptr; |
1540 | |
1541 | unsigned int NumInsts = MaxNumInsnsPerBlock; |
1542 | for (Instruction &Inst : *SuccBB) { |
1543 | if (Inst.isDebugOrPseudoInst()) |
1544 | continue; |
1545 | if (--NumInsts == 0) |
1546 | return nullptr; |
1547 | |
1548 | if (!Inst.isIdenticalTo(I: Load)) |
1549 | continue; |
1550 | |
1551 | MemDepResult Dep = MD->getDependency(QueryInst: &Inst); |
// If an identical load doesn't depend on any local instructions, it can
1553 | // be safely moved to PredBB. |
1554 | // Also check for the implicit control flow instructions. See the comments |
1555 | // in PerformLoadPRE for details. |
1556 | if (Dep.isNonLocal() && !ICF->isDominatedByICFIFromSameBlock(Insn: &Inst)) |
1557 | return cast<LoadInst>(Val: &Inst); |
1558 | |
// Otherwise something in the same BB clobbers the memory; we can't move
// this load and the later load to PredBB.
1561 | return nullptr; |
1562 | } |
1563 | |
1564 | return nullptr; |
1565 | } |
1566 | |
1567 | void GVNPass::eliminatePartiallyRedundantLoad( |
1568 | LoadInst *Load, AvailValInBlkVect &ValuesPerBlock, |
1569 | MapVector<BasicBlock *, Value *> &AvailableLoads, |
1570 | MapVector<BasicBlock *, LoadInst *> *CriticalEdgePredAndLoad) { |
1571 | for (const auto &AvailableLoad : AvailableLoads) { |
1572 | BasicBlock *UnavailableBlock = AvailableLoad.first; |
1573 | Value *LoadPtr = AvailableLoad.second; |
1574 | |
1575 | auto *NewLoad = new LoadInst( |
1576 | Load->getType(), LoadPtr, Load->getName() + ".pre" , Load->isVolatile(), |
1577 | Load->getAlign(), Load->getOrdering(), Load->getSyncScopeID(), |
1578 | UnavailableBlock->getTerminator()->getIterator()); |
1579 | NewLoad->setDebugLoc(Load->getDebugLoc()); |
1580 | if (MSSAU) { |
1581 | auto *NewAccess = MSSAU->createMemoryAccessInBB( |
1582 | I: NewLoad, Definition: nullptr, BB: NewLoad->getParent(), Point: MemorySSA::BeforeTerminator); |
1583 | if (auto *NewDef = dyn_cast<MemoryDef>(Val: NewAccess)) |
1584 | MSSAU->insertDef(Def: NewDef, /*RenameUses=*/true); |
1585 | else |
1586 | MSSAU->insertUse(Use: cast<MemoryUse>(Val: NewAccess), /*RenameUses=*/true); |
1587 | } |
1588 | |
1589 | // Transfer the old load's AA tags to the new load. |
1590 | AAMDNodes Tags = Load->getAAMetadata(); |
1591 | if (Tags) |
1592 | NewLoad->setAAMetadata(Tags); |
1593 | |
1594 | if (auto *MD = Load->getMetadata(KindID: LLVMContext::MD_invariant_load)) |
1595 | NewLoad->setMetadata(KindID: LLVMContext::MD_invariant_load, Node: MD); |
1596 | if (auto *InvGroupMD = Load->getMetadata(KindID: LLVMContext::MD_invariant_group)) |
1597 | NewLoad->setMetadata(KindID: LLVMContext::MD_invariant_group, Node: InvGroupMD); |
1598 | if (auto *RangeMD = Load->getMetadata(KindID: LLVMContext::MD_range)) |
1599 | NewLoad->setMetadata(KindID: LLVMContext::MD_range, Node: RangeMD); |
1600 | if (auto *AccessMD = Load->getMetadata(KindID: LLVMContext::MD_access_group)) |
1601 | if (LI->getLoopFor(BB: Load->getParent()) == LI->getLoopFor(BB: UnavailableBlock)) |
1602 | NewLoad->setMetadata(KindID: LLVMContext::MD_access_group, Node: AccessMD); |
1603 | |
1604 | // We do not propagate the old load's debug location, because the new |
1605 | // load now lives in a different BB, and we want to avoid a jumpy line |
1606 | // table. |
1607 | // FIXME: How do we retain source locations without causing poor debugging |
1608 | // behavior? |
1609 | |
1610 | // Add the newly created load. |
1611 | ValuesPerBlock.push_back( |
1612 | Elt: AvailableValueInBlock::get(BB: UnavailableBlock, V: NewLoad)); |
1613 | MD->invalidateCachedPointerInfo(Ptr: LoadPtr); |
1614 | LLVM_DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n'); |
1615 | |
// For PredBB in CriticalEdgePredAndLoad we need to replace the uses of the
// old load instruction with the newly created load instruction.
1618 | if (CriticalEdgePredAndLoad) { |
1619 | auto It = CriticalEdgePredAndLoad->find(Key: UnavailableBlock); |
1620 | if (It != CriticalEdgePredAndLoad->end()) { |
1621 | ++NumPRELoadMoved2CEPred; |
1622 | ICF->insertInstructionTo(Inst: NewLoad, BB: UnavailableBlock); |
1623 | LoadInst *OldLoad = It->second; |
1624 | combineMetadataForCSE(K: NewLoad, J: OldLoad, DoesKMove: false); |
1625 | OldLoad->replaceAllUsesWith(V: NewLoad); |
1626 | replaceValuesPerBlockEntry(ValuesPerBlock, OldValue: OldLoad, NewValue: NewLoad); |
1627 | if (uint32_t ValNo = VN.lookup(V: OldLoad, Verify: false)) |
1628 | LeaderTable.erase(N: ValNo, I: OldLoad, BB: OldLoad->getParent()); |
1629 | removeInstruction(I: OldLoad); |
1630 | } |
1631 | } |
1632 | } |
1633 | |
1634 | // Perform PHI construction. |
1635 | Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, GVN&: *this); |
1636 | // ConstructSSAForLoadSet is responsible for combining metadata. |
1637 | ICF->removeUsersOf(Inst: Load); |
1638 | Load->replaceAllUsesWith(V); |
1639 | if (isa<PHINode>(Val: V)) |
1640 | V->takeName(V: Load); |
1641 | if (Instruction *I = dyn_cast<Instruction>(Val: V)) |
1642 | I->setDebugLoc(Load->getDebugLoc()); |
1643 | if (V->getType()->isPtrOrPtrVectorTy()) |
1644 | MD->invalidateCachedPointerInfo(Ptr: V); |
1645 | ORE->emit(RemarkBuilder: [&]() { |
return OptimizationRemark(DEBUG_TYPE, "LoadPRE", Load)
<< "load eliminated by PRE";
1648 | }); |
1649 | salvageAndRemoveInstruction(I: Load); |
1650 | } |
1651 | |
1652 | bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock, |
1653 | UnavailBlkVect &UnavailableBlocks) { |
1654 | // Okay, we have *some* definitions of the value. This means that the value |
1655 | // is available in some of our (transitive) predecessors. Lets think about |
1656 | // doing PRE of this load. This will involve inserting a new load into the |
1657 | // predecessor when it's not available. We could do this in general, but |
1658 | // prefer to not increase code size. As such, we only do this when we know |
1659 | // that we only have to insert *one* load (which means we're basically moving |
1660 | // the load, not inserting a new one). |
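// Sketch of the intended transformation (illustrative IR; names are
// hypothetical): if %x is available on every path into LoadBB except the one
// through %Pred, we insert "%x.pre = load i32, ptr %p" at the end of %Pred
// (splitting the edge first if it is critical) and then merge %x.pre with
// the existing values via a phi in LoadBB.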
1661 | |
1662 | SmallPtrSet<BasicBlock *, 4> Blockers(llvm::from_range, UnavailableBlocks); |
1663 | |
1664 | // Let's find the first basic block with more than one predecessor. Walk |
1665 | // backwards through predecessors if needed. |
1666 | BasicBlock *LoadBB = Load->getParent(); |
1667 | BasicBlock *TmpBB = LoadBB; |
1668 | |
// Check that there are no implicit control flow instructions above our load in
1670 | // its block. If there is an instruction that doesn't always pass the |
1671 | // execution to the following instruction, then moving through it may become |
1672 | // invalid. For example: |
1673 | // |
1674 | // int arr[LEN]; |
1675 | // int index = ???; |
1676 | // ... |
1677 | // guard(0 <= index && index < LEN); |
1678 | // use(arr[index]); |
1679 | // |
1680 | // It is illegal to move the array access to any point above the guard, |
1681 | // because if the index is out of bounds we should deoptimize rather than |
1682 | // access the array. |
1683 | // Check that there is no guard in this block above our instruction. |
1684 | bool MustEnsureSafetyOfSpeculativeExecution = |
1685 | ICF->isDominatedByICFIFromSameBlock(Insn: Load); |
1686 | |
1687 | while (TmpBB->getSinglePredecessor()) { |
1688 | TmpBB = TmpBB->getSinglePredecessor(); |
1689 | if (TmpBB == LoadBB) // Infinite (unreachable) loop. |
1690 | return false; |
1691 | if (Blockers.count(Ptr: TmpBB)) |
1692 | return false; |
1693 | |
1694 | // If any of these blocks has more than one successor (i.e. if the edge we |
1695 | // just traversed was critical), then there are other paths through this |
1696 | // block along which the load may not be anticipated. Hoisting the load |
1697 | // above this block would be adding the load to execution paths along |
1698 | // which it was not previously executed. |
1699 | if (TmpBB->getTerminator()->getNumSuccessors() != 1) |
1700 | return false; |
1701 | |
1702 | // Check that there is no implicit control flow in a block above. |
1703 | MustEnsureSafetyOfSpeculativeExecution = |
1704 | MustEnsureSafetyOfSpeculativeExecution || ICF->hasICF(BB: TmpBB); |
1705 | } |
1706 | |
1707 | assert(TmpBB); |
1708 | LoadBB = TmpBB; |
1709 | |
1710 | // Check to see how many predecessors have the loaded value fully |
1711 | // available. |
1712 | MapVector<BasicBlock *, Value *> PredLoads; |
1713 | DenseMap<BasicBlock *, AvailabilityState> FullyAvailableBlocks; |
1714 | for (const AvailableValueInBlock &AV : ValuesPerBlock) |
1715 | FullyAvailableBlocks[AV.BB] = AvailabilityState::Available; |
1716 | for (BasicBlock *UnavailableBB : UnavailableBlocks) |
1717 | FullyAvailableBlocks[UnavailableBB] = AvailabilityState::Unavailable; |
1718 | |
// The edge from Pred to LoadBB is a critical edge and will be split.
1720 | SmallVector<BasicBlock *, 4> CriticalEdgePredSplit; |
// The edge from Pred to LoadBB is a critical edge, and another successor of
// Pred contains a load that can be moved to Pred. This data structure maps
// such a Pred to the movable load.
1724 | MapVector<BasicBlock *, LoadInst *> CriticalEdgePredAndLoad; |
1725 | for (BasicBlock *Pred : predecessors(BB: LoadBB)) { |
1726 | // If any predecessor block is an EH pad that does not allow non-PHI |
1727 | // instructions before the terminator, we can't PRE the load. |
1728 | if (Pred->getTerminator()->isEHPad()) { |
1729 | LLVM_DEBUG( |
1730 | dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '" |
1731 | << Pred->getName() << "': " << *Load << '\n'); |
1732 | return false; |
1733 | } |
1734 | |
1735 | if (IsValueFullyAvailableInBlock(BB: Pred, FullyAvailableBlocks)) { |
1736 | continue; |
1737 | } |
1738 | |
1739 | if (Pred->getTerminator()->getNumSuccessors() != 1) { |
1740 | if (isa<IndirectBrInst>(Val: Pred->getTerminator())) { |
1741 | LLVM_DEBUG( |
1742 | dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '" |
1743 | << Pred->getName() << "': " << *Load << '\n'); |
1744 | return false; |
1745 | } |
1746 | |
1747 | if (LoadBB->isEHPad()) { |
1748 | LLVM_DEBUG( |
1749 | dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '" |
1750 | << Pred->getName() << "': " << *Load << '\n'); |
1751 | return false; |
1752 | } |
1753 | |
1754 | // Do not split backedge as it will break the canonical loop form. |
1755 | if (!isLoadPRESplitBackedgeEnabled()) |
1756 | if (DT->dominates(A: LoadBB, B: Pred)) { |
1757 | LLVM_DEBUG( |
1758 | dbgs() |
1759 | << "COULD NOT PRE LOAD BECAUSE OF A BACKEDGE CRITICAL EDGE '" |
1760 | << Pred->getName() << "': " << *Load << '\n'); |
1761 | return false; |
1762 | } |
1763 | |
1764 | if (LoadInst *LI = findLoadToHoistIntoPred(Pred, LoadBB, Load)) |
1765 | CriticalEdgePredAndLoad[Pred] = LI; |
1766 | else |
1767 | CriticalEdgePredSplit.push_back(Elt: Pred); |
1768 | } else { |
1769 | // Only add the predecessors that will not be split for now. |
1770 | PredLoads[Pred] = nullptr; |
1771 | } |
1772 | } |
1773 | |
1774 | // Decide whether PRE is profitable for this load. |
1775 | unsigned NumInsertPreds = PredLoads.size() + CriticalEdgePredSplit.size(); |
1776 | unsigned NumUnavailablePreds = NumInsertPreds + |
1777 | CriticalEdgePredAndLoad.size(); |
1778 | assert(NumUnavailablePreds != 0 && |
1779 | "Fully available value should already be eliminated!" ); |
1780 | (void)NumUnavailablePreds; |
1781 | |
1782 | // If we need to insert new load in multiple predecessors, reject it. |
1783 | // FIXME: If we could restructure the CFG, we could make a common pred with |
1784 | // all the preds that don't have an available Load and insert a new load into |
1785 | // that one block. |
1786 | if (NumInsertPreds > 1) |
1787 | return false; |
1788 | |
// Now we know where we will insert the load. We must ensure that it is safe
// to speculatively execute the load at those points.
1791 | if (MustEnsureSafetyOfSpeculativeExecution) { |
1792 | if (CriticalEdgePredSplit.size()) |
1793 | if (!isSafeToSpeculativelyExecute(I: Load, CtxI: &*LoadBB->getFirstNonPHIIt(), AC, |
1794 | DT)) |
1795 | return false; |
1796 | for (auto &PL : PredLoads) |
1797 | if (!isSafeToSpeculativelyExecute(I: Load, CtxI: PL.first->getTerminator(), AC, |
1798 | DT)) |
1799 | return false; |
1800 | for (auto &CEP : CriticalEdgePredAndLoad) |
1801 | if (!isSafeToSpeculativelyExecute(I: Load, CtxI: CEP.first->getTerminator(), AC, |
1802 | DT)) |
1803 | return false; |
1804 | } |
1805 | |
1806 | // Split critical edges, and update the unavailable predecessors accordingly. |
1807 | for (BasicBlock *OrigPred : CriticalEdgePredSplit) { |
1808 | BasicBlock *NewPred = splitCriticalEdges(Pred: OrigPred, Succ: LoadBB); |
1809 | assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!" ); |
1810 | PredLoads[NewPred] = nullptr; |
1811 | LLVM_DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->" |
1812 | << LoadBB->getName() << '\n'); |
1813 | } |
1814 | |
1815 | for (auto &CEP : CriticalEdgePredAndLoad) |
1816 | PredLoads[CEP.first] = nullptr; |
1817 | |
1818 | // Check if the load can safely be moved to all the unavailable predecessors. |
1819 | bool CanDoPRE = true; |
1820 | const DataLayout &DL = Load->getDataLayout(); |
1821 | SmallVector<Instruction*, 8> NewInsts; |
1822 | for (auto &PredLoad : PredLoads) { |
1823 | BasicBlock *UnavailablePred = PredLoad.first; |
1824 | |
1825 | // Do PHI translation to get its value in the predecessor if necessary. The |
1826 | // returned pointer (if non-null) is guaranteed to dominate UnavailablePred. |
1827 | // We do the translation for each edge we skipped by going from Load's block |
1828 | // to LoadBB, otherwise we might miss pieces needing translation. |
1829 | |
1830 | // If all preds have a single successor, then we know it is safe to insert |
1831 | // the load on the pred (?!?), so we can insert code to materialize the |
1832 | // pointer if it is not available. |
1833 | Value *LoadPtr = Load->getPointerOperand(); |
1834 | BasicBlock *Cur = Load->getParent(); |
1835 | while (Cur != LoadBB) { |
1836 | PHITransAddr Address(LoadPtr, DL, AC); |
1837 | LoadPtr = Address.translateWithInsertion(CurBB: Cur, PredBB: Cur->getSinglePredecessor(), |
1838 | DT: *DT, NewInsts); |
1839 | if (!LoadPtr) { |
1840 | CanDoPRE = false; |
1841 | break; |
1842 | } |
1843 | Cur = Cur->getSinglePredecessor(); |
1844 | } |
1845 | |
1846 | if (LoadPtr) { |
1847 | PHITransAddr Address(LoadPtr, DL, AC); |
1848 | LoadPtr = Address.translateWithInsertion(CurBB: LoadBB, PredBB: UnavailablePred, DT: *DT, |
1849 | NewInsts); |
1850 | } |
1851 | // If we couldn't find or insert a computation of this phi translated value, |
1852 | // we fail PRE. |
1853 | if (!LoadPtr) { |
1854 | LLVM_DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " |
1855 | << *Load->getPointerOperand() << "\n" ); |
1856 | CanDoPRE = false; |
1857 | break; |
1858 | } |
1859 | |
1860 | PredLoad.second = LoadPtr; |
1861 | } |
1862 | |
1863 | if (!CanDoPRE) { |
1864 | while (!NewInsts.empty()) { |
1865 | // Erase instructions generated by the failed PHI translation before |
1866 | // trying to number them. PHI translation might insert instructions |
1867 | // in basic blocks other than the current one, and we delete them |
1868 | // directly, as salvageAndRemoveInstruction only allows removing from the |
1869 | // current basic block. |
1870 | NewInsts.pop_back_val()->eraseFromParent(); |
1871 | } |
// HINT: Don't revert the edge-splitting, as the following transformation may
// also need to split these critical edges.
1874 | return !CriticalEdgePredSplit.empty(); |
1875 | } |
1876 | |
1877 | // Okay, we can eliminate this load by inserting a reload in the predecessor |
1878 | // and using PHI construction to get the value in the other predecessors, do |
1879 | // it. |
1880 | LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *Load << '\n'); |
1881 | LLVM_DEBUG(if (!NewInsts.empty()) dbgs() << "INSERTED " << NewInsts.size() |
1882 | << " INSTS: " << *NewInsts.back() |
1883 | << '\n'); |
1884 | |
1885 | // Assign value numbers to the new instructions. |
1886 | for (Instruction *I : NewInsts) { |
1887 | // Instructions that have been inserted in predecessor(s) to materialize |
1888 | // the load address do not retain their original debug locations. Doing |
1889 | // so could lead to confusing (but correct) source attributions. |
1890 | I->updateLocationAfterHoist(); |
1891 | |
1892 | // FIXME: We really _ought_ to insert these value numbers into their |
1893 | // parent's availability map. However, in doing so, we risk getting into |
1894 | // ordering issues. If a block hasn't been processed yet, we would be |
1895 | // marking a value as AVAIL-IN, which isn't what we intend. |
1896 | VN.lookupOrAdd(V: I); |
1897 | } |
1898 | |
1899 | eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, AvailableLoads&: PredLoads, |
1900 | CriticalEdgePredAndLoad: &CriticalEdgePredAndLoad); |
1901 | ++NumPRELoad; |
1902 | return true; |
1903 | } |
1904 | |
1905 | bool GVNPass::performLoopLoadPRE(LoadInst *Load, |
1906 | AvailValInBlkVect &ValuesPerBlock, |
1907 | UnavailBlkVect &UnavailableBlocks) { |
1908 | const Loop *L = LI->getLoopFor(BB: Load->getParent()); |
1909 | // TODO: Generalize to other loop blocks that dominate the latch. |
1910 | if (!L || L->getHeader() != Load->getParent()) |
1911 | return false; |
1912 | |
BasicBlock *Preheader = L->getLoopPreheader();
1914 | BasicBlock *Latch = L->getLoopLatch(); |
1915 | if (!Preheader || !Latch) |
1916 | return false; |
1917 | |
1918 | Value *LoadPtr = Load->getPointerOperand(); |
1919 | // Must be available in preheader. |
1920 | if (!L->isLoopInvariant(V: LoadPtr)) |
1921 | return false; |
1922 | |
// We plan to hoist the load to the preheader without introducing a new fault.
// In order to do that, we need to prove that we cannot side-exit the loop
// between first entering the loop header and executing the load.
1926 | if (ICF->isDominatedByICFIFromSameBlock(Insn: Load)) |
1927 | return false; |
1928 | |
1929 | BasicBlock *LoopBlock = nullptr; |
1930 | for (auto *Blocker : UnavailableBlocks) { |
1931 | // Blockers from outside the loop are handled in preheader. |
1932 | if (!L->contains(BB: Blocker)) |
1933 | continue; |
1934 | |
1935 | // Only allow one loop block. Loop header is not less frequently executed |
1936 | // than each loop block, and likely it is much more frequently executed. But |
1937 | // in case of multiple loop blocks, we need extra information (such as block |
1938 | // frequency info) to understand whether it is profitable to PRE into |
1939 | // multiple loop blocks. |
1940 | if (LoopBlock) |
1941 | return false; |
1942 | |
1943 | // Do not sink into inner loops. This may be non-profitable. |
1944 | if (L != LI->getLoopFor(BB: Blocker)) |
1945 | return false; |
1946 | |
1947 | // Blocks that dominate the latch execute on every single iteration, maybe |
1948 | // except the last one. So PREing into these blocks doesn't make much sense |
1949 | // in most cases. But the blocks that do not necessarily execute on each |
1950 | // iteration are sometimes much colder than the header, and this is when |
1951 | // PRE is potentially profitable. |
1952 | if (DT->dominates(A: Blocker, B: Latch)) |
1953 | return false; |
1954 | |
1955 | // Make sure that the terminator itself doesn't clobber. |
1956 | if (Blocker->getTerminator()->mayWriteToMemory()) |
1957 | return false; |
1958 | |
1959 | LoopBlock = Blocker; |
1960 | } |
1961 | |
1962 | if (!LoopBlock) |
1963 | return false; |
1964 | |
1965 | // Make sure the memory at this pointer cannot be freed, therefore we can |
1966 | // safely reload from it after clobber. |
1967 | if (LoadPtr->canBeFreed()) |
1968 | return false; |
1969 | |
1970 | // TODO: Support critical edge splitting if blocker has more than 1 successor. |
1971 | MapVector<BasicBlock *, Value *> AvailableLoads; |
1972 | AvailableLoads[LoopBlock] = LoadPtr; |
1973 | AvailableLoads[Preheader] = LoadPtr; |
1974 | |
1975 | LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOOP LOAD: " << *Load << '\n'); |
1976 | eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, AvailableLoads, |
1977 | /*CriticalEdgePredAndLoad*/ nullptr); |
1978 | ++NumPRELoopLoad; |
1979 | return true; |
1980 | } |
1981 | |
static void reportLoadElim(LoadInst *Load, Value *AvailableValue,
1983 | OptimizationRemarkEmitter *ORE) { |
1984 | using namespace ore; |
1985 | |
1986 | ORE->emit(RemarkBuilder: [&]() { |
return OptimizationRemark(DEBUG_TYPE, "LoadElim", Load)
<< "load of type " << NV("Type", Load->getType()) << " eliminated"
<< setExtraArgs() << " in favor of "
<< NV("InfavorOfValue", AvailableValue);
1991 | }); |
1992 | } |
1993 | |
1994 | /// Attempt to eliminate a load whose dependencies are |
1995 | /// non-local by performing PHI construction. |
1996 | bool GVNPass::processNonLocalLoad(LoadInst *Load) { |
1997 | // Non-local speculations are not allowed under asan. |
1998 | if (Load->getParent()->getParent()->hasFnAttribute( |
1999 | Kind: Attribute::SanitizeAddress) || |
2000 | Load->getParent()->getParent()->hasFnAttribute( |
2001 | Kind: Attribute::SanitizeHWAddress)) |
2002 | return false; |
2003 | |
2004 | // Step 1: Find the non-local dependencies of the load. |
2005 | LoadDepVect Deps; |
2006 | MD->getNonLocalPointerDependency(QueryInst: Load, Result&: Deps); |
2007 | |
2008 | // If we had to process more than one hundred blocks to find the |
2009 | // dependencies, this load isn't worth worrying about. Optimizing |
2010 | // it will be too expensive. |
2011 | unsigned NumDeps = Deps.size(); |
2012 | if (NumDeps > MaxNumDeps) |
2013 | return false; |
2014 | |
2015 | // If we had a phi translation failure, we'll have a single entry which is a |
2016 | // clobber in the current block. Reject this early. |
2017 | if (NumDeps == 1 && |
2018 | !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) { |
2019 | LLVM_DEBUG(dbgs() << "GVN: non-local load " ; Load->printAsOperand(dbgs()); |
2020 | dbgs() << " has unknown dependencies\n" ;); |
2021 | return false; |
2022 | } |
2023 | |
2024 | bool Changed = false; |
2025 | // If this load follows a GEP, see if we can PRE the indices before analyzing. |
2026 | if (GetElementPtrInst *GEP = |
2027 | dyn_cast<GetElementPtrInst>(Val: Load->getOperand(i_nocapture: 0))) { |
2028 | for (Use &U : GEP->indices()) |
2029 | if (Instruction *I = dyn_cast<Instruction>(Val: U.get())) |
2030 | Changed |= performScalarPRE(I); |
2031 | } |
2032 | |
2033 | // Step 2: Analyze the availability of the load. |
2034 | AvailValInBlkVect ValuesPerBlock; |
2035 | UnavailBlkVect UnavailableBlocks; |
2036 | AnalyzeLoadAvailability(Load, Deps, ValuesPerBlock, UnavailableBlocks); |
2037 | |
2038 | // If we have no predecessors that produce a known value for this load, exit |
2039 | // early. |
2040 | if (ValuesPerBlock.empty()) |
2041 | return Changed; |
2042 | |
// Step 3: Eliminate full redundancy.
2044 | // |
2045 | // If all of the instructions we depend on produce a known value for this |
2046 | // load, then it is fully redundant and we can use PHI insertion to compute |
2047 | // its value. Insert PHIs and remove the fully redundant value now. |
2048 | if (UnavailableBlocks.empty()) { |
2049 | LLVM_DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *Load << '\n'); |
2050 | |
2051 | // Perform PHI construction. |
2052 | Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, GVN&: *this); |
2053 | // ConstructSSAForLoadSet is responsible for combining metadata. |
2054 | ICF->removeUsersOf(Inst: Load); |
2055 | Load->replaceAllUsesWith(V); |
2056 | |
2057 | if (isa<PHINode>(Val: V)) |
2058 | V->takeName(V: Load); |
2059 | if (Instruction *I = dyn_cast<Instruction>(Val: V)) |
2060 | // If instruction I has debug info, then we should not update it. |
2061 | // Also, if I has a null DebugLoc, then it is still potentially incorrect |
2062 | // to propagate Load's DebugLoc because Load may not post-dominate I. |
2063 | if (Load->getDebugLoc() && Load->getParent() == I->getParent()) |
2064 | I->setDebugLoc(Load->getDebugLoc()); |
2065 | if (V->getType()->isPtrOrPtrVectorTy()) |
2066 | MD->invalidateCachedPointerInfo(Ptr: V); |
2067 | ++NumGVNLoad; |
2068 | reportLoadElim(Load, AvailableValue: V, ORE); |
2069 | salvageAndRemoveInstruction(I: Load); |
2070 | return true; |
2071 | } |
2072 | |
2073 | // Step 4: Eliminate partial redundancy. |
2074 | if (!isPREEnabled() || !isLoadPREEnabled()) |
2075 | return Changed; |
2076 | if (!isLoadInLoopPREEnabled() && LI->getLoopFor(BB: Load->getParent())) |
2077 | return Changed; |
2078 | |
2079 | if (performLoopLoadPRE(Load, ValuesPerBlock, UnavailableBlocks) || |
2080 | PerformLoadPRE(Load, ValuesPerBlock, UnavailableBlocks)) |
2081 | return true; |
2082 | |
2083 | return Changed; |
2084 | } |
2085 | |
2086 | static bool hasUsersIn(Value *V, BasicBlock *BB) { |
2087 | return any_of(Range: V->users(), P: [BB](User *U) { |
2088 | auto *I = dyn_cast<Instruction>(Val: U); |
2089 | return I && I->getParent() == BB; |
2090 | }); |
2091 | } |
2092 | |
2093 | bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) { |
2094 | Value *V = IntrinsicI->getArgOperand(i: 0); |
2095 | |
2096 | if (ConstantInt *Cond = dyn_cast<ConstantInt>(Val: V)) { |
2097 | if (Cond->isZero()) { |
2098 | Type *Int8Ty = Type::getInt8Ty(C&: V->getContext()); |
2099 | Type *PtrTy = PointerType::get(C&: V->getContext(), AddressSpace: 0); |
// Insert a store to null before the assume to indicate that this code is
// not reachable. FIXME: We could insert an unreachable instruction
// directly because we can modify the CFG.
2103 | auto *NewS = |
2104 | new StoreInst(PoisonValue::get(T: Int8Ty), Constant::getNullValue(Ty: PtrTy), |
2105 | IntrinsicI->getIterator()); |
2106 | if (MSSAU) { |
2107 | const MemoryUseOrDef *FirstNonDom = nullptr; |
2108 | const auto *AL = |
2109 | MSSAU->getMemorySSA()->getBlockAccesses(BB: IntrinsicI->getParent()); |
2110 | |
2111 | // If there are accesses in the current basic block, find the first one |
2112 | // that does not come before NewS. The new memory access is inserted |
2113 | // after the found access or before the terminator if no such access is |
2114 | // found. |
2115 | if (AL) { |
2116 | for (const auto &Acc : *AL) { |
2117 | if (auto *Current = dyn_cast<MemoryUseOrDef>(Val: &Acc)) |
2118 | if (!Current->getMemoryInst()->comesBefore(Other: NewS)) { |
2119 | FirstNonDom = Current; |
2120 | break; |
2121 | } |
2122 | } |
2123 | } |
2124 | |
2125 | auto *NewDef = |
2126 | FirstNonDom ? MSSAU->createMemoryAccessBefore( |
2127 | I: NewS, Definition: nullptr, |
2128 | InsertPt: const_cast<MemoryUseOrDef *>(FirstNonDom)) |
2129 | : MSSAU->createMemoryAccessInBB( |
2130 | I: NewS, Definition: nullptr, |
2131 | BB: NewS->getParent(), Point: MemorySSA::BeforeTerminator); |
2132 | |
2133 | MSSAU->insertDef(Def: cast<MemoryDef>(Val: NewDef), /*RenameUses=*/false); |
2134 | } |
2135 | } |
2136 | if (isAssumeWithEmptyBundle(Assume: *IntrinsicI)) { |
2137 | salvageAndRemoveInstruction(I: IntrinsicI); |
2138 | return true; |
2139 | } |
2140 | return false; |
2141 | } |
2142 | |
2143 | if (isa<Constant>(Val: V)) { |
// If it's a constant and not false, it must evaluate to true. This means our
2145 | // assume is assume(true), and thus, pointless, and we don't want to do |
2146 | // anything more here. |
2147 | return false; |
2148 | } |
2149 | |
2150 | Constant *True = ConstantInt::getTrue(Context&: V->getContext()); |
2151 | bool Changed = false; |
2152 | |
2153 | for (BasicBlock *Successor : successors(BB: IntrinsicI->getParent())) { |
2154 | BasicBlockEdge Edge(IntrinsicI->getParent(), Successor); |
2155 | |
2156 | // This property is only true in dominated successors, propagateEquality |
2157 | // will check dominance for us. |
2158 | Changed |= propagateEquality(LHS: V, RHS: True, Root: Edge, DominatesByEdge: false); |
2159 | } |
2160 | |
2161 | // We can replace assume value with true, which covers cases like this: |
2162 | // call void @llvm.assume(i1 %cmp) |
2163 | // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true |
2164 | ReplaceOperandsWithMap[V] = True; |
2165 | |
2166 | // Similarly, after assume(!NotV) we know that NotV == false. |
2167 | Value *NotV; |
2168 | if (match(V, P: m_Not(V: m_Value(V&: NotV)))) |
2169 | ReplaceOperandsWithMap[NotV] = ConstantInt::getFalse(Context&: V->getContext()); |
2170 | |
2171 | // If we find an equality fact, canonicalize all dominated uses in this block |
// to one of the two values. We heuristically choose the "oldest" of the
2173 | // two where age is determined by value number. (Note that propagateEquality |
2174 | // above handles the cross block case.) |
2175 | // |
// Key cases to cover are:
2177 | // 1) |
2178 | // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen |
2179 | // call void @llvm.assume(i1 %cmp) |
2180 | // ret float %0 ; will change it to ret float 3.000000e+00 |
2181 | // 2) |
2182 | // %load = load float, float* %addr |
2183 | // %cmp = fcmp oeq float %load, %0 |
2184 | // call void @llvm.assume(i1 %cmp) |
2185 | // ret float %load ; will change it to ret float %0 |
2186 | if (auto *CmpI = dyn_cast<CmpInst>(Val: V)) { |
2187 | if (CmpI->isEquivalence()) { |
2188 | Value *CmpLHS = CmpI->getOperand(i_nocapture: 0); |
2189 | Value *CmpRHS = CmpI->getOperand(i_nocapture: 1); |
2190 | // Heuristically pick the better replacement -- the choice of heuristic |
2191 | // isn't terribly important here, but the fact we canonicalize on some |
2192 | // replacement is for exposing other simplifications. |
2193 | // TODO: pull this out as a helper function and reuse w/ existing |
2194 | // (slightly different) logic. |
2195 | if (isa<Constant>(Val: CmpLHS) && !isa<Constant>(Val: CmpRHS)) |
2196 | std::swap(a&: CmpLHS, b&: CmpRHS); |
2197 | if (!isa<Instruction>(Val: CmpLHS) && isa<Instruction>(Val: CmpRHS)) |
2198 | std::swap(a&: CmpLHS, b&: CmpRHS); |
2199 | if ((isa<Argument>(Val: CmpLHS) && isa<Argument>(Val: CmpRHS)) || |
2200 | (isa<Instruction>(Val: CmpLHS) && isa<Instruction>(Val: CmpRHS))) { |
2201 | // Move the 'oldest' value to the right-hand side, using the value |
2202 | // number as a proxy for age. |
2203 | uint32_t LVN = VN.lookupOrAdd(V: CmpLHS); |
2204 | uint32_t RVN = VN.lookupOrAdd(V: CmpRHS); |
2205 | if (LVN < RVN) |
2206 | std::swap(a&: CmpLHS, b&: CmpRHS); |
2207 | } |
2208 | |
// Handle the degenerate case where we either haven't pruned a dead path or
// removed a trivial assume yet.
2211 | if (isa<Constant>(Val: CmpLHS) && isa<Constant>(Val: CmpRHS)) |
2212 | return Changed; |
2213 | |
2214 | LLVM_DEBUG(dbgs() << "Replacing dominated uses of " |
2215 | << *CmpLHS << " with " |
2216 | << *CmpRHS << " in block " |
2217 | << IntrinsicI->getParent()->getName() << "\n" ); |
2218 | |
2219 | // Setup the replacement map - this handles uses within the same block. |
2220 | if (hasUsersIn(V: CmpLHS, BB: IntrinsicI->getParent())) |
2221 | ReplaceOperandsWithMap[CmpLHS] = CmpRHS; |
2222 | |
2223 | // NOTE: The non-block local cases are handled by the call to |
2224 | // propagateEquality above; this block is just about handling the block |
// local cases. TODO: There's a bunch of logic in propagateEquality which
2226 | // isn't duplicated for the block local case, can we share it somehow? |
2227 | } |
2228 | } |
2229 | return Changed; |
2230 | } |
2231 | |
2232 | static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) { |
2233 | patchReplacementInstruction(I, Repl); |
2234 | I->replaceAllUsesWith(V: Repl); |
2235 | } |
2236 | |
2237 | /// Attempt to eliminate a load, first by eliminating it |
2238 | /// locally, and then attempting non-local elimination if that fails. |
2239 | bool GVNPass::processLoad(LoadInst *L) { |
2240 | if (!MD) |
2241 | return false; |
2242 | |
2243 | // This code hasn't been audited for ordered or volatile memory access. |
2244 | if (!L->isUnordered()) |
2245 | return false; |
2246 | |
2247 | if (L->use_empty()) { |
2248 | salvageAndRemoveInstruction(I: L); |
2249 | return true; |
2250 | } |
2251 | |
2252 | // ... to a pointer that has been loaded from before... |
2253 | MemDepResult Dep = MD->getDependency(QueryInst: L); |
2254 | |
2255 | // If it is defined in another block, try harder. |
2256 | if (Dep.isNonLocal()) |
2257 | return processNonLocalLoad(Load: L); |
2258 | |
2259 | // Only handle the local case below. |
2260 | if (!Dep.isLocal()) { |
2261 | // This might be a NonFuncLocal or an Unknown. |
2262 | LLVM_DEBUG( |
2263 | // fast print dep, using operator<< on instruction is too slow. |
2264 | dbgs() << "GVN: load " ; L->printAsOperand(dbgs()); |
2265 | dbgs() << " has unknown dependence\n" ;); |
2266 | return false; |
2267 | } |
2268 | |
2269 | auto AV = AnalyzeLoadAvailability(Load: L, DepInfo: Dep, Address: L->getPointerOperand()); |
2270 | if (!AV) |
2271 | return false; |
2272 | |
2273 | Value *AvailableValue = AV->MaterializeAdjustedValue(Load: L, InsertPt: L); |
2274 | |
2275 | // MaterializeAdjustedValue is responsible for combining metadata. |
2276 | ICF->removeUsersOf(Inst: L); |
2277 | L->replaceAllUsesWith(V: AvailableValue); |
2278 | if (MSSAU) |
2279 | MSSAU->removeMemoryAccess(I: L); |
2280 | ++NumGVNLoad; |
2281 | reportLoadElim(Load: L, AvailableValue, ORE); |
2282 | salvageAndRemoveInstruction(I: L); |
2283 | // Tell MDA to reexamine the reused pointer since we might have more |
2284 | // information after forwarding it. |
2285 | if (MD && AvailableValue->getType()->isPtrOrPtrVectorTy()) |
2286 | MD->invalidateCachedPointerInfo(Ptr: AvailableValue); |
2287 | return true; |
2288 | } |
2289 | |
/// Return a pair whose first field is the value number of \p Exp and whose
/// second field indicates whether that value number is newly created.
2292 | std::pair<uint32_t, bool> |
2293 | GVNPass::ValueTable::assignExpNewValueNum(Expression &Exp) { |
2294 | uint32_t &E = ExpressionNumbering[Exp]; |
2295 | bool CreateNewValNum = !E; |
2296 | if (CreateNewValNum) { |
2297 | Expressions.push_back(x: Exp); |
2298 | if (ExprIdx.size() < NextValueNumber + 1) |
2299 | ExprIdx.resize(new_size: NextValueNumber * 2); |
2300 | E = NextValueNumber; |
2301 | ExprIdx[NextValueNumber++] = NextExprNumber++; |
2302 | } |
2303 | return {E, CreateNewValNum}; |
2304 | } |
2305 | |
/// Return whether all the values associated with the same \p Num are
/// defined in \p BB.
2308 | bool GVNPass::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB, |
2309 | GVNPass &GVN) { |
2310 | return all_of( |
2311 | Range: GVN.LeaderTable.getLeaders(N: Num), |
2312 | P: [=](const LeaderMap::LeaderTableEntry &L) { return L.BB == BB; }); |
2313 | } |
2314 | |
2315 | /// Wrap phiTranslateImpl to provide caching functionality. |
2316 | uint32_t GVNPass::ValueTable::phiTranslate(const BasicBlock *Pred, |
2317 | const BasicBlock *PhiBlock, |
2318 | uint32_t Num, GVNPass &GVN) { |
2319 | auto FindRes = PhiTranslateTable.find(Val: {Num, Pred}); |
2320 | if (FindRes != PhiTranslateTable.end()) |
2321 | return FindRes->second; |
2322 | uint32_t NewNum = phiTranslateImpl(BB: Pred, PhiBlock, Num, GVN); |
2323 | PhiTranslateTable.insert(KV: {{Num, Pred}, NewNum}); |
2324 | return NewNum; |
2325 | } |
2326 | |
2327 | // Return true if the value number \p Num and NewNum have equal value. |
2328 | // Return false if the result is unknown. |
2329 | bool GVNPass::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum, |
2330 | const BasicBlock *Pred, |
2331 | const BasicBlock *PhiBlock, |
2332 | GVNPass &GVN) { |
2333 | CallInst *Call = nullptr; |
2334 | auto Leaders = GVN.LeaderTable.getLeaders(N: Num); |
2335 | for (const auto &Entry : Leaders) { |
2336 | Call = dyn_cast<CallInst>(Val: Entry.Val); |
2337 | if (Call && Call->getParent() == PhiBlock) |
2338 | break; |
2339 | } |
2340 | |
2341 | if (AA->doesNotAccessMemory(Call)) |
2342 | return true; |
2343 | |
2344 | if (!MD || !AA->onlyReadsMemory(Call)) |
2345 | return false; |
2346 | |
2347 | MemDepResult LocalDep = MD->getDependency(QueryInst: Call); |
2348 | if (!LocalDep.isNonLocal()) |
2349 | return false; |
2350 | |
2351 | const MemoryDependenceResults::NonLocalDepInfo &Deps = |
2352 | MD->getNonLocalCallDependency(QueryCall: Call); |
2353 | |
2354 | // Check to see if the Call has no function local clobber. |
2355 | for (const NonLocalDepEntry &D : Deps) { |
2356 | if (D.getResult().isNonFuncLocal()) |
2357 | return true; |
2358 | } |
2359 | return false; |
2360 | } |
2361 | |
2362 | /// Translate value number \p Num using phis, so that it has the values of |
2363 | /// the phis in BB. |
2364 | uint32_t GVNPass::ValueTable::phiTranslateImpl(const BasicBlock *Pred, |
2365 | const BasicBlock *PhiBlock, |
2366 | uint32_t Num, GVNPass &GVN) { |
2367 | // See if we can refine the value number by looking at the PN incoming value |
2368 | // for the given predecessor. |
2369 | if (PHINode *PN = NumberingPhi[Num]) { |
2370 | if (PN->getParent() == PhiBlock) |
2371 | for (unsigned I = 0; I != PN->getNumIncomingValues(); ++I) |
2372 | if (PN->getIncomingBlock(i: I) == Pred) |
2373 | if (uint32_t TransVal = lookup(V: PN->getIncomingValue(i: I), Verify: false)) |
2374 | return TransVal; |
2375 | return Num; |
2376 | } |
2377 | |
2378 | if (BasicBlock *BB = NumberingBB[Num]) { |
2379 | assert(MSSA && "NumberingBB is non-empty only when using MemorySSA" ); |
2380 | // Value numbers of basic blocks are used to represent memory state in |
2381 | // load/store instructions and read-only function calls when said state is |
2382 | // set by a MemoryPhi. |
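// Illustrative example (hypothetical): two loads of the same pointer whose
// memory state is defined by the same MemoryPhi receive the same expression;
// translating the block's number through the MemoryPhi below maps that
// memory state to the corresponding state in Pred.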
2383 | if (BB != PhiBlock) |
2384 | return Num; |
2385 | MemoryPhi *MPhi = MSSA->getMemoryAccess(BB); |
2386 | for (unsigned i = 0, N = MPhi->getNumIncomingValues(); i != N; ++i) { |
2387 | if (MPhi->getIncomingBlock(I: i) != Pred) |
2388 | continue; |
2389 | MemoryAccess *MA = MPhi->getIncomingValue(I: i); |
2390 | if (auto *PredPhi = dyn_cast<MemoryPhi>(Val: MA)) |
2391 | return lookupOrAdd(V: PredPhi->getBlock()); |
2392 | if (MSSA->isLiveOnEntryDef(MA)) |
2393 | return lookupOrAdd(V: &BB->getParent()->getEntryBlock()); |
2394 | return lookupOrAdd(V: cast<MemoryUseOrDef>(Val: MA)->getMemoryInst()); |
2395 | } |
2396 | llvm_unreachable( |
2397 | "CFG/MemorySSA mismatch: predecessor not found among incoming blocks" ); |
2398 | } |
2399 | |
// If any value associated with Num is defined in a BB other than PhiBlock,
// Num cannot depend on a phi in PhiBlock without going through a backedge.
// We can do an early exit in that case to save compile time.
2403 | if (!areAllValsInBB(Num, BB: PhiBlock, GVN)) |
2404 | return Num; |
2405 | |
2406 | if (Num >= ExprIdx.size() || ExprIdx[Num] == 0) |
2407 | return Num; |
2408 | Expression Exp = Expressions[ExprIdx[Num]]; |
2409 | |
2410 | for (unsigned I = 0; I < Exp.VarArgs.size(); I++) { |
2411 | // For InsertValue and ExtractValue, some varargs are index numbers |
2412 | // instead of value numbers. Those index numbers should not be |
2413 | // translated. |
2414 | if ((I > 1 && Exp.Opcode == Instruction::InsertValue) || |
2415 | (I > 0 && Exp.Opcode == Instruction::ExtractValue) || |
2416 | (I > 1 && Exp.Opcode == Instruction::ShuffleVector)) |
2417 | continue; |
2418 | Exp.VarArgs[I] = phiTranslate(Pred, PhiBlock, Num: Exp.VarArgs[I], GVN); |
2419 | } |
2420 | |
2421 | if (Exp.Commutative) { |
2422 | assert(Exp.VarArgs.size() >= 2 && "Unsupported commutative instruction!" ); |
2423 | if (Exp.VarArgs[0] > Exp.VarArgs[1]) { |
2424 | std::swap(a&: Exp.VarArgs[0], b&: Exp.VarArgs[1]); |
2425 | uint32_t Opcode = Exp.Opcode >> 8; |
2426 | if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) |
2427 | Exp.Opcode = (Opcode << 8) | |
2428 | CmpInst::getSwappedPredicate( |
2429 | pred: static_cast<CmpInst::Predicate>(Exp.Opcode & 255)); |
2430 | } |
2431 | } |
2432 | |
2433 | if (uint32_t NewNum = ExpressionNumbering[Exp]) { |
2434 | if (Exp.Opcode == Instruction::Call && NewNum != Num) |
2435 | return areCallValsEqual(Num, NewNum, Pred, PhiBlock, GVN) ? NewNum : Num; |
2436 | return NewNum; |
2437 | } |
2438 | return Num; |
2439 | } |
2440 | |
2441 | /// Erase stale entry from phiTranslate cache so phiTranslate can be computed |
2442 | /// again. |
2443 | void GVNPass::ValueTable::eraseTranslateCacheEntry( |
2444 | uint32_t Num, const BasicBlock &CurrBlock) { |
2445 | for (const BasicBlock *Pred : predecessors(BB: &CurrBlock)) |
2446 | PhiTranslateTable.erase(Val: {Num, Pred}); |
2447 | } |
2448 | |
2449 | // In order to find a leader for a given value number at a |
2450 | // specific basic block, we first obtain the list of all Values for that number, |
2451 | // and then scan the list to find one whose block dominates the block in |
2452 | // question. This is fast because dominator tree queries consist of only |
2453 | // a few comparisons of DFS numbers. |
2454 | Value *GVNPass::findLeader(const BasicBlock *BB, uint32_t Num) { |
2455 | auto Leaders = LeaderTable.getLeaders(N: Num); |
2456 | if (Leaders.empty()) |
2457 | return nullptr; |
2458 | |
2459 | Value *Val = nullptr; |
2460 | for (const auto &Entry : Leaders) { |
2461 | if (DT->dominates(A: Entry.BB, B: BB)) { |
2462 | Val = Entry.Val; |
2463 | if (isa<Constant>(Val)) |
2464 | return Val; |
2465 | } |
2466 | } |
2467 | |
2468 | return Val; |
2469 | } |
2470 | |
2471 | /// There is an edge from 'Src' to 'Dst'. Return |
2472 | /// true if every path from the entry block to 'Dst' passes via this edge. In |
2473 | /// particular 'Dst' must not be reachable via another edge from 'Src'. |
2474 | static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, |
2475 | DominatorTree *DT) { |
2476 | // While in theory it is interesting to consider the case in which Dst has |
2477 | // more than one predecessor, because Dst might be part of a loop which is |
2478 | // only reachable from Src, in practice it is pointless since at the time |
2479 | // GVN runs all such loops have preheaders, which means that Dst will have |
2480 | // been changed to have only one predecessor, namely Src. |
2481 | const BasicBlock *Pred = E.getEnd()->getSinglePredecessor(); |
2482 | assert((!Pred || Pred == E.getStart()) && |
2483 | "No edge between these basic blocks!" ); |
2484 | return Pred != nullptr; |
2485 | } |
2486 | |
2487 | void GVNPass::assignBlockRPONumber(Function &F) { |
2488 | BlockRPONumber.clear(); |
2489 | uint32_t NextBlockNumber = 1; |
2490 | ReversePostOrderTraversal<Function *> RPOT(&F); |
2491 | for (BasicBlock *BB : RPOT) |
2492 | BlockRPONumber[BB] = NextBlockNumber++; |
2493 | InvalidBlockRPONumbers = false; |
2494 | } |
2495 | |
2496 | bool GVNPass::replaceOperandsForInBlockEquality(Instruction *Instr) const { |
2497 | bool Changed = false; |
2498 | for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) { |
2499 | Value *Operand = Instr->getOperand(i: OpNum); |
2500 | auto It = ReplaceOperandsWithMap.find(Key: Operand); |
2501 | if (It != ReplaceOperandsWithMap.end()) { |
2502 | LLVM_DEBUG(dbgs() << "GVN replacing: " << *Operand << " with " |
2503 | << *It->second << " in instruction " << *Instr << '\n'); |
2504 | Instr->setOperand(i: OpNum, Val: It->second); |
2505 | Changed = true; |
2506 | } |
2507 | } |
2508 | return Changed; |
2509 | } |
2510 | |
2511 | /// The given values are known to be equal in every block |
2512 | /// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with |
2513 | /// 'RHS' everywhere in the scope. Returns whether a change was made. |
2514 | /// If DominatesByEdge is false, then it means that we will propagate the RHS |
2515 | /// value starting from the end of Root.Start. |
2516 | bool GVNPass::propagateEquality(Value *LHS, Value *RHS, |
2517 | const BasicBlockEdge &Root, |
2518 | bool DominatesByEdge) { |
2519 | SmallVector<std::pair<Value*, Value*>, 4> Worklist; |
2520 | Worklist.push_back(Elt: std::make_pair(x&: LHS, y&: RHS)); |
2521 | bool Changed = false; |
2522 | // For speed, compute a conservative fast approximation to |
2523 | // DT->dominates(Root, Root.getEnd()); |
2524 | const bool RootDominatesEnd = isOnlyReachableViaThisEdge(E: Root, DT); |
2525 | |
2526 | while (!Worklist.empty()) { |
2527 | std::pair<Value*, Value*> Item = Worklist.pop_back_val(); |
2528 | LHS = Item.first; RHS = Item.second; |
2529 | |
2530 | if (LHS == RHS) |
2531 | continue; |
2532 | assert(LHS->getType() == RHS->getType() && "Equality but unequal types!" ); |
2533 | |
2534 | // Don't try to propagate equalities between constants. |
2535 | if (isa<Constant>(Val: LHS) && isa<Constant>(Val: RHS)) |
2536 | continue; |
2537 | |
2538 | // Prefer a constant on the right-hand side, or an Argument if no constants. |
2539 | if (isa<Constant>(Val: LHS) || (isa<Argument>(Val: LHS) && !isa<Constant>(Val: RHS))) |
2540 | std::swap(a&: LHS, b&: RHS); |
2541 | assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!" ); |
2542 | const DataLayout &DL = |
2543 | isa<Argument>(Val: LHS) |
2544 | ? cast<Argument>(Val: LHS)->getParent()->getDataLayout() |
2545 | : cast<Instruction>(Val: LHS)->getDataLayout(); |
2546 | |
2547 | // If there is no obvious reason to prefer the left-hand side over the |
2548 | // right-hand side, ensure the longest lived term is on the right-hand side, |
2549 | // so the shortest lived term will be replaced by the longest lived. |
2550 | // This tends to expose more simplifications. |
2551 | uint32_t LVN = VN.lookupOrAdd(V: LHS); |
2552 | if ((isa<Argument>(Val: LHS) && isa<Argument>(Val: RHS)) || |
2553 | (isa<Instruction>(Val: LHS) && isa<Instruction>(Val: RHS))) { |
2554 | // Move the 'oldest' value to the right-hand side, using the value number |
2555 | // as a proxy for age. |
2556 | uint32_t RVN = VN.lookupOrAdd(V: RHS); |
2557 | if (LVN < RVN) { |
2558 | std::swap(a&: LHS, b&: RHS); |
2559 | LVN = RVN; |
2560 | } |
2561 | } |
2562 | |
2563 | // If value numbering later sees that an instruction in the scope is equal |
2564 | // to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve |
2565 | // the invariant that instructions only occur in the leader table for their |
2566 | // own value number (this is used by removeFromLeaderTable), do not do this |
2567 | // if RHS is an instruction (if an instruction in the scope is morphed into |
2568 | // LHS then it will be turned into RHS by the next GVN iteration anyway, so |
2569 | // using the leader table is about compiling faster, not optimizing better). |
// The leader table only tracks basic blocks, not edges. Only add to it if
// we have the simple case where the edge dominates the end.
2572 | if (RootDominatesEnd && !isa<Instruction>(Val: RHS) && |
2573 | canReplacePointersIfEqual(From: LHS, To: RHS, DL)) |
2574 | LeaderTable.insert(N: LVN, V: RHS, BB: Root.getEnd()); |
2575 | |
2576 | // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As |
2577 | // LHS always has at least one use that is not dominated by Root, this will |
2578 | // never do anything if LHS has only one use. |
2579 | if (!LHS->hasOneUse()) { |
2580 | // Create a callback that captures the DL. |
2581 | auto CanReplacePointersCallBack = [&DL](const Use &U, const Value *To) { |
2582 | return canReplacePointersInUseIfEqual(U, To, DL); |
2583 | }; |
2584 | unsigned NumReplacements = |
2585 | DominatesByEdge |
2586 | ? replaceDominatedUsesWithIf(From: LHS, To: RHS, DT&: *DT, Edge: Root, |
2587 | ShouldReplace: CanReplacePointersCallBack) |
2588 | : replaceDominatedUsesWithIf(From: LHS, To: RHS, DT&: *DT, BB: Root.getStart(), |
2589 | ShouldReplace: CanReplacePointersCallBack); |
2590 | |
2591 | if (NumReplacements > 0) { |
2592 | Changed = true; |
2593 | NumGVNEqProp += NumReplacements; |
2594 | // Cached information for anything that uses LHS will be invalid. |
2595 | if (MD) |
2596 | MD->invalidateCachedPointerInfo(Ptr: LHS); |
2597 | } |
2598 | } |
2599 | |
2600 | // Now try to deduce additional equalities from this one. For example, if |
2601 | // the known equality was "(A != B)" == "false" then it follows that A and B |
2602 | // are equal in the scope. Only boolean equalities with an explicit true or |
2603 | // false RHS are currently supported. |
2604 | if (!RHS->getType()->isIntegerTy(Bitwidth: 1)) |
2605 | // Not a boolean equality - bail out. |
2606 | continue; |
2607 | ConstantInt *CI = dyn_cast<ConstantInt>(Val: RHS); |
2608 | if (!CI) |
2609 | // RHS neither 'true' nor 'false' - bail out. |
2610 | continue; |
2611 | // Whether RHS equals 'true'. Otherwise it equals 'false'. |
2612 | bool IsKnownTrue = CI->isMinusOne(); |
2613 | bool IsKnownFalse = !IsKnownTrue; |
2614 | |
2615 | // If "A && B" is known true then both A and B are known true. If "A || B" |
2616 | // is known false then both A and B are known false. |
2617 | Value *A, *B; |
2618 | if ((IsKnownTrue && match(LHS, m_LogicalAnd(m_Value(A), m_Value(B)))) ||
2619 | (IsKnownFalse && match(LHS, m_LogicalOr(m_Value(A), m_Value(B))))) {
2620 | Worklist.push_back(std::make_pair(A, RHS));
2621 | Worklist.push_back(std::make_pair(B, RHS));
2622 | continue; |
2623 | } |
2624 | |
2625 | // If we are propagating an equality like "(A == B)" == "true" then also |
2626 | // propagate the equality A == B. When propagating a comparison such as |
2627 | // "(A >= B)" == "true", replace all instances of "A < B" with "false". |
2628 | if (CmpInst *Cmp = dyn_cast<CmpInst>(LHS)) {
2629 | Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
2630 |
2631 | // If "A == B" is known true, or "A != B" is known false, then replace
2632 | // A with B everywhere in the scope. For floating point operations, we
2633 | // have to be careful since equality does not always imply equivalence.
2634 | if (Cmp->isEquivalence(IsKnownFalse))
2635 | Worklist.push_back(std::make_pair(Op0, Op1));
2636 | |
2637 | // If "A >= B" is known true, replace "A < B" with false everywhere. |
2638 | CmpInst::Predicate NotPred = Cmp->getInversePredicate(); |
2639 | Constant *NotVal = ConstantInt::get(Cmp->getType(), IsKnownFalse);
2640 | // Since we don't have the instruction "A < B" immediately to hand, work
2641 | // out the value number that it would have and use that to find an
2642 | // appropriate instruction (if any).
2643 | uint32_t NextNum = VN.getNextUnusedValueNumber();
2644 | uint32_t Num = VN.lookupOrAddCmp(Cmp->getOpcode(), NotPred, Op0, Op1);
2645 | // If the number we were assigned was brand new then there is no point in |
2646 | // looking for an instruction realizing it: there cannot be one! |
2647 | if (Num < NextNum) { |
2648 | Value *NotCmp = findLeader(Root.getEnd(), Num);
2649 | if (NotCmp && isa<Instruction>(NotCmp)) {
2650 | unsigned NumReplacements =
2651 | DominatesByEdge
2652 | ? replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root)
2653 | : replaceDominatedUsesWith(NotCmp, NotVal, *DT,
2654 | Root.getStart());
2655 | Changed |= NumReplacements > 0;
2656 | NumGVNEqProp += NumReplacements;
2657 | // Cached information for anything that uses NotCmp will be invalid.
2658 | if (MD)
2659 | MD->invalidateCachedPointerInfo(NotCmp);
2660 | } |
2661 | } |
2662 | // Ensure that any instruction in scope that gets the "A < B" value number |
2663 | // is replaced with false. |
2664 | // The leader table only tracks basic blocks, not edges. Only add to it if
2665 | // we have the simple case where the edge dominates the end.
2666 | if (RootDominatesEnd)
2667 | LeaderTable.insert(Num, NotVal, Root.getEnd());
2668 | |
2669 | continue; |
2670 | } |
2671 | |
2672 | // Propagate equalities that result from truncation with no unsigned wrap,
2673 | // like (trunc nuw i64 %v to i1) == "true" or (trunc nuw i64 %v to i1) ==
2674 | // "false".
2675 | if (match(LHS, m_NUWTrunc(m_Value(A)))) {
2676 | Worklist.emplace_back(A, ConstantInt::get(A->getType(), IsKnownTrue));
2677 | continue; |
2678 | } |
2679 | } |
2680 | |
2681 | return Changed; |
2682 | } |
2683 | |
2684 | /// When calculating availability, handle an instruction |
2685 | /// by inserting it into the appropriate sets. |
2686 | bool GVNPass::processInstruction(Instruction *I) { |
2687 | // If the instruction can be easily simplified then do so now in preference |
2688 | // to value numbering it. Value numbering often exposes redundancies, for |
2689 | // example if it determines that %y is equal to %x then the instruction |
2690 | // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. |
2691 | const DataLayout &DL = I->getDataLayout(); |
2692 | if (Value *V = simplifyInstruction(I, {DL, TLI, DT, AC})) {
2693 | bool Changed = false;
2694 | if (!I->use_empty()) {
2695 | // Simplification can cause a special instruction to become not special.
2696 | // For example, devirtualization to a willreturn function.
2697 | ICF->removeUsersOf(I);
2698 | I->replaceAllUsesWith(V); |
2699 | Changed = true; |
2700 | } |
2701 | if (isInstructionTriviallyDead(I, TLI)) { |
2702 | salvageAndRemoveInstruction(I); |
2703 | Changed = true; |
2704 | } |
2705 | if (Changed) { |
2706 | if (MD && V->getType()->isPtrOrPtrVectorTy()) |
2707 | MD->invalidateCachedPointerInfo(V);
2708 | ++NumGVNSimpl; |
2709 | return true; |
2710 | } |
2711 | } |
2712 | |
2713 | if (auto *Assume = dyn_cast<AssumeInst>(I))
2714 | return processAssumeIntrinsic(Assume);
2715 |
2716 | if (LoadInst *Load = dyn_cast<LoadInst>(I)) {
2717 | if (processLoad(Load))
2718 | return true;
2719 |
2720 | unsigned Num = VN.lookupOrAdd(Load);
2721 | LeaderTable.insert(Num, Load, Load->getParent());
2722 | return false; |
2723 | } |
2724 | |
2725 | // For conditional branches, we can perform simple conditional propagation on |
2726 | // the condition value itself. |
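// Illustrative (hypothetical) example:
//   br i1 %cond, label %bb.true, label %bb.false
// Along the edge to %bb.true the condition is known true, and along the edge
// to %bb.false it is known false, so dominated uses of %cond can be rewritten
// to the matching constant.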
2727 | if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
2728 | if (!BI->isConditional())
2729 | return false;
2730 |
2731 | if (isa<Constant>(BI->getCondition()))
2732 | return processFoldableCondBr(BI);
2733 |
2734 | Value *BranchCond = BI->getCondition();
2735 | BasicBlock *TrueSucc = BI->getSuccessor(0);
2736 | BasicBlock *FalseSucc = BI->getSuccessor(1);
2737 | // Avoid multiple edges early. |
2738 | if (TrueSucc == FalseSucc) |
2739 | return false; |
2740 | |
2741 | BasicBlock *Parent = BI->getParent(); |
2742 | bool Changed = false; |
2743 | |
2744 | Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext());
2745 | BasicBlockEdge TrueE(Parent, TrueSucc);
2746 | Changed |= propagateEquality(BranchCond, TrueVal, TrueE, true);
2747 |
2748 | Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext());
2749 | BasicBlockEdge FalseE(Parent, FalseSucc);
2750 | Changed |= propagateEquality(BranchCond, FalseVal, FalseE, true);
2751 | |
2752 | return Changed; |
2753 | } |
2754 | |
2755 | // For switches, propagate the case values into the case destinations. |
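// Illustrative (hypothetical) example:
//   switch i32 %x, label %default [ i32 0, label %bb0 ]
// If %bb0 is reached only through this single case edge, %x is known to be 0
// in blocks dominated by that edge, so such uses of %x can be replaced with 0.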
2756 | if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
2757 | Value *SwitchCond = SI->getCondition();
2758 | BasicBlock *Parent = SI->getParent();
2759 | bool Changed = false;
2760 |
2761 | // Remember how many outgoing edges there are to every successor.
2762 | SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
2763 | for (BasicBlock *Succ : successors(Parent))
2764 | ++SwitchEdges[Succ];
2765 |
2766 | for (const auto &Case : SI->cases()) {
2767 | BasicBlock *Dst = Case.getCaseSuccessor();
2768 | // If there is only a single edge, propagate the case value into it.
2769 | if (SwitchEdges.lookup(Dst) == 1) {
2770 | BasicBlockEdge E(Parent, Dst);
2771 | Changed |= propagateEquality(SwitchCond, Case.getCaseValue(), E, true);
2772 | } |
2773 | } |
2774 | return Changed; |
2775 | } |
2776 | |
2777 | // Instructions with void type don't return a value, so there's |
2778 | // no point in trying to find redundancies in them. |
2779 | if (I->getType()->isVoidTy()) |
2780 | return false; |
2781 | |
2782 | uint32_t NextNum = VN.getNextUnusedValueNumber(); |
2783 | unsigned Num = VN.lookupOrAdd(I);
2784 |
2785 | // Allocations are always uniquely numbered, so we can save time and memory
2786 | // by fast failing them.
2787 | if (isa<AllocaInst>(I) || I->isTerminator() || isa<PHINode>(I)) {
2788 | LeaderTable.insert(Num, I, I->getParent());
2789 | return false; |
2790 | } |
2791 | |
2792 | // If the number we were assigned was a brand new VN, then we don't |
2793 | // need to do a lookup to see if the number already exists |
2794 | // somewhere in the domtree: it can't! |
2795 | if (Num >= NextNum) { |
2796 | LeaderTable.insert(Num, I, I->getParent());
2797 | return false; |
2798 | } |
2799 | |
2800 | // Perform fast-path value-number based elimination of values inherited from |
2801 | // dominators. |
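// For example (hypothetical IR): if a dominating block already computed
// "%x = add i32 %a, %b" and this instruction computes the same add, the
// dominating instruction is the leader for the shared value number and this
// copy can be replaced with it.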
2802 | Value *Repl = findLeader(I->getParent(), Num);
2803 | if (!Repl) {
2804 | // Failure, just remember this instance for future use.
2805 | LeaderTable.insert(Num, I, I->getParent());
2806 | return false; |
2807 | } |
2808 | |
2809 | if (Repl == I) { |
2810 | // If I was the result of a shortcut PRE, it might already be in the table |
2811 | // and the best replacement for itself. Nothing to do. |
2812 | return false; |
2813 | } |
2814 | |
2815 | // Remove it! |
2816 | patchAndReplaceAllUsesWith(I, Repl); |
2817 | if (MD && Repl->getType()->isPtrOrPtrVectorTy()) |
2818 | MD->invalidateCachedPointerInfo(Repl);
2819 | salvageAndRemoveInstruction(I); |
2820 | return true; |
2821 | } |
2822 | |
2823 | /// runOnFunction - This is the main transformation entry point for a function. |
2824 | bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, |
2825 | const TargetLibraryInfo &RunTLI, AAResults &RunAA, |
2826 | MemoryDependenceResults *RunMD, LoopInfo &LI, |
2827 | OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) { |
2828 | AC = &RunAC; |
2829 | DT = &RunDT; |
2830 | VN.setDomTree(DT); |
2831 | TLI = &RunTLI; |
2832 | VN.setAliasAnalysis(&RunAA); |
2833 | MD = RunMD; |
2834 | ImplicitControlFlowTracking ImplicitCFT; |
2835 | ICF = &ImplicitCFT; |
2836 | this->LI = &LI; |
2837 | VN.setMemDep(MD);
2838 | VN.setMemorySSA(MSSA);
2839 | ORE = RunORE; |
2840 | InvalidBlockRPONumbers = true; |
2841 | MemorySSAUpdater Updater(MSSA); |
2842 | MSSAU = MSSA ? &Updater : nullptr; |
2843 | |
2844 | bool Changed = false; |
2845 | bool ShouldContinue = true; |
2846 | |
2847 | DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); |
2848 | // Merge unconditional branches, allowing PRE to catch more |
2849 | // optimization opportunities. |
2850 | for (BasicBlock &BB : make_early_inc_range(F)) {
2851 | bool RemovedBlock = MergeBlockIntoPredecessor(&BB, &DTU, &LI, MSSAU, MD);
2852 | if (RemovedBlock) |
2853 | ++NumGVNBlocks; |
2854 | |
2855 | Changed |= RemovedBlock; |
2856 | } |
2857 | DTU.flush(); |
2858 | |
2859 | unsigned Iteration = 0; |
2860 | while (ShouldContinue) { |
2861 | LLVM_DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
2862 | (void) Iteration; |
2863 | ShouldContinue = iterateOnFunction(F); |
2864 | Changed |= ShouldContinue; |
2865 | ++Iteration; |
2866 | } |
2867 | |
2868 | if (isPREEnabled()) { |
2869 | // Fabricate val-num for dead-code in order to suppress assertion in |
2870 | // performPRE(). |
2871 | assignValNumForDeadCode(); |
2872 | bool PREChanged = true; |
2873 | while (PREChanged) { |
2874 | PREChanged = performPRE(F); |
2875 | Changed |= PREChanged; |
2876 | } |
2877 | } |
2878 | |
2879 | // FIXME: Should perform GVN again after PRE does something. PRE can move |
2880 | // computations into blocks where they become fully redundant. Note that |
2881 | // we can't do this until PRE's critical edge splitting updates memdep. |
2882 | // Actually, when this happens, we should just fully integrate PRE into GVN. |
2883 | |
2884 | cleanupGlobalSets(); |
2885 | // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each |
2886 | // iteration. |
2887 | DeadBlocks.clear(); |
2888 | |
2889 | if (MSSA && VerifyMemorySSA) |
2890 | MSSA->verifyMemorySSA(); |
2891 | |
2892 | return Changed; |
2893 | } |
2894 | |
2895 | bool GVNPass::processBlock(BasicBlock *BB) { |
2896 | if (DeadBlocks.count(BB))
2897 | return false; |
2898 | |
2899 | // Clear the map before every BB because it is only valid for a single BB.
2900 | ReplaceOperandsWithMap.clear(); |
2901 | bool ChangedFunction = false; |
2902 | |
2903 | // Since we may not have visited the input blocks of the phis, we can't |
2904 | // use our normal hash approach for phis. Instead, simply look for |
2905 | // obvious duplicates. The first pass of GVN will tend to create |
2906 | // identical phis, and the second or later passes can eliminate them. |
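// For example (hypothetical IR), two phis such as
//   %p = phi i32 [ %a, %bb0 ], [ %b, %bb1 ]
//   %q = phi i32 [ %a, %bb0 ], [ %b, %bb1 ]
// are obvious duplicates, and uses of %q can simply be rewritten to use %p.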
2907 | SmallPtrSet<PHINode *, 8> PHINodesToRemove; |
2908 | ChangedFunction |= EliminateDuplicatePHINodes(BB, PHINodesToRemove);
2909 | for (PHINode *PN : PHINodesToRemove) {
2910 | removeInstruction(PN);
2911 | }
2912 | for (Instruction &Inst : make_early_inc_range(*BB)) {
2913 | if (!ReplaceOperandsWithMap.empty())
2914 | ChangedFunction |= replaceOperandsForInBlockEquality(&Inst);
2915 | ChangedFunction |= processInstruction(&Inst);
2916 | } |
2917 | return ChangedFunction; |
2918 | } |
2919 | |
2920 | // Instantiate an expression in a predecessor that lacked it. |
2921 | bool GVNPass::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, |
2922 | BasicBlock *Curr, unsigned int ValNo) { |
2923 | // Because we are going top-down through the block, all value numbers |
2924 | // will be available in the predecessor by the time we need them. Any |
2925 | // that weren't originally present will have been instantiated earlier |
2926 | // in this loop. |
2927 | bool Success = true; |
2928 | for (unsigned I = 0, E = Instr->getNumOperands(); I != E; ++I) { |
2929 | Value *Op = Instr->getOperand(I);
2930 | if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
2931 | continue;
2932 | // This could be a newly inserted instruction, in which case, we won't
2933 | // find a value number, and should give up before we hurt ourselves.
2934 | // FIXME: Rewrite the infrastructure to make it easier to value number
2935 | // and process newly inserted instructions.
2936 | if (!VN.exists(Op)) {
2937 | Success = false;
2938 | break;
2939 | }
2940 | uint32_t TValNo =
2941 | VN.phiTranslate(Pred, Curr, VN.lookup(Op), *this);
2942 | if (Value *V = findLeader(Pred, TValNo)) {
2943 | Instr->setOperand(I, V);
2944 | } else { |
2945 | Success = false; |
2946 | break; |
2947 | } |
2948 | } |
2949 | |
2950 | // Fail out if we encounter an operand that is not available in |
2951 | // the PRE predecessor. This is typically because of loads which |
2952 | // are not value numbered precisely. |
2953 | if (!Success) |
2954 | return false; |
2955 | |
2956 | Instr->insertBefore(Pred->getTerminator()->getIterator());
2957 | Instr->setName(Instr->getName() + ".pre");
2958 | Instr->setDebugLoc(Instr->getDebugLoc());
2959 |
2960 | ICF->insertInstructionTo(Instr, Pred);
2961 |
2962 | unsigned Num = VN.lookupOrAdd(Instr);
2963 | VN.add(Instr, Num);
2964 |
2965 | // Update the availability map to include the new instruction.
2966 | LeaderTable.insert(Num, Instr, Pred);
2967 | return true; |
2968 | } |
2969 | |
2970 | bool GVNPass::performScalarPRE(Instruction *CurInst) { |
2971 | if (isa<AllocaInst>(CurInst) || CurInst->isTerminator() ||
2972 | isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() ||
2973 | CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects()) |
2974 | return false; |
2975 | |
2976 | // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from |
2977 | // sinking the compare again, and it would force the code generator to |
2978 | // move the i1 from processor flags or predicate registers into a general |
2979 | // purpose register. |
2980 | if (isa<CmpInst>(CurInst))
2981 | return false; |
2982 | |
2983 | // Don't do PRE on GEPs. The inserted PHI would prevent CodeGenPrepare from |
2984 | // sinking the addressing mode computation back to its uses. Extending the |
2985 | // GEP's live range increases the register pressure, and therefore it can |
2986 | // introduce unnecessary spills. |
2987 | // |
2988 | // This doesn't prevent Load PRE. PHI translation will make the GEP available |
2989 | // to the load by moving it to the predecessor block if necessary. |
2990 | if (isa<GetElementPtrInst>(CurInst))
2991 | return false;
2992 |
2993 | if (auto *CallB = dyn_cast<CallBase>(CurInst)) {
2994 | // We don't currently value number ANY inline asm calls. |
2995 | if (CallB->isInlineAsm()) |
2996 | return false; |
2997 | } |
2998 | |
2999 | uint32_t ValNo = VN.lookup(CurInst);
3000 | |
3001 | // Look for the predecessors for PRE opportunities. We're |
3002 | // only trying to solve the basic diamond case, where |
3003 | // a value is computed in the successor and one predecessor, |
3004 | // but not the other. We also explicitly disallow cases |
3005 | // where the successor is its own predecessor, because they're |
3006 | // more complicated to get right. |
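// Illustrative (hypothetical) example: in a diamond where one predecessor
// computes "%v = add i32 %a, %b", the other does not, and the join block
// recomputes the same add, the add is cloned into the missing predecessor and
// the join block's copy is replaced by a phi over the predecessor values.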
3007 | unsigned NumWith = 0; |
3008 | unsigned NumWithout = 0; |
3009 | BasicBlock *PREPred = nullptr; |
3010 | BasicBlock *CurrentBlock = CurInst->getParent(); |
3011 | |
3012 | // Update the RPO numbers for this function. |
3013 | if (InvalidBlockRPONumbers) |
3014 | assignBlockRPONumber(*CurrentBlock->getParent());
3015 |
3016 | SmallVector<std::pair<Value *, BasicBlock *>, 8> PredMap;
3017 | for (BasicBlock *P : predecessors(CurrentBlock)) {
3018 | // We're not interested in PRE where a predecessor block is not
3019 | // reachable.
3020 | if (!DT->isReachableFromEntry(P)) {
3021 | NumWithout = 2; |
3022 | break; |
3023 | } |
3024 | // It is not safe to do PRE when P->CurrentBlock is a loop backedge. |
3025 | assert(BlockRPONumber.count(P) && BlockRPONumber.count(CurrentBlock) && |
3026 | "Invalid BlockRPONumber map." ); |
3027 | if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock]) { |
3028 | NumWithout = 2; |
3029 | break; |
3030 | } |
3031 | |
3032 | uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this);
3033 | Value *PredV = findLeader(P, TValNo);
3034 | if (!PredV) {
3035 | PredMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
3036 | PREPred = P; |
3037 | ++NumWithout; |
3038 | } else if (PredV == CurInst) { |
3039 | // CurInst dominates this predecessor. |
3040 | NumWithout = 2; |
3041 | break; |
3042 | } else { |
3043 | PredMap.push_back(std::make_pair(PredV, P));
3044 | ++NumWith; |
3045 | } |
3046 | } |
3047 | |
3048 | // Don't do PRE when it might increase code size, i.e. when |
3049 | // we would need to insert instructions in more than one pred. |
3050 | if (NumWithout > 1 || NumWith == 0) |
3051 | return false; |
3052 | |
3053 | // We may have a case where all predecessors have the instruction, |
3054 | // and we just need to insert a phi node. Otherwise, perform |
3055 | // insertion. |
3056 | Instruction *PREInstr = nullptr; |
3057 | |
3058 | if (NumWithout != 0) { |
3059 | if (!isSafeToSpeculativelyExecute(CurInst)) {
3060 | // It is only valid to insert a new instruction if the current instruction
3061 | // is always executed. An instruction with implicit control flow could
3062 | // prevent us from doing it. If we cannot speculate the execution, then
3063 | // PRE should be prohibited.
3064 | if (ICF->isDominatedByICFIFromSameBlock(CurInst))
3065 | return false; |
3066 | } |
3067 | |
3068 | // Don't do PRE across indirect branch. |
3069 | if (isa<IndirectBrInst>(PREPred->getTerminator()))
3070 | return false; |
3071 | |
3072 | // We can't do PRE safely on a critical edge, so instead we schedule |
3073 | // the edge to be split and perform the PRE the next time we iterate |
3074 | // on the function. |
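// For example, if PREPred has several successors and CurrentBlock has several
// predecessors, the PREPred->CurrentBlock edge is critical: inserting into
// PREPred would also compute the value on paths that bypass CurrentBlock, so
// the edge is queued for splitting instead.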
3075 | unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
3076 | if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
3077 | ToSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
3078 | return false; |
3079 | } |
3080 | // We need to insert somewhere, so let's give it a shot. |
3081 | PREInstr = CurInst->clone(); |
3082 | if (!performScalarPREInsertion(PREInstr, PREPred, CurrentBlock, ValNo)) {
3083 | // If we failed insertion, make sure we remove the instruction. |
3084 | #ifndef NDEBUG |
3085 | verifyRemoved(PREInstr); |
3086 | #endif |
3087 | PREInstr->deleteValue(); |
3088 | return false; |
3089 | } |
3090 | } |
3091 | |
3092 | // Either we should have filled in the PRE instruction, or we should |
3093 | // not have needed insertions. |
3094 | assert(PREInstr != nullptr || NumWithout == 0); |
3095 | |
3096 | ++NumGVNPRE; |
3097 | |
3098 | // Create a PHI to make the value available in this block. |
3099 | PHINode *Phi = PHINode::Create(CurInst->getType(), PredMap.size(),
3100 | CurInst->getName() + ".pre-phi");
3101 | Phi->insertBefore(CurrentBlock->begin());
3102 | for (auto &[V, BB] : PredMap) { |
3103 | if (V) { |
3104 | // If we use an existing value in this phi, we have to patch the original |
3105 | // value because the phi will be used to replace a later value. |
3106 | patchReplacementInstruction(CurInst, V);
3107 | Phi->addIncoming(V, BB);
3108 | } else
3109 | Phi->addIncoming(PREInstr, PREPred);
3110 | } |
3111 | |
3112 | VN.add(Phi, ValNo);
3113 | // After creating a new PHI for ValNo, the phi translate result for ValNo will
3114 | // be changed, so erase the related stale entries in the phi translate cache.
3115 | VN.eraseTranslateCacheEntry(ValNo, *CurrentBlock);
3116 | LeaderTable.insert(ValNo, Phi, CurrentBlock);
3117 | Phi->setDebugLoc(CurInst->getDebugLoc());
3118 | CurInst->replaceAllUsesWith(Phi);
3119 | if (MD && Phi->getType()->isPtrOrPtrVectorTy())
3120 | MD->invalidateCachedPointerInfo(Phi);
3121 | LeaderTable.erase(ValNo, CurInst, CurrentBlock);
3122 |
3123 | LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
3124 | removeInstruction(CurInst);
3125 | ++NumGVNInstr; |
3126 | |
3127 | return true; |
3128 | } |
3129 | |
3130 | /// Perform a purely local form of PRE that looks for diamond |
3131 | /// control flow patterns and attempts to perform simple PRE at the join point. |
3132 | bool GVNPass::performPRE(Function &F) { |
3133 | bool Changed = false; |
3134 | for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
3135 | // Nothing to PRE in the entry block. |
3136 | if (CurrentBlock == &F.getEntryBlock()) |
3137 | continue; |
3138 | |
3139 | // Don't perform PRE on an EH pad. |
3140 | if (CurrentBlock->isEHPad()) |
3141 | continue; |
3142 | |
3143 | for (BasicBlock::iterator BI = CurrentBlock->begin(), |
3144 | BE = CurrentBlock->end(); |
3145 | BI != BE;) { |
3146 | Instruction *CurInst = &*BI++; |
3147 | Changed |= performScalarPRE(CurInst); |
3148 | } |
3149 | } |
3150 | |
3151 | if (splitCriticalEdges()) |
3152 | Changed = true; |
3153 | |
3154 | return Changed; |
3155 | } |
3156 | |
3157 | /// Split the critical edge connecting the given two blocks, and return |
3158 | /// the block inserted to the critical edge. |
3159 | BasicBlock *GVNPass::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) { |
3160 | // GVN does not require loop-simplify, do not try to preserve it if it is not |
3161 | // possible. |
3162 | BasicBlock *BB = SplitCriticalEdge(
3163 | Pred, Succ,
3164 | CriticalEdgeSplittingOptions(DT, LI, MSSAU).unsetPreserveLoopSimplify());
3165 | if (BB) { |
3166 | if (MD) |
3167 | MD->invalidateCachedPredecessors(); |
3168 | InvalidBlockRPONumbers = true; |
3169 | } |
3170 | return BB; |
3171 | } |
3172 | |
3173 | /// Split critical edges found during the previous |
3174 | /// iteration that may enable further optimization. |
3175 | bool GVNPass::splitCriticalEdges() { |
3176 | if (ToSplit.empty()) |
3177 | return false; |
3178 | |
3179 | bool Changed = false; |
3180 | do { |
3181 | std::pair<Instruction *, unsigned> Edge = ToSplit.pop_back_val(); |
3182 | Changed |= SplitCriticalEdge(Edge.first, Edge.second,
3183 | CriticalEdgeSplittingOptions(DT, LI, MSSAU)) !=
3184 | nullptr; |
3185 | } while (!ToSplit.empty()); |
3186 | if (Changed) { |
3187 | if (MD) |
3188 | MD->invalidateCachedPredecessors(); |
3189 | InvalidBlockRPONumbers = true; |
3190 | } |
3191 | return Changed; |
3192 | } |
3193 | |
3194 | /// Executes one iteration of GVN. |
3195 | bool GVNPass::iterateOnFunction(Function &F) { |
3196 | cleanupGlobalSets(); |
3197 | |
3198 | // Top-down walk of the dominator tree. |
3199 | bool Changed = false; |
3200 | // Needed for value numbering with phi construction to work. |
3201 | // RPOT walks the graph in its constructor and will not be invalidated during |
3202 | // processBlock. |
3203 | ReversePostOrderTraversal<Function *> RPOT(&F); |
3204 | |
3205 | for (BasicBlock *BB : RPOT) |
3206 | Changed |= processBlock(BB); |
3207 | |
3208 | return Changed; |
3209 | } |
3210 | |
3211 | void GVNPass::cleanupGlobalSets() { |
3212 | VN.clear(); |
3213 | LeaderTable.clear(); |
3214 | BlockRPONumber.clear(); |
3215 | ICF->clear(); |
3216 | InvalidBlockRPONumbers = true; |
3217 | } |
3218 | |
3219 | void GVNPass::removeInstruction(Instruction *I) { |
3220 | VN.erase(I);
3221 | if (MD) MD->removeInstruction(I);
3222 | if (MSSAU) |
3223 | MSSAU->removeMemoryAccess(I); |
3224 | #ifndef NDEBUG |
3225 | verifyRemoved(I); |
3226 | #endif |
3227 | ICF->removeInstruction(I);
3228 | I->eraseFromParent(); |
3229 | } |
3230 | |
3231 | /// Verify that the specified instruction does not occur in our |
3232 | /// internal data structures. |
3233 | void GVNPass::verifyRemoved(const Instruction *Inst) const { |
3234 | VN.verifyRemoved(Inst);
3235 | LeaderTable.verifyRemoved(Inst);
3236 | } |
3237 | |
3238 | /// BB is declared dead, which implies other blocks become dead as well. This
3239 | /// function is to add all these blocks to "DeadBlocks". For the dead blocks' |
3240 | /// live successors, update their phi nodes by replacing the operands |
3241 | /// corresponding to dead blocks with UndefVal. |
3242 | void GVNPass::addDeadBlock(BasicBlock *BB) { |
3243 | SmallVector<BasicBlock *, 4> NewDead; |
3244 | SmallSetVector<BasicBlock *, 4> DF; |
3245 | |
3246 | NewDead.push_back(BB);
3247 | while (!NewDead.empty()) {
3248 | BasicBlock *D = NewDead.pop_back_val();
3249 | if (DeadBlocks.count(D))
3250 | continue; |
3251 | |
3252 | // All blocks dominated by D are dead. |
3253 | SmallVector<BasicBlock *, 8> Dom; |
3254 | DT->getDescendants(D, Dom);
3255 | DeadBlocks.insert_range(Dom);
3256 | |
3257 | // Figure out the dominance-frontier(D). |
3258 | for (BasicBlock *B : Dom) { |
3259 | for (BasicBlock *S : successors(B)) {
3260 | if (DeadBlocks.count(S))
3261 | continue; |
3262 | |
3263 | bool AllPredDead = true; |
3264 | for (BasicBlock *P : predecessors(S))
3265 | if (!DeadBlocks.count(P)) {
3266 | AllPredDead = false; |
3267 | break; |
3268 | } |
3269 | |
3270 | if (!AllPredDead) { |
3271 | // S could be proved dead later on. That is why we don't update phi |
3272 | // operands at this moment. |
3273 | DF.insert(S);
3274 | } else {
3275 | // While S is not dominated by D, it is dead by now. This could take
3276 | // place if S already has a dead predecessor before D is declared
3277 | // dead.
3278 | NewDead.push_back(S);
3279 | } |
3280 | } |
3281 | } |
3282 | } |
3283 | |
3284 | // For the dead blocks' live successors, update their phi nodes by replacing |
3285 | // the operands corresponding to dead blocks with UndefVal. |
3286 | for (BasicBlock *B : DF) { |
3287 | if (DeadBlocks.count(key: B)) |
3288 | continue; |
3289 | |
3290 | // First, split the critical edges. This might also create additional blocks |
3291 | // to preserve LoopSimplify form and adjust edges accordingly. |
3292 | SmallVector<BasicBlock *, 4> Preds(predecessors(B));
3293 | for (BasicBlock *P : Preds) {
3294 | if (!DeadBlocks.count(P))
3295 | continue;
3296 |
3297 | if (is_contained(successors(P), B) &&
3298 | isCriticalEdge(P->getTerminator(), B)) {
3299 | if (BasicBlock *S = splitCriticalEdges(P, B))
3300 | DeadBlocks.insert(P = S);
3301 | } |
3302 | } |
3303 | |
3304 | // Now poison the incoming values from the dead predecessors. |
3305 | for (BasicBlock *P : predecessors(B)) {
3306 | if (!DeadBlocks.count(P))
3307 | continue;
3308 | for (PHINode &Phi : B->phis()) {
3309 | Phi.setIncomingValueForBlock(P, PoisonValue::get(Phi.getType()));
3310 | if (MD)
3311 | MD->invalidateCachedPointerInfo(&Phi);
3312 | } |
3313 | } |
3314 | } |
3315 | } |
3316 | |
3317 | // If the given branch is recognized as a foldable branch (i.e. conditional
3318 | // branch with constant condition), it will perform the following analyses
3319 | // and transformations.
3320 | // 1) If the dead outgoing edge is a critical edge, split it. Let
3321 | // R be the target of the dead outgoing edge.
3322 | // 2) Identify the set of dead blocks implied by the branch's dead outgoing
3323 | // edge. The result of this step will be {X | X is dominated by R}.
3324 | // 3) Identify those blocks which have at least one dead predecessor. The
3325 | // result of this step will be dominance-frontier(R).
3326 | // 4) Update the PHIs in DF(R) by replacing the operands corresponding to
3327 | // dead blocks with "UndefVal" in the hope that these PHIs will be
3328 | // optimized away.
3329 | // Return true iff *NEW* dead code is found.
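// Illustrative (hypothetical) example: for "br i1 true, label %live, label
// %dead", the edge to %dead is the dead outgoing edge; %dead and every block
// it dominates is added to DeadBlocks, and phis in blocks on its dominance
// frontier receive poison for incoming values from the dead predecessors.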
3330 | bool GVNPass::processFoldableCondBr(BranchInst *BI) { |
3331 | if (!BI || BI->isUnconditional()) |
3332 | return false; |
3333 | |
3334 | // If a branch has two identical successors, we cannot declare either dead. |
3335 | if (BI->getSuccessor(0) == BI->getSuccessor(1))
3336 | return false;
3337 |
3338 | ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
3339 | if (!Cond)
3340 | return false;
3341 |
3342 | BasicBlock *DeadRoot =
3343 | Cond->getZExtValue() ? BI->getSuccessor(1) : BI->getSuccessor(0);
3344 | if (DeadBlocks.count(DeadRoot))
3345 | return false;
3346 |
3347 | if (!DeadRoot->getSinglePredecessor())
3348 | DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot);
3349 |
3350 | addDeadBlock(DeadRoot);
3351 | return true; |
3352 | } |
3353 | |
3354 | // performPRE() will trigger an assert if it comes across an instruction
3355 | // without an associated val-num. As there are normally far more live
3356 | // instructions than dead ones, it makes more sense just to "fabricate" a
3357 | // val-number for the dead code than to check whether each instruction is dead.
3358 | void GVNPass::assignValNumForDeadCode() { |
3359 | for (BasicBlock *BB : DeadBlocks) { |
3360 | for (Instruction &Inst : *BB) { |
3361 | unsigned ValNum = VN.lookupOrAdd(&Inst);
3362 | LeaderTable.insert(ValNum, &Inst, BB);
3363 | } |
3364 | } |
3365 | } |
3366 | |
3367 | class llvm::gvn::GVNLegacyPass : public FunctionPass { |
3368 | public: |
3369 | static char ID; // Pass identification, replacement for typeid. |
3370 | |
3371 | explicit GVNLegacyPass(bool MemDepAnalysis = GVNEnableMemDep, |
3372 | bool MemSSAAnalysis = GVNEnableMemorySSA) |
3373 | : FunctionPass(ID), Impl(GVNOptions() |
3374 | .setMemDep(MemDepAnalysis) |
3375 | .setMemorySSA(MemSSAAnalysis)) { |
3376 | initializeGVNLegacyPassPass(*PassRegistry::getPassRegistry()); |
3377 | } |
3378 | |
3379 | bool runOnFunction(Function &F) override { |
3380 | if (skipFunction(F)) |
3381 | return false; |
3382 | |
3383 | auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>(); |
3384 | if (Impl.isMemorySSAEnabled() && !MSSAWP) |
3385 | MSSAWP = &getAnalysis<MemorySSAWrapperPass>(); |
3386 | |
3387 | return Impl.runImpl(
3388 | F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
3389 | getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
3390 | getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
3391 | getAnalysis<AAResultsWrapperPass>().getAAResults(),
3392 | Impl.isMemDepEnabled()
3393 | ? &getAnalysis<MemoryDependenceWrapperPass>().getMemDep()
3394 | : nullptr,
3395 | getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
3396 | &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(),
3397 | MSSAWP ? &MSSAWP->getMSSA() : nullptr);
3398 | } |
3399 | |
3400 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
3401 | AU.addRequired<AssumptionCacheTracker>(); |
3402 | AU.addRequired<DominatorTreeWrapperPass>(); |
3403 | AU.addRequired<TargetLibraryInfoWrapperPass>(); |
3404 | AU.addRequired<LoopInfoWrapperPass>(); |
3405 | if (Impl.isMemDepEnabled()) |
3406 | AU.addRequired<MemoryDependenceWrapperPass>(); |
3407 | AU.addRequired<AAResultsWrapperPass>(); |
3408 | AU.addPreserved<DominatorTreeWrapperPass>(); |
3409 | AU.addPreserved<GlobalsAAWrapperPass>(); |
3410 | AU.addPreserved<TargetLibraryInfoWrapperPass>(); |
3411 | AU.addPreserved<LoopInfoWrapperPass>(); |
3412 | AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); |
3413 | AU.addPreserved<MemorySSAWrapperPass>(); |
3414 | if (Impl.isMemorySSAEnabled()) |
3415 | AU.addRequired<MemorySSAWrapperPass>(); |
3416 | } |
3417 | |
3418 | private: |
3419 | GVNPass Impl; |
3420 | }; |
3421 | |
3422 | char GVNLegacyPass::ID = 0; |
3423 | |
3424 | INITIALIZE_PASS_BEGIN(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
3425 | INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) |
3426 | INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) |
3427 | INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) |
3428 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
3429 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) |
3430 | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) |
3431 | INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) |
3432 | INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) |
3433 | INITIALIZE_PASS_END(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
3434 | |
3435 | // The public interface to this file... |
3436 | FunctionPass *llvm::createGVNPass() { return new GVNLegacyPass(); } |
3437 | |