1 | //===- InstructionCombining.cpp - Combine multiple instructions -----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // InstructionCombining - Combine instructions to form fewer, simple |
10 | // instructions. This pass does not modify the CFG. This pass is where |
11 | // algebraic simplification happens. |
12 | // |
13 | // This pass combines things like: |
14 | // %Y = add i32 %X, 1 |
15 | // %Z = add i32 %Y, 1 |
16 | // into: |
17 | // %Z = add i32 %X, 2 |
18 | // |
19 | // This is a simple worklist driven algorithm. |
20 | // |
21 | // This pass guarantees that the following canonicalizations are performed on |
22 | // the program: |
23 | // 1. If a binary operator has a constant operand, it is moved to the RHS |
24 | // 2. Bitwise operators with constant operands are always grouped so that |
25 | // shifts are performed first, then or's, then and's, then xor's. |
26 | // 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible |
27 | // 4. All cmp instructions on boolean values are replaced with logical ops |
28 | // 5. add X, X is represented as (X*2) => (X << 1) |
29 | // 6. Multiplies with a power-of-two constant argument are transformed into |
30 | // shifts. |
31 | // ... etc. |
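//    For example, canonicalization 6 rewrites:
//        %R = mul i32 %X, 8
//    into:
//        %R = shl i32 %X, 3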
32 | // |
33 | //===----------------------------------------------------------------------===// |
34 | |
35 | #include "InstCombineInternal.h" |
36 | #include "llvm/ADT/APInt.h" |
37 | #include "llvm/ADT/ArrayRef.h" |
38 | #include "llvm/ADT/DenseMap.h" |
39 | #include "llvm/ADT/SmallPtrSet.h" |
40 | #include "llvm/ADT/SmallVector.h" |
41 | #include "llvm/ADT/Statistic.h" |
42 | #include "llvm/Analysis/AliasAnalysis.h" |
43 | #include "llvm/Analysis/AssumptionCache.h" |
44 | #include "llvm/Analysis/BasicAliasAnalysis.h" |
45 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
46 | #include "llvm/Analysis/CFG.h" |
47 | #include "llvm/Analysis/ConstantFolding.h" |
48 | #include "llvm/Analysis/GlobalsModRef.h" |
49 | #include "llvm/Analysis/InstructionSimplify.h" |
50 | #include "llvm/Analysis/LazyBlockFrequencyInfo.h" |
51 | #include "llvm/Analysis/LoopInfo.h" |
52 | #include "llvm/Analysis/MemoryBuiltins.h" |
53 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
54 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
55 | #include "llvm/Analysis/TargetFolder.h" |
56 | #include "llvm/Analysis/TargetLibraryInfo.h" |
57 | #include "llvm/Analysis/TargetTransformInfo.h" |
58 | #include "llvm/Analysis/Utils/Local.h" |
59 | #include "llvm/Analysis/ValueTracking.h" |
60 | #include "llvm/Analysis/VectorUtils.h" |
61 | #include "llvm/IR/BasicBlock.h" |
62 | #include "llvm/IR/CFG.h" |
63 | #include "llvm/IR/Constant.h" |
64 | #include "llvm/IR/Constants.h" |
65 | #include "llvm/IR/DIBuilder.h" |
66 | #include "llvm/IR/DataLayout.h" |
67 | #include "llvm/IR/DebugInfo.h" |
68 | #include "llvm/IR/DerivedTypes.h" |
69 | #include "llvm/IR/Dominators.h" |
70 | #include "llvm/IR/EHPersonalities.h" |
71 | #include "llvm/IR/Function.h" |
72 | #include "llvm/IR/GetElementPtrTypeIterator.h" |
73 | #include "llvm/IR/IRBuilder.h" |
74 | #include "llvm/IR/InstrTypes.h" |
75 | #include "llvm/IR/Instruction.h" |
76 | #include "llvm/IR/Instructions.h" |
77 | #include "llvm/IR/IntrinsicInst.h" |
78 | #include "llvm/IR/Intrinsics.h" |
79 | #include "llvm/IR/Metadata.h" |
80 | #include "llvm/IR/Operator.h" |
81 | #include "llvm/IR/PassManager.h" |
82 | #include "llvm/IR/PatternMatch.h" |
83 | #include "llvm/IR/Type.h" |
84 | #include "llvm/IR/Use.h" |
85 | #include "llvm/IR/User.h" |
86 | #include "llvm/IR/Value.h" |
87 | #include "llvm/IR/ValueHandle.h" |
88 | #include "llvm/InitializePasses.h" |
89 | #include "llvm/Support/Casting.h" |
90 | #include "llvm/Support/CommandLine.h" |
91 | #include "llvm/Support/Compiler.h" |
92 | #include "llvm/Support/Debug.h" |
93 | #include "llvm/Support/DebugCounter.h" |
94 | #include "llvm/Support/ErrorHandling.h" |
95 | #include "llvm/Support/KnownBits.h" |
96 | #include "llvm/Support/raw_ostream.h" |
97 | #include "llvm/Transforms/InstCombine/InstCombine.h" |
98 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
99 | #include "llvm/Transforms/Utils/Local.h" |
100 | #include <algorithm> |
101 | #include <cassert> |
102 | #include <cstdint> |
103 | #include <memory> |
104 | #include <optional> |
105 | #include <string> |
106 | #include <utility> |
107 | |
108 | #define DEBUG_TYPE "instcombine" |
109 | #include "llvm/Transforms/Utils/InstructionWorklist.h" |
110 | #include <optional> |
111 | |
112 | using namespace llvm; |
113 | using namespace llvm::PatternMatch; |
114 | |
115 | STATISTIC(NumWorklistIterations, |
116 | "Number of instruction combining iterations performed" ); |
117 | STATISTIC(NumOneIteration, "Number of functions with one iteration" ); |
118 | STATISTIC(NumTwoIterations, "Number of functions with two iterations" ); |
119 | STATISTIC(NumThreeIterations, "Number of functions with three iterations" ); |
120 | STATISTIC(NumFourOrMoreIterations, |
121 | "Number of functions with four or more iterations" ); |
122 | |
123 | STATISTIC(NumCombined , "Number of insts combined" ); |
124 | STATISTIC(NumConstProp, "Number of constant folds" ); |
125 | STATISTIC(NumDeadInst , "Number of dead inst eliminated" ); |
126 | STATISTIC(NumSunkInst , "Number of instructions sunk" ); |
127 | STATISTIC(NumExpand, "Number of expansions" ); |
128 | STATISTIC(NumFactor , "Number of factorizations" ); |
129 | STATISTIC(NumReassoc , "Number of reassociations" ); |
130 | DEBUG_COUNTER(VisitCounter, "instcombine-visit" , |
131 | "Controls which instructions are visited" ); |
132 | |
133 | static cl::opt<bool> |
134 | EnableCodeSinking("instcombine-code-sinking" , cl::desc("Enable code sinking" ), |
135 | cl::init(Val: true)); |
136 | |
137 | static cl::opt<unsigned> MaxSinkNumUsers( |
138 | "instcombine-max-sink-users" , cl::init(Val: 32), |
139 | cl::desc("Maximum number of undroppable users for instruction sinking" )); |
140 | |
141 | static cl::opt<unsigned> |
142 | MaxArraySize("instcombine-maxarray-size" , cl::init(Val: 1024), |
143 | cl::desc("Maximum array size considered when doing a combine" )); |
144 | |
145 | // FIXME: Remove this flag when it is no longer necessary to convert |
146 | // llvm.dbg.declare to avoid inaccurate debug info. Setting this to false |
147 | // increases variable availability at the cost of accuracy. Variables that |
148 | // cannot be promoted by mem2reg or SROA will be described as living in memory |
149 | // for their entire lifetime. However, passes like DSE and instcombine can |
150 | // delete stores to the alloca, leading to misleading and inaccurate debug |
151 | // information. This flag can be removed when those passes are fixed. |
152 | static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare" , |
153 | cl::Hidden, cl::init(Val: true)); |
154 | |
155 | std::optional<Instruction *> |
156 | InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) { |
157 | // Handle target specific intrinsics |
158 | if (II.getCalledFunction()->isTargetIntrinsic()) { |
159 | return TTI.instCombineIntrinsic(IC&: *this, II); |
160 | } |
161 | return std::nullopt; |
162 | } |
163 | |
164 | std::optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic( |
165 | IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, |
166 | bool &KnownBitsComputed) { |
167 | // Handle target specific intrinsics |
168 | if (II.getCalledFunction()->isTargetIntrinsic()) { |
169 | return TTI.simplifyDemandedUseBitsIntrinsic(IC&: *this, II, DemandedMask, Known, |
170 | KnownBitsComputed); |
171 | } |
172 | return std::nullopt; |
173 | } |
174 | |
175 | std::optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic( |
176 | IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts, |
177 | APInt &PoisonElts2, APInt &PoisonElts3, |
178 | std::function<void(Instruction *, unsigned, APInt, APInt &)> |
179 | SimplifyAndSetOp) { |
180 | // Handle target specific intrinsics |
181 | if (II.getCalledFunction()->isTargetIntrinsic()) { |
182 | return TTI.simplifyDemandedVectorEltsIntrinsic( |
183 | IC&: *this, II, DemandedElts, UndefElts&: PoisonElts, UndefElts2&: PoisonElts2, UndefElts3&: PoisonElts3, |
184 | SimplifyAndSetOp); |
185 | } |
186 | return std::nullopt; |
187 | } |
188 | |
189 | bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const { |
190 | return TTI.isValidAddrSpaceCast(FromAS, ToAS); |
191 | } |
192 | |
193 | Value *InstCombinerImpl::EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP) { |
194 | if (!RewriteGEP) |
195 | return llvm::emitGEPOffset(Builder: &Builder, DL, GEP); |
196 | |
197 | IRBuilderBase::InsertPointGuard Guard(Builder); |
198 | auto *Inst = dyn_cast<Instruction>(Val: GEP); |
199 | if (Inst) |
200 | Builder.SetInsertPoint(Inst); |
201 | |
202 | Value *Offset = EmitGEPOffset(GEP); |
203 | // If a non-trivial GEP has other uses, rewrite it to avoid duplicating |
204 | // the offset arithmetic. |
205 | if (Inst && !GEP->hasOneUse() && !GEP->hasAllConstantIndices() && |
206 | !GEP->getSourceElementType()->isIntegerTy(Bitwidth: 8)) { |
207 | replaceInstUsesWith( |
208 | I&: *Inst, V: Builder.CreateGEP(Ty: Builder.getInt8Ty(), Ptr: GEP->getPointerOperand(), |
209 | IdxList: Offset, Name: "" , NW: GEP->getNoWrapFlags())); |
210 | eraseInstFromFunction(I&: *Inst); |
211 | } |
212 | return Offset; |
213 | } |
214 | |
215 | /// Legal integers and common types are considered desirable. This is used to |
216 | /// avoid creating instructions with types that may not be supported well by the |
/// backend.
218 | /// NOTE: This treats i8, i16 and i32 specially because they are common |
219 | /// types in frontend languages. |
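/// Illustrative example: with native integer widths "n32:64" in the
/// datalayout, a width of 16 is still desirable here (common frontend type),
/// while a width of 48 is not (neither common nor a legal integer width).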
220 | bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const { |
221 | switch (BitWidth) { |
222 | case 8: |
223 | case 16: |
224 | case 32: |
225 | return true; |
226 | default: |
227 | return DL.isLegalInteger(Width: BitWidth); |
228 | } |
229 | } |
230 | |
231 | /// Return true if it is desirable to convert an integer computation from a |
232 | /// given bit width to a new bit width. |
233 | /// We don't want to convert from a legal or desirable type (like i8) to an |
234 | /// illegal type or from a smaller to a larger illegal type. A width of '1' |
235 | /// is always treated as a desirable type because i1 is a fundamental type in |
236 | /// IR, and there are many specialized optimizations for i1 types. |
237 | /// Common/desirable widths are equally treated as legal to convert to, in |
238 | /// order to open up more combining opportunities. |
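/// For example, on a typical datalayout shrinking i33 -> i32 is allowed
/// because i32 is a desirable width even if it is not legal, while widening
/// i32 -> i33 is rejected because it would create an illegal type.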
239 | bool InstCombinerImpl::shouldChangeType(unsigned FromWidth, |
240 | unsigned ToWidth) const { |
241 | bool FromLegal = FromWidth == 1 || DL.isLegalInteger(Width: FromWidth); |
242 | bool ToLegal = ToWidth == 1 || DL.isLegalInteger(Width: ToWidth); |
243 | |
244 | // Convert to desirable widths even if they are not legal types. |
245 | // Only shrink types, to prevent infinite loops. |
246 | if (ToWidth < FromWidth && isDesirableIntType(BitWidth: ToWidth)) |
247 | return true; |
248 | |
// If the source is a legal or desirable integer type and the result would be
250 | // an illegal type, don't do the transformation. |
251 | if ((FromLegal || isDesirableIntType(BitWidth: FromWidth)) && !ToLegal) |
252 | return false; |
253 | |
254 | // Otherwise, if both are illegal, do not increase the size of the result. We |
255 | // do allow things like i160 -> i64, but not i64 -> i160. |
256 | if (!FromLegal && !ToLegal && ToWidth > FromWidth) |
257 | return false; |
258 | |
259 | return true; |
260 | } |
261 | |
262 | /// Return true if it is desirable to convert a computation from 'From' to 'To'. |
263 | /// We don't want to convert from a legal to an illegal type or from a smaller |
264 | /// to a larger illegal type. i1 is always treated as a legal type because it is |
265 | /// a fundamental type in IR, and there are many specialized optimizations for |
266 | /// i1 types. |
267 | bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const { |
268 | // TODO: This could be extended to allow vectors. Datalayout changes might be |
269 | // needed to properly support that. |
270 | if (!From->isIntegerTy() || !To->isIntegerTy()) |
271 | return false; |
272 | |
273 | unsigned FromWidth = From->getPrimitiveSizeInBits(); |
274 | unsigned ToWidth = To->getPrimitiveSizeInBits(); |
275 | return shouldChangeType(FromWidth, ToWidth); |
276 | } |
277 | |
278 | // Return true, if No Signed Wrap should be maintained for I. |
279 | // The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C", |
280 | // where both B and C should be ConstantInts, results in a constant that does |
281 | // not overflow. This function only handles the Add and Sub opcodes. For |
282 | // all other opcodes, the function conservatively returns false. |
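// For example, for an i8 add with B == 100 and C == 20 the nsw flag can be
// kept (120 fits in i8), but with B == 100 and C == 30 it cannot (130
// overflows the signed i8 range).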
283 | static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { |
284 | auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I); |
285 | if (!OBO || !OBO->hasNoSignedWrap()) |
286 | return false; |
287 | |
288 | // We reason about Add and Sub Only. |
289 | Instruction::BinaryOps Opcode = I.getOpcode(); |
290 | if (Opcode != Instruction::Add && Opcode != Instruction::Sub) |
291 | return false; |
292 | |
293 | const APInt *BVal, *CVal; |
294 | if (!match(V: B, P: m_APInt(Res&: BVal)) || !match(V: C, P: m_APInt(Res&: CVal))) |
295 | return false; |
296 | |
297 | bool Overflow = false; |
298 | if (Opcode == Instruction::Add) |
299 | (void)BVal->sadd_ov(RHS: *CVal, Overflow); |
300 | else |
301 | (void)BVal->ssub_ov(RHS: *CVal, Overflow); |
302 | |
303 | return !Overflow; |
304 | } |
305 | |
306 | static bool hasNoUnsignedWrap(BinaryOperator &I) { |
307 | auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I); |
308 | return OBO && OBO->hasNoUnsignedWrap(); |
309 | } |
310 | |
311 | static bool hasNoSignedWrap(BinaryOperator &I) { |
312 | auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I); |
313 | return OBO && OBO->hasNoSignedWrap(); |
314 | } |
315 | |
/// Conservatively clears subclassOptionalData after a reassociation or
/// commutation. Fast-math flags are the exception: they are preserved when
/// applicable.
319 | static void ClearSubclassDataAfterReassociation(BinaryOperator &I) { |
320 | FPMathOperator *FPMO = dyn_cast<FPMathOperator>(Val: &I); |
321 | if (!FPMO) { |
322 | I.clearSubclassOptionalData(); |
323 | return; |
324 | } |
325 | |
326 | FastMathFlags FMF = I.getFastMathFlags(); |
327 | I.clearSubclassOptionalData(); |
328 | I.setFastMathFlags(FMF); |
329 | } |
330 | |
331 | /// Combine constant operands of associative operations either before or after a |
332 | /// cast to eliminate one of the associative operations: |
333 | /// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2))) |
334 | /// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2)) |
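/// Illustrative instance of the second form with a zext and bitwise 'and':
///   (and (zext i8 (and i8 %x, 12) to i32), 7)
///     --> (and (zext i8 %x to i32), 4)   ; 7 & zext(12) == 4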
335 | static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1, |
336 | InstCombinerImpl &IC) { |
337 | auto *Cast = dyn_cast<CastInst>(Val: BinOp1->getOperand(i_nocapture: 0)); |
338 | if (!Cast || !Cast->hasOneUse()) |
339 | return false; |
340 | |
341 | // TODO: Enhance logic for other casts and remove this check. |
342 | auto CastOpcode = Cast->getOpcode(); |
343 | if (CastOpcode != Instruction::ZExt) |
344 | return false; |
345 | |
346 | // TODO: Enhance logic for other BinOps and remove this check. |
347 | if (!BinOp1->isBitwiseLogicOp()) |
348 | return false; |
349 | |
350 | auto AssocOpcode = BinOp1->getOpcode(); |
351 | auto *BinOp2 = dyn_cast<BinaryOperator>(Val: Cast->getOperand(i_nocapture: 0)); |
352 | if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode) |
353 | return false; |
354 | |
355 | Constant *C1, *C2; |
356 | if (!match(V: BinOp1->getOperand(i_nocapture: 1), P: m_Constant(C&: C1)) || |
357 | !match(V: BinOp2->getOperand(i_nocapture: 1), P: m_Constant(C&: C2))) |
358 | return false; |
359 | |
360 | // TODO: This assumes a zext cast. |
361 | // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2 |
362 | // to the destination type might lose bits. |
363 | |
364 | // Fold the constants together in the destination type: |
365 | // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC) |
366 | const DataLayout &DL = IC.getDataLayout(); |
367 | Type *DestTy = C1->getType(); |
368 | Constant *CastC2 = ConstantFoldCastOperand(Opcode: CastOpcode, C: C2, DestTy, DL); |
369 | if (!CastC2) |
370 | return false; |
371 | Constant *FoldedC = ConstantFoldBinaryOpOperands(Opcode: AssocOpcode, LHS: C1, RHS: CastC2, DL); |
372 | if (!FoldedC) |
373 | return false; |
374 | |
375 | IC.replaceOperand(I&: *Cast, OpNum: 0, V: BinOp2->getOperand(i_nocapture: 0)); |
376 | IC.replaceOperand(I&: *BinOp1, OpNum: 1, V: FoldedC); |
377 | BinOp1->dropPoisonGeneratingFlags(); |
378 | Cast->dropPoisonGeneratingFlags(); |
379 | return true; |
380 | } |
381 | |
382 | // Simplifies IntToPtr/PtrToInt RoundTrip Cast. |
383 | // inttoptr ( ptrtoint (x) ) --> x |
384 | Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) { |
385 | auto *IntToPtr = dyn_cast<IntToPtrInst>(Val); |
386 | if (IntToPtr && DL.getTypeSizeInBits(Ty: IntToPtr->getDestTy()) == |
387 | DL.getTypeSizeInBits(Ty: IntToPtr->getSrcTy())) { |
388 | auto *PtrToInt = dyn_cast<PtrToIntInst>(Val: IntToPtr->getOperand(i_nocapture: 0)); |
389 | Type *CastTy = IntToPtr->getDestTy(); |
390 | if (PtrToInt && |
391 | CastTy->getPointerAddressSpace() == |
392 | PtrToInt->getSrcTy()->getPointerAddressSpace() && |
393 | DL.getTypeSizeInBits(Ty: PtrToInt->getSrcTy()) == |
394 | DL.getTypeSizeInBits(Ty: PtrToInt->getDestTy())) |
395 | return PtrToInt->getOperand(i_nocapture: 0); |
396 | } |
397 | return nullptr; |
398 | } |
399 | |
400 | /// This performs a few simplifications for operators that are associative or |
401 | /// commutative: |
402 | /// |
403 | /// Commutative operators: |
404 | /// |
405 | /// 1. Order operands such that they are listed from right (least complex) to |
406 | /// left (most complex). This puts constants before unary operators before |
407 | /// binary operators. |
408 | /// |
409 | /// Associative operators: |
410 | /// |
411 | /// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. |
412 | /// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. |
413 | /// |
414 | /// Associative and commutative operators: |
415 | /// |
416 | /// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. |
417 | /// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. |
418 | /// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" |
419 | /// if C1 and C2 are constants. |
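/// For example, transform 2 turns "(X + 7) + -7" into "X + (7 + -7)", which
/// then folds to X, and transform 6 turns "(X + 3) + (Y + 5)" into
/// "(X + Y) + 8".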
420 | bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) { |
421 | Instruction::BinaryOps Opcode = I.getOpcode(); |
422 | bool Changed = false; |
423 | |
424 | do { |
425 | // Order operands such that they are listed from right (least complex) to |
426 | // left (most complex). This puts constants before unary operators before |
427 | // binary operators. |
428 | if (I.isCommutative() && getComplexity(V: I.getOperand(i_nocapture: 0)) < |
429 | getComplexity(V: I.getOperand(i_nocapture: 1))) |
430 | Changed = !I.swapOperands(); |
431 | |
432 | if (I.isCommutative()) { |
433 | if (auto Pair = matchSymmetricPair(LHS: I.getOperand(i_nocapture: 0), RHS: I.getOperand(i_nocapture: 1))) { |
434 | replaceOperand(I, OpNum: 0, V: Pair->first); |
435 | replaceOperand(I, OpNum: 1, V: Pair->second); |
436 | Changed = true; |
437 | } |
438 | } |
439 | |
440 | BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: I.getOperand(i_nocapture: 0)); |
441 | BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: I.getOperand(i_nocapture: 1)); |
442 | |
443 | if (I.isAssociative()) { |
444 | // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. |
445 | if (Op0 && Op0->getOpcode() == Opcode) { |
446 | Value *A = Op0->getOperand(i_nocapture: 0); |
447 | Value *B = Op0->getOperand(i_nocapture: 1); |
448 | Value *C = I.getOperand(i_nocapture: 1); |
449 | |
450 | // Does "B op C" simplify? |
451 | if (Value *V = simplifyBinOp(Opcode, LHS: B, RHS: C, Q: SQ.getWithInstruction(I: &I))) { |
452 | // It simplifies to V. Form "A op V". |
453 | replaceOperand(I, OpNum: 0, V: A); |
454 | replaceOperand(I, OpNum: 1, V); |
455 | bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(I&: *Op0); |
456 | bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(I&: *Op0); |
457 | |
458 | // Conservatively clear all optional flags since they may not be |
459 | // preserved by the reassociation. Reset nsw/nuw based on the above |
460 | // analysis. |
461 | ClearSubclassDataAfterReassociation(I); |
462 | |
463 | // Note: this is only valid because SimplifyBinOp doesn't look at |
464 | // the operands to Op0. |
465 | if (IsNUW) |
466 | I.setHasNoUnsignedWrap(true); |
467 | |
468 | if (IsNSW) |
469 | I.setHasNoSignedWrap(true); |
470 | |
471 | Changed = true; |
472 | ++NumReassoc; |
473 | continue; |
474 | } |
475 | } |
476 | |
477 | // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. |
478 | if (Op1 && Op1->getOpcode() == Opcode) { |
479 | Value *A = I.getOperand(i_nocapture: 0); |
480 | Value *B = Op1->getOperand(i_nocapture: 0); |
481 | Value *C = Op1->getOperand(i_nocapture: 1); |
482 | |
483 | // Does "A op B" simplify? |
484 | if (Value *V = simplifyBinOp(Opcode, LHS: A, RHS: B, Q: SQ.getWithInstruction(I: &I))) { |
485 | // It simplifies to V. Form "V op C". |
486 | replaceOperand(I, OpNum: 0, V); |
487 | replaceOperand(I, OpNum: 1, V: C); |
488 | // Conservatively clear the optional flags, since they may not be |
489 | // preserved by the reassociation. |
490 | ClearSubclassDataAfterReassociation(I); |
491 | Changed = true; |
492 | ++NumReassoc; |
493 | continue; |
494 | } |
495 | } |
496 | } |
497 | |
498 | if (I.isAssociative() && I.isCommutative()) { |
499 | if (simplifyAssocCastAssoc(BinOp1: &I, IC&: *this)) { |
500 | Changed = true; |
501 | ++NumReassoc; |
502 | continue; |
503 | } |
504 | |
505 | // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. |
506 | if (Op0 && Op0->getOpcode() == Opcode) { |
507 | Value *A = Op0->getOperand(i_nocapture: 0); |
508 | Value *B = Op0->getOperand(i_nocapture: 1); |
509 | Value *C = I.getOperand(i_nocapture: 1); |
510 | |
511 | // Does "C op A" simplify? |
512 | if (Value *V = simplifyBinOp(Opcode, LHS: C, RHS: A, Q: SQ.getWithInstruction(I: &I))) { |
513 | // It simplifies to V. Form "V op B". |
514 | replaceOperand(I, OpNum: 0, V); |
515 | replaceOperand(I, OpNum: 1, V: B); |
516 | // Conservatively clear the optional flags, since they may not be |
517 | // preserved by the reassociation. |
518 | ClearSubclassDataAfterReassociation(I); |
519 | Changed = true; |
520 | ++NumReassoc; |
521 | continue; |
522 | } |
523 | } |
524 | |
525 | // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. |
526 | if (Op1 && Op1->getOpcode() == Opcode) { |
527 | Value *A = I.getOperand(i_nocapture: 0); |
528 | Value *B = Op1->getOperand(i_nocapture: 0); |
529 | Value *C = Op1->getOperand(i_nocapture: 1); |
530 | |
531 | // Does "C op A" simplify? |
532 | if (Value *V = simplifyBinOp(Opcode, LHS: C, RHS: A, Q: SQ.getWithInstruction(I: &I))) { |
533 | // It simplifies to V. Form "B op V". |
534 | replaceOperand(I, OpNum: 0, V: B); |
535 | replaceOperand(I, OpNum: 1, V); |
536 | // Conservatively clear the optional flags, since they may not be |
537 | // preserved by the reassociation. |
538 | ClearSubclassDataAfterReassociation(I); |
539 | Changed = true; |
540 | ++NumReassoc; |
541 | continue; |
542 | } |
543 | } |
544 | |
545 | // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" |
546 | // if C1 and C2 are constants. |
547 | Value *A, *B; |
548 | Constant *C1, *C2, *CRes; |
549 | if (Op0 && Op1 && |
550 | Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode && |
551 | match(V: Op0, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: A), R: m_Constant(C&: C1)))) && |
552 | match(V: Op1, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: B), R: m_Constant(C&: C2)))) && |
553 | (CRes = ConstantFoldBinaryOpOperands(Opcode, LHS: C1, RHS: C2, DL))) { |
554 | bool IsNUW = hasNoUnsignedWrap(I) && |
555 | hasNoUnsignedWrap(I&: *Op0) && |
556 | hasNoUnsignedWrap(I&: *Op1); |
557 | BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ? |
558 | BinaryOperator::CreateNUW(Opc: Opcode, V1: A, V2: B) : |
559 | BinaryOperator::Create(Op: Opcode, S1: A, S2: B); |
560 | |
561 | if (isa<FPMathOperator>(Val: NewBO)) { |
562 | FastMathFlags Flags = I.getFastMathFlags() & |
563 | Op0->getFastMathFlags() & |
564 | Op1->getFastMathFlags(); |
565 | NewBO->setFastMathFlags(Flags); |
566 | } |
567 | InsertNewInstWith(New: NewBO, Old: I.getIterator()); |
568 | NewBO->takeName(V: Op1); |
569 | replaceOperand(I, OpNum: 0, V: NewBO); |
570 | replaceOperand(I, OpNum: 1, V: CRes); |
571 | // Conservatively clear the optional flags, since they may not be |
572 | // preserved by the reassociation. |
573 | ClearSubclassDataAfterReassociation(I); |
574 | if (IsNUW) |
575 | I.setHasNoUnsignedWrap(true); |
576 | |
577 | Changed = true; |
578 | continue; |
579 | } |
580 | } |
581 | |
582 | // No further simplifications. |
583 | return Changed; |
584 | } while (true); |
585 | } |
586 | |
587 | /// Return whether "X LOp (Y ROp Z)" is always equal to |
588 | /// "(X LOp Y) ROp (X LOp Z)". |
589 | static bool leftDistributesOverRight(Instruction::BinaryOps LOp, |
590 | Instruction::BinaryOps ROp) { |
591 | // X & (Y | Z) <--> (X & Y) | (X & Z) |
592 | // X & (Y ^ Z) <--> (X & Y) ^ (X & Z) |
593 | if (LOp == Instruction::And) |
594 | return ROp == Instruction::Or || ROp == Instruction::Xor; |
595 | |
596 | // X | (Y & Z) <--> (X | Y) & (X | Z) |
597 | if (LOp == Instruction::Or) |
598 | return ROp == Instruction::And; |
599 | |
600 | // X * (Y + Z) <--> (X * Y) + (X * Z) |
601 | // X * (Y - Z) <--> (X * Y) - (X * Z) |
602 | if (LOp == Instruction::Mul) |
603 | return ROp == Instruction::Add || ROp == Instruction::Sub; |
604 | |
605 | return false; |
606 | } |
607 | |
608 | /// Return whether "(X LOp Y) ROp Z" is always equal to |
609 | /// "(X ROp Z) LOp (Y ROp Z)". |
610 | static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, |
611 | Instruction::BinaryOps ROp) { |
612 | if (Instruction::isCommutative(Opcode: ROp)) |
613 | return leftDistributesOverRight(LOp: ROp, ROp: LOp); |
614 | |
615 | // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts. |
616 | return Instruction::isBitwiseLogicOp(Opcode: LOp) && Instruction::isShift(Opcode: ROp); |
617 | |
618 | // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z", |
619 | // but this requires knowing that the addition does not overflow and other |
620 | // such subtleties. |
621 | } |
622 | |
/// This function returns the identity value for the given opcode, which can be
/// used to factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
625 | static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) { |
626 | if (isa<Constant>(Val: V)) |
627 | return nullptr; |
628 | |
629 | return ConstantExpr::getBinOpIdentity(Opcode, Ty: V->getType()); |
630 | } |
631 | |
632 | /// This function predicates factorization using distributive laws. By default, |
633 | /// it just returns the 'Op' inputs. But for special-cases like |
634 | /// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add |
635 | /// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to |
636 | /// allow more factorization opportunities. |
637 | static Instruction::BinaryOps |
638 | getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op, |
639 | Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) { |
640 | assert(Op && "Expected a binary operator" ); |
641 | LHS = Op->getOperand(i_nocapture: 0); |
642 | RHS = Op->getOperand(i_nocapture: 1); |
643 | if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) { |
644 | Constant *C; |
645 | if (match(V: Op, P: m_Shl(L: m_Value(), R: m_ImmConstant(C)))) { |
646 | // X << C --> X * (1 << C) |
647 | RHS = ConstantFoldBinaryInstruction( |
648 | Opcode: Instruction::Shl, V1: ConstantInt::get(Ty: Op->getType(), V: 1), V2: C); |
649 | assert(RHS && "Constant folding of immediate constants failed" ); |
650 | return Instruction::Mul; |
651 | } |
652 | // TODO: We can add other conversions e.g. shr => div etc. |
653 | } |
654 | if (Instruction::isBitwiseLogicOp(Opcode: TopOpcode)) { |
655 | if (OtherOp && OtherOp->getOpcode() == Instruction::AShr && |
656 | match(V: Op, P: m_LShr(L: m_NonNegative(), R: m_Value()))) { |
657 | // lshr nneg C, X --> ashr nneg C, X |
658 | return Instruction::AShr; |
659 | } |
660 | } |
661 | return Op->getOpcode(); |
662 | } |
663 | |
664 | /// This tries to simplify binary operations by factorizing out common terms |
/// (e.g. "(A*B)+(A*C)" -> "A*(B+C)").
666 | static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ, |
667 | InstCombiner::BuilderTy &Builder, |
668 | Instruction::BinaryOps InnerOpcode, Value *A, |
669 | Value *B, Value *C, Value *D) { |
670 | assert(A && B && C && D && "All values must be provided" ); |
671 | |
672 | Value *V = nullptr; |
673 | Value *RetVal = nullptr; |
674 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
675 | Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); |
676 | |
677 | // Does "X op' Y" always equal "Y op' X"? |
678 | bool InnerCommutative = Instruction::isCommutative(Opcode: InnerOpcode); |
679 | |
680 | // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"? |
681 | if (leftDistributesOverRight(LOp: InnerOpcode, ROp: TopLevelOpcode)) { |
682 | // Does the instruction have the form "(A op' B) op (A op' D)" or, in the |
683 | // commutative case, "(A op' B) op (C op' A)"? |
684 | if (A == C || (InnerCommutative && A == D)) { |
685 | if (A != C) |
686 | std::swap(a&: C, b&: D); |
687 | // Consider forming "A op' (B op D)". |
688 | // If "B op D" simplifies then it can be formed with no cost. |
689 | V = simplifyBinOp(Opcode: TopLevelOpcode, LHS: B, RHS: D, Q: SQ.getWithInstruction(I: &I)); |
690 | |
691 | // If "B op D" doesn't simplify then only go on if one of the existing |
692 | // operations "A op' B" and "C op' D" will be zapped as no longer used. |
693 | if (!V && (LHS->hasOneUse() || RHS->hasOneUse())) |
694 | V = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: B, RHS: D, Name: RHS->getName()); |
695 | if (V) |
696 | RetVal = Builder.CreateBinOp(Opc: InnerOpcode, LHS: A, RHS: V); |
697 | } |
698 | } |
699 | |
700 | // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"? |
701 | if (!RetVal && rightDistributesOverLeft(LOp: TopLevelOpcode, ROp: InnerOpcode)) { |
702 | // Does the instruction have the form "(A op' B) op (C op' B)" or, in the |
703 | // commutative case, "(A op' B) op (B op' D)"? |
704 | if (B == D || (InnerCommutative && B == C)) { |
705 | if (B != D) |
706 | std::swap(a&: C, b&: D); |
707 | // Consider forming "(A op C) op' B". |
708 | // If "A op C" simplifies then it can be formed with no cost. |
709 | V = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQ.getWithInstruction(I: &I)); |
710 | |
711 | // If "A op C" doesn't simplify then only go on if one of the existing |
712 | // operations "A op' B" and "C op' D" will be zapped as no longer used. |
713 | if (!V && (LHS->hasOneUse() || RHS->hasOneUse())) |
714 | V = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C, Name: LHS->getName()); |
715 | if (V) |
716 | RetVal = Builder.CreateBinOp(Opc: InnerOpcode, LHS: V, RHS: B); |
717 | } |
718 | } |
719 | |
720 | if (!RetVal) |
721 | return nullptr; |
722 | |
723 | ++NumFactor; |
724 | RetVal->takeName(V: &I); |
725 | |
726 | // Try to add no-overflow flags to the final value. |
727 | if (isa<OverflowingBinaryOperator>(Val: RetVal)) { |
728 | bool HasNSW = false; |
729 | bool HasNUW = false; |
730 | if (isa<OverflowingBinaryOperator>(Val: &I)) { |
731 | HasNSW = I.hasNoSignedWrap(); |
732 | HasNUW = I.hasNoUnsignedWrap(); |
733 | } |
734 | if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(Val: LHS)) { |
735 | HasNSW &= LOBO->hasNoSignedWrap(); |
736 | HasNUW &= LOBO->hasNoUnsignedWrap(); |
737 | } |
738 | |
739 | if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(Val: RHS)) { |
740 | HasNSW &= ROBO->hasNoSignedWrap(); |
741 | HasNUW &= ROBO->hasNoUnsignedWrap(); |
742 | } |
743 | |
744 | if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) { |
745 | // We can propagate 'nsw' if we know that |
746 | // %Y = mul nsw i16 %X, C |
747 | // %Z = add nsw i16 %Y, %X |
748 | // => |
749 | // %Z = mul nsw i16 %X, C+1 |
750 | // |
751 | // iff C+1 isn't INT_MIN |
752 | const APInt *CInt; |
753 | if (match(V, P: m_APInt(Res&: CInt)) && !CInt->isMinSignedValue()) |
754 | cast<Instruction>(Val: RetVal)->setHasNoSignedWrap(HasNSW); |
755 | |
756 | // nuw can be propagated with any constant or nuw value. |
757 | cast<Instruction>(Val: RetVal)->setHasNoUnsignedWrap(HasNUW); |
758 | } |
759 | } |
760 | return RetVal; |
761 | } |
762 | |
763 | // If `I` has one Const operand and the other matches `(ctpop (not x))`, |
764 | // replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`. |
// This is only useful if the new subtract can fold, so we only handle the
// following cases:
//    1) (add/sub/disjoint_or C, (ctpop (not x)))
//        -> (add/sub/disjoint_or C', (ctpop x))
//    2) (cmp pred C, (ctpop (not x)))
//        -> (cmp pred C', (ctpop x))
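// Illustrative example for the add case with i8 (constants chosen
// arbitrarily): since (ctpop (not x)) == (sub 8, (ctpop x)),
//   (add i8 10, (ctpop (not %x)))  -->  (sub i8 18, (ctpop %x))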
771 | Instruction *InstCombinerImpl::tryFoldInstWithCtpopWithNot(Instruction *I) { |
772 | unsigned Opc = I->getOpcode(); |
773 | unsigned ConstIdx = 1; |
774 | switch (Opc) { |
775 | default: |
776 | return nullptr; |
// (ctpop (not x)) <-> (sub nuw nsw BitWidth(x), (ctpop x))
// We can fold the BitWidth(x) with add/sub/icmp as long as the other operand
// is constant.
780 | case Instruction::Sub: |
781 | ConstIdx = 0; |
782 | break; |
783 | case Instruction::ICmp: |
// Signed predicates aren't correct in some edge cases (e.g. for i2 types).
// Also, since (ctpop x) is known to lie in [0, BitWidth(x)], almost all signed
// comparisons against it are simplified to unsigned ones.
787 | if (cast<ICmpInst>(Val: I)->isSigned()) |
788 | return nullptr; |
789 | break; |
790 | case Instruction::Or: |
791 | if (!match(V: I, P: m_DisjointOr(L: m_Value(), R: m_Value()))) |
792 | return nullptr; |
793 | [[fallthrough]]; |
794 | case Instruction::Add: |
795 | break; |
796 | } |
797 | |
798 | Value *Op; |
799 | // Find ctpop. |
800 | if (!match(V: I->getOperand(i: 1 - ConstIdx), |
801 | P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ctpop>(Op0: m_Value(V&: Op))))) |
802 | return nullptr; |
803 | |
804 | Constant *C; |
805 | // Check other operand is ImmConstant. |
806 | if (!match(V: I->getOperand(i: ConstIdx), P: m_ImmConstant(C))) |
807 | return nullptr; |
808 | |
809 | Type *Ty = Op->getType(); |
810 | Constant *BitWidthC = ConstantInt::get(Ty, V: Ty->getScalarSizeInBits()); |
811 | // Need extra check for icmp. Note if this check is true, it generally means |
812 | // the icmp will simplify to true/false. |
813 | if (Opc == Instruction::ICmp && !cast<ICmpInst>(Val: I)->isEquality()) { |
814 | Constant *Cmp = |
815 | ConstantFoldCompareInstOperands(Predicate: ICmpInst::ICMP_UGT, LHS: C, RHS: BitWidthC, DL); |
816 | if (!Cmp || !Cmp->isZeroValue()) |
817 | return nullptr; |
818 | } |
819 | |
820 | // Check we can invert `(not x)` for free. |
821 | bool Consumes = false; |
822 | if (!isFreeToInvert(V: Op, WillInvertAllUses: Op->hasOneUse(), DoesConsume&: Consumes) || !Consumes) |
823 | return nullptr; |
824 | Value *NotOp = getFreelyInverted(V: Op, WillInvertAllUses: Op->hasOneUse(), Builder: &Builder); |
825 | assert(NotOp != nullptr && |
826 | "Desync between isFreeToInvert and getFreelyInverted" ); |
827 | |
828 | Value *CtpopOfNotOp = Builder.CreateIntrinsic(RetTy: Ty, ID: Intrinsic::ctpop, Args: NotOp); |
829 | |
830 | Value *R = nullptr; |
831 | |
832 | // Do the transformation here to avoid potentially introducing an infinite |
833 | // loop. |
834 | switch (Opc) { |
835 | case Instruction::Sub: |
836 | R = Builder.CreateAdd(LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: C, C2: BitWidthC)); |
837 | break; |
838 | case Instruction::Or: |
839 | case Instruction::Add: |
840 | R = Builder.CreateSub(LHS: ConstantExpr::getAdd(C1: C, C2: BitWidthC), RHS: CtpopOfNotOp); |
841 | break; |
842 | case Instruction::ICmp: |
843 | R = Builder.CreateICmp(P: cast<ICmpInst>(Val: I)->getSwappedPredicate(), |
844 | LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: BitWidthC, C2: C)); |
845 | break; |
846 | default: |
847 | llvm_unreachable("Unhandled Opcode" ); |
848 | } |
849 | assert(R != nullptr); |
850 | return replaceInstUsesWith(I&: *I, V: R); |
851 | } |
852 | |
853 | // (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C)) |
854 | // IFF |
855 | // 1) the logic_shifts match |
//    2) either BinOp1 is `and`, or the binop/shift pair distributes
//       completely and either BinOp2 is `and` or
//       (logic_shift (inv_logic_shift C1, C), C) == C1
859 | // |
860 | // -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C) |
861 | // |
862 | // (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt)) |
863 | // IFF |
864 | // 1) the logic_shifts match |
865 | // 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`). |
866 | // |
867 | // -> (BinOp (logic_shift (BinOp X, Y)), Mask) |
868 | // |
869 | // (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt)) |
870 | // IFF |
871 | // 1) Binop1 is bitwise logical operator `and`, `or` or `xor` |
872 | // 2) Binop2 is `not` |
873 | // |
874 | // -> (arithmetic_shift Binop1((not X), Y), Amt) |
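// Illustrative instance of the second pattern, assuming the shifts have no
// other uses:
//   (or (or (lshr X, 4), Mask), (lshr Y, 4))
//     -> (or (lshr (or X, Y), 4), Mask)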
875 | |
876 | Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) { |
877 | const DataLayout &DL = I.getDataLayout(); |
878 | auto IsValidBinOpc = [](unsigned Opc) { |
879 | switch (Opc) { |
880 | default: |
881 | return false; |
882 | case Instruction::And: |
883 | case Instruction::Or: |
884 | case Instruction::Xor: |
885 | case Instruction::Add: |
886 | // Skip Sub as we only match constant masks which will canonicalize to use |
887 | // add. |
888 | return true; |
889 | } |
890 | }; |
891 | |
892 | // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra |
893 | // constraints. |
894 | auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2, |
895 | unsigned ShOpc) { |
896 | assert(ShOpc != Instruction::AShr); |
897 | return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) || |
898 | ShOpc == Instruction::Shl; |
899 | }; |
900 | |
901 | auto GetInvShift = [](unsigned ShOpc) { |
902 | assert(ShOpc != Instruction::AShr); |
903 | return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr; |
904 | }; |
905 | |
906 | auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2, |
907 | unsigned ShOpc, Constant *CMask, |
908 | Constant *CShift) { |
909 | // If the BinOp1 is `and` we don't need to check the mask. |
910 | if (BinOpc1 == Instruction::And) |
911 | return true; |
912 | |
// For all other possible transforms we need a completely distributable
// binop/shift (anything but `add` + `lshr`).
915 | if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc)) |
916 | return false; |
917 | |
918 | // If BinOp2 is `and`, any mask works (this only really helps for non-splat |
919 | // vecs, otherwise the mask will be simplified and the following check will |
920 | // handle it). |
921 | if (BinOpc2 == Instruction::And) |
922 | return true; |
923 | |
924 | // Otherwise, need mask that meets the below requirement. |
925 | // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask |
926 | Constant *MaskInvShift = |
927 | ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL); |
928 | return ConstantFoldBinaryOpOperands(Opcode: ShOpc, LHS: MaskInvShift, RHS: CShift, DL) == |
929 | CMask; |
930 | }; |
931 | |
932 | auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * { |
933 | Constant *CMask, *CShift; |
934 | Value *X, *Y, *ShiftedX, *Mask, *Shift; |
935 | if (!match(V: I.getOperand(i_nocapture: ShOpnum), |
936 | P: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: Y), R: m_Value(V&: Shift))))) |
937 | return nullptr; |
938 | if (!match(V: I.getOperand(i_nocapture: 1 - ShOpnum), |
939 | P: m_BinOp(L: m_Value(V&: ShiftedX), R: m_Value(V&: Mask)))) |
940 | return nullptr; |
941 | |
942 | if (!match(V: ShiftedX, P: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: X), R: m_Specific(V: Shift))))) |
943 | return nullptr; |
944 | |
945 | // Make sure we are matching instruction shifts and not ConstantExpr |
946 | auto *IY = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: ShOpnum)); |
947 | auto *IX = dyn_cast<Instruction>(Val: ShiftedX); |
948 | if (!IY || !IX) |
949 | return nullptr; |
950 | |
951 | // LHS and RHS need same shift opcode |
952 | unsigned ShOpc = IY->getOpcode(); |
953 | if (ShOpc != IX->getOpcode()) |
954 | return nullptr; |
955 | |
956 | // Make sure binop is real instruction and not ConstantExpr |
957 | auto *BO2 = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: 1 - ShOpnum)); |
958 | if (!BO2) |
959 | return nullptr; |
960 | |
961 | unsigned BinOpc = BO2->getOpcode(); |
962 | // Make sure we have valid binops. |
963 | if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc)) |
964 | return nullptr; |
965 | |
966 | if (ShOpc == Instruction::AShr) { |
967 | if (Instruction::isBitwiseLogicOp(Opcode: I.getOpcode()) && |
968 | BinOpc == Instruction::Xor && match(V: Mask, P: m_AllOnes())) { |
969 | Value *NotX = Builder.CreateNot(V: X); |
970 | Value *NewBinOp = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NotX); |
971 | return BinaryOperator::Create( |
972 | Op: static_cast<Instruction::BinaryOps>(ShOpc), S1: NewBinOp, S2: Shift); |
973 | } |
974 | |
975 | return nullptr; |
976 | } |
977 | |
// If BinOp1 == BinOp2 and it is bitwise, or it is shl with add, then just
// distribute to drop the shift regardless of the constants.
980 | if (BinOpc == I.getOpcode() && |
981 | IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) { |
982 | Value *NewBinOp2 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: X, RHS: Y); |
983 | Value *NewBinOp1 = Builder.CreateBinOp( |
984 | Opc: static_cast<Instruction::BinaryOps>(ShOpc), LHS: NewBinOp2, RHS: Shift); |
985 | return BinaryOperator::Create(Op: I.getOpcode(), S1: NewBinOp1, S2: Mask); |
986 | } |
987 | |
988 | // Otherwise we can only distribute by constant shifting the mask, so |
989 | // ensure we have constants. |
990 | if (!match(V: Shift, P: m_ImmConstant(C&: CShift))) |
991 | return nullptr; |
992 | if (!match(V: Mask, P: m_ImmConstant(C&: CMask))) |
993 | return nullptr; |
994 | |
995 | // Check if we can distribute the binops. |
996 | if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift)) |
997 | return nullptr; |
998 | |
999 | Constant *NewCMask = |
1000 | ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL); |
1001 | Value *NewBinOp2 = Builder.CreateBinOp( |
1002 | Opc: static_cast<Instruction::BinaryOps>(BinOpc), LHS: X, RHS: NewCMask); |
1003 | Value *NewBinOp1 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NewBinOp2); |
1004 | return BinaryOperator::Create(Op: static_cast<Instruction::BinaryOps>(ShOpc), |
1005 | S1: NewBinOp1, S2: CShift); |
1006 | }; |
1007 | |
1008 | if (Instruction *R = MatchBinOp(0)) |
1009 | return R; |
1010 | return MatchBinOp(1); |
1011 | } |
1012 | |
1013 | // (Binop (zext C), (select C, T, F)) |
1014 | // -> (select C, (binop 1, T), (binop 0, F)) |
1015 | // |
1016 | // (Binop (sext C), (select C, T, F)) |
1017 | // -> (select C, (binop -1, T), (binop 0, F)) |
1018 | // |
// Attempt to simplify binary operations into a select with folded args, when
// one operand of the binop is a select instruction and the other operand is a
// zext/sext of the select condition.
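// For example, using the zext form with an i32 add:
//   (add (zext i1 %c), (select %c, i32 %t, i32 %f))
//     -> (select %c, (add 1, %t), (add 0, %f))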
1022 | Instruction * |
1023 | InstCombinerImpl::foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I) { |
1024 | // TODO: this simplification may be extended to any speculatable instruction, |
1025 | // not just binops, and would possibly be handled better in FoldOpIntoSelect. |
1026 | Instruction::BinaryOps Opc = I.getOpcode(); |
1027 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
1028 | Value *A, *CondVal, *TrueVal, *FalseVal; |
1029 | Value *CastOp; |
1030 | |
1031 | auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) { |
1032 | return match(V: CastOp, P: m_ZExtOrSExt(Op: m_Value(V&: A))) && |
1033 | A->getType()->getScalarSizeInBits() == 1 && |
1034 | match(V: SelectOp, P: m_Select(C: m_Value(V&: CondVal), L: m_Value(V&: TrueVal), |
1035 | R: m_Value(V&: FalseVal))); |
1036 | }; |
1037 | |
1038 | // Make sure one side of the binop is a select instruction, and the other is a |
// zero/sign extension operating on an i1.
1040 | if (MatchSelectAndCast(LHS, RHS)) |
1041 | CastOp = LHS; |
1042 | else if (MatchSelectAndCast(RHS, LHS)) |
1043 | CastOp = RHS; |
1044 | else |
1045 | return nullptr; |
1046 | |
1047 | auto NewFoldedConst = [&](bool IsTrueArm, Value *V) { |
1048 | bool IsCastOpRHS = (CastOp == RHS); |
1049 | bool IsZExt = isa<ZExtInst>(Val: CastOp); |
1050 | Constant *C; |
1051 | |
1052 | if (IsTrueArm) { |
1053 | C = Constant::getNullValue(Ty: V->getType()); |
1054 | } else if (IsZExt) { |
1055 | unsigned BitWidth = V->getType()->getScalarSizeInBits(); |
1056 | C = Constant::getIntegerValue(Ty: V->getType(), V: APInt(BitWidth, 1)); |
1057 | } else { |
1058 | C = Constant::getAllOnesValue(Ty: V->getType()); |
1059 | } |
1060 | |
1061 | return IsCastOpRHS ? Builder.CreateBinOp(Opc, LHS: V, RHS: C) |
1062 | : Builder.CreateBinOp(Opc, LHS: C, RHS: V); |
1063 | }; |
1064 | |
// If the value used in the zext/sext is the select condition, or the negation
1066 | // of the select condition, the binop can be simplified. |
1067 | if (CondVal == A) { |
1068 | Value *NewTrueVal = NewFoldedConst(false, TrueVal); |
1069 | return SelectInst::Create(C: CondVal, S1: NewTrueVal, |
1070 | S2: NewFoldedConst(true, FalseVal)); |
1071 | } |
1072 | |
1073 | if (match(V: A, P: m_Not(V: m_Specific(V: CondVal)))) { |
1074 | Value *NewTrueVal = NewFoldedConst(true, TrueVal); |
1075 | return SelectInst::Create(C: CondVal, S1: NewTrueVal, |
1076 | S2: NewFoldedConst(false, FalseVal)); |
1077 | } |
1078 | |
1079 | return nullptr; |
1080 | } |
1081 | |
1082 | Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) { |
1083 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
1084 | BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS); |
1085 | BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS); |
1086 | Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); |
1087 | Value *A, *B, *C, *D; |
1088 | Instruction::BinaryOps LHSOpcode, RHSOpcode; |
1089 | |
1090 | if (Op0) |
1091 | LHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op0, LHS&: A, RHS&: B, OtherOp: Op1); |
1092 | if (Op1) |
1093 | RHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op1, LHS&: C, RHS&: D, OtherOp: Op0); |
1094 | |
1095 | // The instruction has the form "(A op' B) op (C op' D)". Try to factorize |
1096 | // a common term. |
1097 | if (Op0 && Op1 && LHSOpcode == RHSOpcode) |
1098 | if (Value *V = tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C, D)) |
1099 | return V; |
1100 | |
1101 | // The instruction has the form "(A op' B) op (C)". Try to factorize common |
1102 | // term. |
1103 | if (Op0) |
1104 | if (Value *Ident = getIdentityValue(Opcode: LHSOpcode, V: RHS)) |
1105 | if (Value *V = |
1106 | tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C: RHS, D: Ident)) |
1107 | return V; |
1108 | |
1109 | // The instruction has the form "(B) op (C op' D)". Try to factorize common |
1110 | // term. |
1111 | if (Op1) |
1112 | if (Value *Ident = getIdentityValue(Opcode: RHSOpcode, V: LHS)) |
1113 | if (Value *V = |
1114 | tryFactorization(I, SQ, Builder, InnerOpcode: RHSOpcode, A: LHS, B: Ident, C, D)) |
1115 | return V; |
1116 | |
1117 | return nullptr; |
1118 | } |
1119 | |
1120 | /// This tries to simplify binary operations which some other binary operation |
1121 | /// distributes over either by factorizing out common terms |
1122 | /// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in |
1123 | /// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win). |
1124 | /// Returns the simplified value, or null if it didn't simplify. |
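/// For example, expansion turns "(X | 4) & 3" into "(X & 3) | (4 & 3)"; the
/// right half folds to 0, the identity of 'or', so the result is simply
/// "X & 3".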
1125 | Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) { |
1126 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
1127 | BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS); |
1128 | BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS); |
1129 | Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); |
1130 | |
1131 | // Factorization. |
1132 | if (Value *R = tryFactorizationFolds(I)) |
1133 | return R; |
1134 | |
1135 | // Expansion. |
1136 | if (Op0 && rightDistributesOverLeft(LOp: Op0->getOpcode(), ROp: TopLevelOpcode)) { |
1137 | // The instruction has the form "(A op' B) op C". See if expanding it out |
1138 | // to "(A op C) op' (B op C)" results in simplifications. |
1139 | Value *A = Op0->getOperand(i_nocapture: 0), *B = Op0->getOperand(i_nocapture: 1), *C = RHS; |
1140 | Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' |
1141 | |
1142 | // Disable the use of undef because it's not safe to distribute undef. |
1143 | auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef(); |
1144 | Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive); |
1145 | Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: B, RHS: C, Q: SQDistributive); |
1146 | |
1147 | // Do "A op C" and "B op C" both simplify? |
1148 | if (L && R) { |
1149 | // They do! Return "L op' R". |
1150 | ++NumExpand; |
1151 | C = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R); |
1152 | C->takeName(V: &I); |
1153 | return C; |
1154 | } |
1155 | |
1156 | // Does "A op C" simplify to the identity value for the inner opcode? |
1157 | if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) { |
1158 | // They do! Return "B op C". |
1159 | ++NumExpand; |
1160 | C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: B, RHS: C); |
1161 | C->takeName(V: &I); |
1162 | return C; |
1163 | } |
1164 | |
1165 | // Does "B op C" simplify to the identity value for the inner opcode? |
1166 | if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) { |
// It does! Return "A op C".
1168 | ++NumExpand; |
1169 | C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C); |
1170 | C->takeName(V: &I); |
1171 | return C; |
1172 | } |
1173 | } |
1174 | |
1175 | if (Op1 && leftDistributesOverRight(LOp: TopLevelOpcode, ROp: Op1->getOpcode())) { |
1176 | // The instruction has the form "A op (B op' C)". See if expanding it out |
1177 | // to "(A op B) op' (A op C)" results in simplifications. |
1178 | Value *A = LHS, *B = Op1->getOperand(i_nocapture: 0), *C = Op1->getOperand(i_nocapture: 1); |
1179 | Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op' |
1180 | |
1181 | // Disable the use of undef because it's not safe to distribute undef. |
1182 | auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef(); |
1183 | Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: B, Q: SQDistributive); |
1184 | Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive); |
1185 | |
1186 | // Do "A op B" and "A op C" both simplify? |
1187 | if (L && R) { |
1188 | // They do! Return "L op' R". |
1189 | ++NumExpand; |
1190 | A = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R); |
1191 | A->takeName(V: &I); |
1192 | return A; |
1193 | } |
1194 | |
1195 | // Does "A op B" simplify to the identity value for the inner opcode? |
1196 | if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) { |
// It does! Return "A op C".
1198 | ++NumExpand; |
1199 | A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C); |
1200 | A->takeName(V: &I); |
1201 | return A; |
1202 | } |
1203 | |
1204 | // Does "A op C" simplify to the identity value for the inner opcode? |
1205 | if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) { |
// It does! Return "A op B".
1207 | ++NumExpand; |
1208 | A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: B); |
1209 | A->takeName(V: &I); |
1210 | return A; |
1211 | } |
1212 | } |
1213 | |
1214 | return SimplifySelectsFeedingBinaryOp(I, LHS, RHS); |
1215 | } |
1216 | |
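/// Match two phi nodes from the same block whose incoming values are, for
/// every predecessor, either the same pair (L, R) or the swapped pair (R, L).
/// On success, return the (L, R) pair taken from the first incoming values.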
1217 | static std::optional<std::pair<Value *, Value *>> |
1218 | matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS) { |
1219 | if (LHS->getParent() != RHS->getParent()) |
1220 | return std::nullopt; |
1221 | |
1222 | if (LHS->getNumIncomingValues() < 2) |
1223 | return std::nullopt; |
1224 | |
1225 | if (!equal(LRange: LHS->blocks(), RRange: RHS->blocks())) |
1226 | return std::nullopt; |
1227 | |
1228 | Value *L0 = LHS->getIncomingValue(i: 0); |
1229 | Value *R0 = RHS->getIncomingValue(i: 0); |
1230 | |
1231 | for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) { |
1232 | Value *L1 = LHS->getIncomingValue(i: I); |
1233 | Value *R1 = RHS->getIncomingValue(i: I); |
1234 | |
1235 | if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1)) |
1236 | continue; |
1237 | |
1238 | return std::nullopt; |
1239 | } |
1240 | |
1241 | return std::optional(std::pair(L0, R0)); |
1242 | } |
1243 | |
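/// Try to view LHS and RHS as two "symmetric" results built from one
/// underlying pair of values: matching phis (see above), a select and the
/// same select with its arms swapped, or a min/max and the corresponding
/// max/min of the same operands. Returns that underlying value pair if found.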
1244 | std::optional<std::pair<Value *, Value *>> |
1245 | InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) { |
1246 | Instruction *LHSInst = dyn_cast<Instruction>(Val: LHS); |
1247 | Instruction *RHSInst = dyn_cast<Instruction>(Val: RHS); |
1248 | if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode()) |
1249 | return std::nullopt; |
1250 | switch (LHSInst->getOpcode()) { |
1251 | case Instruction::PHI: |
1252 | return matchSymmetricPhiNodesPair(LHS: cast<PHINode>(Val: LHS), RHS: cast<PHINode>(Val: RHS)); |
1253 | case Instruction::Select: { |
1254 | Value *Cond = LHSInst->getOperand(i: 0); |
1255 | Value *TrueVal = LHSInst->getOperand(i: 1); |
1256 | Value *FalseVal = LHSInst->getOperand(i: 2); |
1257 | if (Cond == RHSInst->getOperand(i: 0) && TrueVal == RHSInst->getOperand(i: 2) && |
1258 | FalseVal == RHSInst->getOperand(i: 1)) |
1259 | return std::pair(TrueVal, FalseVal); |
1260 | return std::nullopt; |
1261 | } |
1262 | case Instruction::Call: { |
1263 | // Match min(a, b) and max(a, b) |
1264 | MinMaxIntrinsic *LHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: LHSInst); |
1265 | MinMaxIntrinsic *RHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: RHSInst); |
1266 | if (LHSMinMax && RHSMinMax && |
1267 | LHSMinMax->getPredicate() == |
1268 | ICmpInst::getSwappedPredicate(pred: RHSMinMax->getPredicate()) && |
1269 | ((LHSMinMax->getLHS() == RHSMinMax->getLHS() && |
1270 | LHSMinMax->getRHS() == RHSMinMax->getRHS()) || |
1271 | (LHSMinMax->getLHS() == RHSMinMax->getRHS() && |
1272 | LHSMinMax->getRHS() == RHSMinMax->getLHS()))) |
1273 | return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS()); |
1274 | return std::nullopt; |
1275 | } |
1276 | default: |
1277 | return std::nullopt; |
1278 | } |
1279 | } |
1280 | |
1281 | Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I, |
1282 | Value *LHS, |
1283 | Value *RHS) { |
1284 | Value *A, *B, *C, *D, *E, *F; |
1285 | bool LHSIsSelect = match(V: LHS, P: m_Select(C: m_Value(V&: A), L: m_Value(V&: B), R: m_Value(V&: C))); |
1286 | bool RHSIsSelect = match(V: RHS, P: m_Select(C: m_Value(V&: D), L: m_Value(V&: E), R: m_Value(V&: F))); |
1287 | if (!LHSIsSelect && !RHSIsSelect) |
1288 | return nullptr; |
1289 | |
1290 | FastMathFlags FMF; |
1291 | BuilderTy::FastMathFlagGuard Guard(Builder); |
1292 | if (isa<FPMathOperator>(Val: &I)) { |
1293 | FMF = I.getFastMathFlags(); |
1294 | Builder.setFastMathFlags(FMF); |
1295 | } |
1296 | |
1297 | Instruction::BinaryOps Opcode = I.getOpcode(); |
1298 | SimplifyQuery Q = SQ.getWithInstruction(I: &I); |
1299 | |
1300 | Value *Cond, *True = nullptr, *False = nullptr; |
1301 | |
1302 | // Special-case for add/negate combination. Replace the zero in the negation |
1303 | // with the trailing add operand: |
1304 | // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N) |
1305 | // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False |
1306 | auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * { |
1307 | // We need an 'add' and exactly 1 arm of the select to have been simplified. |
1308 | if (Opcode != Instruction::Add || (!True && !False) || (True && False)) |
1309 | return nullptr; |
1310 | |
1311 | Value *N; |
1312 | if (True && match(V: FVal, P: m_Neg(V: m_Value(V&: N)))) { |
1313 | Value *Sub = Builder.CreateSub(LHS: Z, RHS: N); |
1314 | return Builder.CreateSelect(C: Cond, True, False: Sub, Name: I.getName()); |
1315 | } |
1316 | if (False && match(V: TVal, P: m_Neg(V: m_Value(V&: N)))) { |
1317 | Value *Sub = Builder.CreateSub(LHS: Z, RHS: N); |
1318 | return Builder.CreateSelect(C: Cond, True: Sub, False, Name: I.getName()); |
1319 | } |
1320 | return nullptr; |
1321 | }; |
1322 | |
1323 | if (LHSIsSelect && RHSIsSelect && A == D) { |
1324 | // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F) |
1325 | Cond = A; |
1326 | True = simplifyBinOp(Opcode, LHS: B, RHS: E, FMF, Q); |
1327 | False = simplifyBinOp(Opcode, LHS: C, RHS: F, FMF, Q); |
1328 | |
1329 | if (LHS->hasOneUse() && RHS->hasOneUse()) { |
1330 | if (False && !True) |
1331 | True = Builder.CreateBinOp(Opc: Opcode, LHS: B, RHS: E); |
1332 | else if (True && !False) |
1333 | False = Builder.CreateBinOp(Opc: Opcode, LHS: C, RHS: F); |
1334 | } |
1335 | } else if (LHSIsSelect && LHS->hasOneUse()) { |
1336 | // (A ? B : C) op Y -> A ? (B op Y) : (C op Y) |
1337 | Cond = A; |
1338 | True = simplifyBinOp(Opcode, LHS: B, RHS, FMF, Q); |
1339 | False = simplifyBinOp(Opcode, LHS: C, RHS, FMF, Q); |
1340 | if (Value *NewSel = foldAddNegate(B, C, RHS)) |
1341 | return NewSel; |
1342 | } else if (RHSIsSelect && RHS->hasOneUse()) { |
1343 | // X op (D ? E : F) -> D ? (X op E) : (X op F) |
1344 | Cond = D; |
1345 | True = simplifyBinOp(Opcode, LHS, RHS: E, FMF, Q); |
1346 | False = simplifyBinOp(Opcode, LHS, RHS: F, FMF, Q); |
1347 | if (Value *NewSel = foldAddNegate(E, F, LHS)) |
1348 | return NewSel; |
1349 | } |
1350 | |
1351 | if (!True || !False) |
1352 | return nullptr; |
1353 | |
1354 | Value *SI = Builder.CreateSelect(C: Cond, True, False); |
1355 | SI->takeName(V: &I); |
1356 | return SI; |
1357 | } |
1358 | |
1359 | /// Freely adapt every user of V as if V were changed to !V.
1360 | /// WARNING: only call this if canFreelyInvertAllUsersOf() said it can be done.
1361 | void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) { |
1362 | assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
1363 | for (User *U : make_early_inc_range(Range: I->users())) { |
1364 | if (U == IgnoredUser) |
1365 | continue; // Don't consider this user. |
1366 | switch (cast<Instruction>(Val: U)->getOpcode()) { |
1367 | case Instruction::Select: { |
1368 | auto *SI = cast<SelectInst>(Val: U); |
1369 | SI->swapValues(); |
1370 | SI->swapProfMetadata(); |
1371 | break; |
1372 | } |
1373 | case Instruction::Br: { |
1374 | BranchInst *BI = cast<BranchInst>(Val: U); |
1375 | BI->swapSuccessors(); // swaps prof metadata too |
1376 | if (BPI) |
1377 | BPI->swapSuccEdgesProbabilities(Src: BI->getParent()); |
1378 | break; |
1379 | } |
1380 | case Instruction::Xor: |
1381 | replaceInstUsesWith(I&: cast<Instruction>(Val&: *U), V: I); |
1382 | // Add to worklist for DCE. |
1383 | addToWorklist(I: cast<Instruction>(Val: U)); |
1384 | break; |
1385 | default: |
1386 | llvm_unreachable("Got unexpected user - out of sync with " |
1387 | "canFreelyInvertAllUsersOf() ?" ); |
1388 | } |
1389 | } |
1390 | } |
1391 | |
1392 | /// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a |
1393 | /// constant zero (which is the 'negate' form). |
1394 | Value *InstCombinerImpl::dyn_castNegVal(Value *V) const { |
1395 | Value *NegV; |
1396 | if (match(V, P: m_Neg(V: m_Value(V&: NegV)))) |
1397 | return NegV; |
1398 | |
1399 | // Constants can be considered to be negated values if they can be folded. |
1400 | if (ConstantInt *C = dyn_cast<ConstantInt>(Val: V)) |
1401 | return ConstantExpr::getNeg(C); |
1402 | |
1403 | if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(Val: V)) |
1404 | if (C->getType()->getElementType()->isIntegerTy()) |
1405 | return ConstantExpr::getNeg(C); |
1406 | |
1407 | if (ConstantVector *CV = dyn_cast<ConstantVector>(Val: V)) { |
1408 | for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { |
1409 | Constant *Elt = CV->getAggregateElement(Elt: i); |
1410 | if (!Elt) |
1411 | return nullptr; |
1412 | |
1413 | if (isa<UndefValue>(Val: Elt)) |
1414 | continue; |
1415 | |
1416 | if (!isa<ConstantInt>(Val: Elt)) |
1417 | return nullptr; |
1418 | } |
1419 | return ConstantExpr::getNeg(C: CV); |
1420 | } |
1421 | |
1422 | // Negate integer vector splats. |
1423 | if (auto *CV = dyn_cast<Constant>(Val: V)) |
1424 | if (CV->getType()->isVectorTy() && |
1425 | CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue()) |
1426 | return ConstantExpr::getNeg(C: CV); |
1427 | |
1428 | return nullptr; |
1429 | } |
1430 | |
1431 | // Try to fold: |
1432 | // 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y)) |
1433 | // -> ({s|u}itofp (int_binop x, y)) |
1434 | // 2) (fp_binop ({s|u}itofp x), FpC) |
1435 | // -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC))) |
1436 | // |
1437 | // Assuming the sign of the cast for x/y is `OpsFromSigned`. |
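//
// For example (illustrative; the transform only fires when the exactness and
// overflow checks below succeed):
//   fadd (sitofp i16 %x), (sitofp i16 %y) --> sitofp (add nsw i16 %x, %y)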
1438 | Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign( |
1439 | BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps, |
1440 | Constant *Op1FpC, SmallVectorImpl<WithCache<const Value *>> &OpsKnown) { |
1441 | |
1442 | Type *FPTy = BO.getType(); |
1443 | Type *IntTy = IntOps[0]->getType(); |
1444 | |
1445 | unsigned IntSz = IntTy->getScalarSizeInBits(); |
1446 | // This is the maximum number of bits the integer can use for the int -> fp
1447 | // casts to be exact.
1448 | unsigned MaxRepresentableBits = |
1449 | APFloat::semanticsPrecision(FPTy->getScalarType()->getFltSemantics()); |
1450 | |
1451 | // Preserve the known number of leading bits. This can allow us to trivially
1452 | // pass the nsw/nuw checks later on.
1453 | unsigned NumUsedLeadingBits[2] = {IntSz, IntSz}; |
1454 | |
1455 | // NB: This only comes up if OpsFromSigned is true, so there is no need to |
1456 | // cache it between calls to `foldFBinOpOfIntCastsFromSign`.
1457 | auto IsNonZero = [&](unsigned OpNo) -> bool { |
1458 | if (OpsKnown[OpNo].hasKnownBits() && |
1459 | OpsKnown[OpNo].getKnownBits(Q: SQ).isNonZero()) |
1460 | return true; |
1461 | return isKnownNonZero(V: IntOps[OpNo], Q: SQ); |
1462 | }; |
1463 | |
1464 | auto IsNonNeg = [&](unsigned OpNo) -> bool { |
1465 | // NB: This matches the impl in ValueTracking, we just try to use cached |
1466 | // knownbits here. If we ever start supporting WithCache for |
1467 | // `isKnownNonNegative`, change this to an explicit call. |
1468 | return OpsKnown[OpNo].getKnownBits(Q: SQ).isNonNegative(); |
1469 | }; |
1470 | |
1471 | // Check if we know for certain that ({s|u}itofp op) is exact. |
1472 | auto IsValidPromotion = [&](unsigned OpNo) -> bool { |
1473 | // Can we treat this operand as the desired sign? |
1474 | if (OpsFromSigned != isa<SIToFPInst>(Val: BO.getOperand(i_nocapture: OpNo)) && |
1475 | !IsNonNeg(OpNo)) |
1476 | return false; |
1477 | |
1478 | // If fp precision >= bitwidth(op) then it's exact.
1479 | // NB: This is slightly conservative for `sitofp`. For signed conversion, we |
1480 | // can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be |
1481 | // handled specially. We can't, however, increase the bound arbitrarily for |
1482 | // `sitofp` as for larger sizes, it won't sign extend. |
1483 | if (MaxRepresentableBits < IntSz) { |
1484 | // Otherwise, if it is a signed cast, check that fp precision >= bitwidth(op) -
1485 | // numSignBits(op). |
1486 | // TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change |
1487 | // `IntOps[OpNo]` arguments to `KnownOps[OpNo]`. |
1488 | if (OpsFromSigned) |
1489 | NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(Op: IntOps[OpNo]); |
1490 | // Finally, for unsigned, check that fp precision >= bitwidth(op) -
1491 | // numLeadingZeros(op). |
1492 | else { |
1493 | NumUsedLeadingBits[OpNo] = |
1494 | IntSz - OpsKnown[OpNo].getKnownBits(Q: SQ).countMinLeadingZeros(); |
1495 | } |
1496 | } |
1497 | // NB: We could also check if op is known to be a power of 2 or zero (which |
1498 | // will always be representable). It's unlikely, however, that if we are
1499 | // unable to bound op in any way, we will be able to pass the overflow checks
1500 | // later on. |
1501 | |
1502 | if (MaxRepresentableBits < NumUsedLeadingBits[OpNo]) |
1503 | return false; |
1504 | // Signed + Mul also requires that op is non-zero to avoid -0 cases. |
1505 | return !OpsFromSigned || BO.getOpcode() != Instruction::FMul || |
1506 | IsNonZero(OpNo); |
1507 | }; |
1508 | |
1509 | // If we have a constant rhs, see if we can losslessly convert it to an int. |
1510 | if (Op1FpC != nullptr) { |
1511 | // Signed + Mul requires the operand to be non-zero.
1512 | if (OpsFromSigned && BO.getOpcode() == Instruction::FMul && |
1513 | !match(V: Op1FpC, P: m_NonZeroFP())) |
1514 | return nullptr; |
1515 | |
1516 | Constant *Op1IntC = ConstantFoldCastOperand( |
1517 | Opcode: OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, C: Op1FpC, |
1518 | DestTy: IntTy, DL); |
1519 | if (Op1IntC == nullptr) |
1520 | return nullptr; |
1521 | if (ConstantFoldCastOperand(Opcode: OpsFromSigned ? Instruction::SIToFP |
1522 | : Instruction::UIToFP, |
1523 | C: Op1IntC, DestTy: FPTy, DL) != Op1FpC) |
1524 | return nullptr; |
1525 | |
1526 | // First try to keep sign of cast the same. |
1527 | IntOps[1] = Op1IntC; |
1528 | } |
1529 | |
1530 | // Ensure lhs/rhs integer types match. |
1531 | if (IntTy != IntOps[1]->getType()) |
1532 | return nullptr; |
1533 | |
1534 | if (Op1FpC == nullptr) { |
1535 | if (!IsValidPromotion(1)) |
1536 | return nullptr; |
1537 | } |
1538 | if (!IsValidPromotion(0)) |
1539 | return nullptr; |
1540 | |
1541 | // Finally, we check that the integer version of the binop will not overflow.
1542 | BinaryOperator::BinaryOps IntOpc; |
1543 | // Because of the precision check, we can often rule out overflows. |
1544 | bool NeedsOverflowCheck = true; |
1545 | // Try to conservatively rule out overflow based on the precision checks
1546 | // already done.
1547 | unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1; |
1548 | unsigned OverflowMaxCurBits = |
1549 | std::max(a: NumUsedLeadingBits[0], b: NumUsedLeadingBits[1]); |
1550 | bool OutputSigned = OpsFromSigned; |
1551 | switch (BO.getOpcode()) { |
1552 | case Instruction::FAdd: |
1553 | IntOpc = Instruction::Add; |
1554 | OverflowMaxOutputBits += OverflowMaxCurBits; |
1555 | break; |
1556 | case Instruction::FSub: |
1557 | IntOpc = Instruction::Sub; |
1558 | OverflowMaxOutputBits += OverflowMaxCurBits; |
1559 | break; |
1560 | case Instruction::FMul: |
1561 | IntOpc = Instruction::Mul; |
1562 | OverflowMaxOutputBits += OverflowMaxCurBits * 2; |
1563 | break; |
1564 | default: |
1565 | llvm_unreachable("Unsupported binop" ); |
1566 | } |
1567 | // The precision check may have already ruled out overflow. |
1568 | if (OverflowMaxOutputBits < IntSz) { |
1569 | NeedsOverflowCheck = false; |
1570 | // An unsigned sub may wrap below zero, but its result still fits in the
1571 | // signed range (this is what allows us to avoid the overflow check for sub).
1572 | if (IntOpc == Instruction::Sub) |
1573 | OutputSigned = true; |
1574 | } |
1575 | |
1576 | // The precision check did not rule out overflow, so we need to check explicitly.
1577 | // TODO: If we add support for `WithCache` in `willNotOverflow`, change |
1578 | // `IntOps[...]` arguments to `KnownOps[...]`. |
1579 | if (NeedsOverflowCheck && |
1580 | !willNotOverflow(Opcode: IntOpc, LHS: IntOps[0], RHS: IntOps[1], CxtI: BO, IsSigned: OutputSigned)) |
1581 | return nullptr; |
1582 | |
1583 | Value *IntBinOp = Builder.CreateBinOp(Opc: IntOpc, LHS: IntOps[0], RHS: IntOps[1]); |
1584 | if (auto *IntBO = dyn_cast<BinaryOperator>(Val: IntBinOp)) { |
1585 | IntBO->setHasNoSignedWrap(OutputSigned); |
1586 | IntBO->setHasNoUnsignedWrap(!OutputSigned); |
1587 | } |
1588 | if (OutputSigned) |
1589 | return new SIToFPInst(IntBinOp, FPTy); |
1590 | return new UIToFPInst(IntBinOp, FPTy); |
1591 | } |
1592 | |
1593 | // Try to fold: |
1594 | // 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y)) |
1595 | // -> ({s|u}itofp (int_binop x, y)) |
1596 | // 2) (fp_binop ({s|u}itofp x), FpC) |
1597 | // -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC))) |
1598 | Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) { |
1599 | std::array<Value *, 2> IntOps = {nullptr, nullptr}; |
1600 | Constant *Op1FpC = nullptr; |
1601 | // Check for: |
1602 | // 1) (binop ({s|u}itofp x), ({s|u}itofp y)) |
1603 | // 2) (binop ({s|u}itofp x), FpC) |
1604 | if (!match(V: BO.getOperand(i_nocapture: 0), P: m_SIToFP(Op: m_Value(V&: IntOps[0]))) && |
1605 | !match(V: BO.getOperand(i_nocapture: 0), P: m_UIToFP(Op: m_Value(V&: IntOps[0])))) |
1606 | return nullptr; |
1607 | |
1608 | if (!match(V: BO.getOperand(i_nocapture: 1), P: m_Constant(C&: Op1FpC)) && |
1609 | !match(V: BO.getOperand(i_nocapture: 1), P: m_SIToFP(Op: m_Value(V&: IntOps[1]))) && |
1610 | !match(V: BO.getOperand(i_nocapture: 1), P: m_UIToFP(Op: m_Value(V&: IntOps[1])))) |
1611 | return nullptr; |
1612 | |
1613 | // Cache the KnownBits to potentially save some repeated analysis.
1614 | SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]}; |
1615 | |
1616 | // Try treating x/y as coming from both `uitofp` and `sitofp`. There are |
1617 | // different constraints depending on the sign of the cast. |
1618 | // NB: `(uitofp nneg X)` == `(sitofp nneg X)`. |
1619 | if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false, |
1620 | IntOps, Op1FpC, OpsKnown)) |
1621 | return R; |
1622 | return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps, |
1623 | Op1FpC, OpsKnown); |
1624 | } |
1625 | |
1626 | /// A binop with a constant operand and a sign-extended boolean operand may be |
1627 | /// converted into a select of constants by applying the binary operation to |
1628 | /// the constant with the two possible values of the extended boolean (0 or -1). |
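/// For example (illustrative):
///   %s = sext i1 %b to i32
///   %r = and i32 %s, 42
/// -->
///   %r = select i1 %b, i32 42, i32 0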
1629 | Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) { |
1630 | // TODO: Handle non-commutative binop (constant is operand 0). |
1631 | // TODO: Handle zext. |
1632 | // TODO: Peek through 'not' of cast. |
1633 | Value *BO0 = BO.getOperand(i_nocapture: 0); |
1634 | Value *BO1 = BO.getOperand(i_nocapture: 1); |
1635 | Value *X; |
1636 | Constant *C; |
1637 | if (!match(V: BO0, P: m_SExt(Op: m_Value(V&: X))) || !match(V: BO1, P: m_ImmConstant(C)) || |
1638 | !X->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
1639 | return nullptr; |
1640 | |
1641 | // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C) |
1642 | Constant *Ones = ConstantInt::getAllOnesValue(Ty: BO.getType()); |
1643 | Constant *Zero = ConstantInt::getNullValue(Ty: BO.getType()); |
1644 | Value *TVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Ones, RHS: C); |
1645 | Value *FVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Zero, RHS: C); |
1646 | return SelectInst::Create(C: X, S1: TVal, S2: FVal); |
1647 | } |
1648 | |
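// Try to constant fold instruction I with its select operand SI replaced by
// the constant on the chosen arm (IsTrueArm). Any other operand must either
// already be a constant, or be pinned to a constant by the select condition
// (an icmp eq/ne of that operand against a constant). Returns null otherwise.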
1649 | static Constant *constantFoldOperationIntoSelectOperand(Instruction &I, |
1650 | SelectInst *SI, |
1651 | bool IsTrueArm) { |
1652 | SmallVector<Constant *> ConstOps; |
1653 | for (Value *Op : I.operands()) { |
1654 | CmpInst::Predicate Pred; |
1655 | Constant *C = nullptr; |
1656 | if (Op == SI) { |
1657 | C = dyn_cast<Constant>(Val: IsTrueArm ? SI->getTrueValue() |
1658 | : SI->getFalseValue()); |
1659 | } else if (match(V: SI->getCondition(), |
1660 | P: m_ICmp(Pred, L: m_Specific(V: Op), R: m_Constant(C))) && |
1661 | Pred == (IsTrueArm ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) && |
1662 | isGuaranteedNotToBeUndefOrPoison(V: C)) { |
1663 | // Pass |
1664 | } else { |
1665 | C = dyn_cast<Constant>(Val: Op); |
1666 | } |
1667 | if (C == nullptr) |
1668 | return nullptr; |
1669 | |
1670 | ConstOps.push_back(Elt: C); |
1671 | } |
1672 | |
1673 | return ConstantFoldInstOperands(I: &I, Ops: ConstOps, DL: I.getDataLayout()); |
1674 | } |
1675 | |
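// Create a copy of instruction I with its select operand SI replaced by NewOp,
// dropping UB-implying attributes/metadata that may no longer hold, and insert
// the copy right before the select.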
1676 | static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, |
1677 | Value *NewOp, InstCombiner &IC) { |
1678 | Instruction *Clone = I.clone(); |
1679 | Clone->replaceUsesOfWith(From: SI, To: NewOp); |
1680 | Clone->dropUBImplyingAttrsAndMetadata(); |
1681 | IC.InsertNewInstBefore(New: Clone, Old: SI->getIterator()); |
1682 | return Clone; |
1683 | } |
1684 | |
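// Fold an operation into a select by applying it to both arms of the select,
// e.g. (illustrative):
//   %s = select i1 %c, i32 4, i32 8
//   %r = shl i32 1, %s
// -->
//   %r = select i1 %c, i32 16, i32 256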
1685 | Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, |
1686 | bool FoldWithMultiUse) { |
1687 | // Don't modify shared select instructions unless FoldWithMultiUse is set.
1688 | if (!SI->hasOneUse() && !FoldWithMultiUse) |
1689 | return nullptr; |
1690 | |
1691 | Value *TV = SI->getTrueValue(); |
1692 | Value *FV = SI->getFalseValue(); |
1693 | if (!(isa<Constant>(Val: TV) || isa<Constant>(Val: FV))) |
1694 | return nullptr; |
1695 | |
1696 | // Bool selects with constant operands can be folded to logical ops. |
1697 | if (SI->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
1698 | return nullptr; |
1699 | |
1700 | // Test if an FCmpInst instruction is used exclusively by a select as
1701 | // part of a minimum or maximum operation. If so, refrain from doing |
1702 | // any other folding. This helps out other analyses which understand |
1703 | // non-obfuscated minimum and maximum idioms. And in this case, at |
1704 | // least one of the comparison operands has at least one user besides |
1705 | // the compare (the select), which would often largely negate the |
1706 | // benefit of folding anyway. |
1707 | if (auto *CI = dyn_cast<FCmpInst>(Val: SI->getCondition())) { |
1708 | if (CI->hasOneUse()) { |
1709 | Value *Op0 = CI->getOperand(i_nocapture: 0), *Op1 = CI->getOperand(i_nocapture: 1); |
1710 | if ((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1)) |
1711 | return nullptr; |
1712 | } |
1713 | } |
1714 | |
1715 | // Make sure that one of the select arms constant folds successfully. |
1716 | Value *NewTV = constantFoldOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm*/ true); |
1717 | Value *NewFV = constantFoldOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm*/ false); |
1718 | if (!NewTV && !NewFV) |
1719 | return nullptr; |
1720 | |
1721 | // Create an instruction for the arm that did not fold. |
1722 | if (!NewTV) |
1723 | NewTV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: TV, IC&: *this); |
1724 | if (!NewFV) |
1725 | NewFV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: FV, IC&: *this); |
1726 | return SelectInst::Create(C: SI->getCondition(), S1: NewTV, S2: NewFV, NameStr: "" , InsertBefore: nullptr, MDFrom: SI); |
1727 | } |
1728 | |
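// Try to simplify instruction I under the assumption that its PHI operand PN
// takes the value InValue incoming from block InBB (with all other operands
// phi-translated into InBB). May also return a constant implied by InBB's
// conditional branch. Returns null if no simplification is found.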
1729 | static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN, |
1730 | Value *InValue, BasicBlock *InBB, |
1731 | const DataLayout &DL, |
1732 | const SimplifyQuery SQ) { |
1733 | // NB: It is a precondition of this transform that the operands be |
1734 | // phi translatable! This is usually trivially satisfied by limiting it |
1735 | // to constant ops, and for selects we do a more sophisticated check. |
1736 | SmallVector<Value *> Ops; |
1737 | for (Value *Op : I.operands()) { |
1738 | if (Op == PN) |
1739 | Ops.push_back(Elt: InValue); |
1740 | else |
1741 | Ops.push_back(Elt: Op->DoPHITranslation(CurBB: PN->getParent(), PredBB: InBB)); |
1742 | } |
1743 | |
1744 | // Don't consider the simplification successful if we get back a constant |
1745 | // expression. That's just an instruction in hiding. |
1746 | // Also reject the case where we simplify back to the phi node. We wouldn't |
1747 | // be able to remove it in that case. |
1748 | Value *NewVal = simplifyInstructionWithOperands( |
1749 | I: &I, NewOps: Ops, Q: SQ.getWithInstruction(I: InBB->getTerminator())); |
1750 | if (NewVal && NewVal != PN && !match(V: NewVal, P: m_ConstantExpr())) |
1751 | return NewVal; |
1752 | |
1753 | // Check if incoming PHI value can be replaced with constant |
1754 | // based on implied condition. |
1755 | BranchInst *TerminatorBI = dyn_cast<BranchInst>(Val: InBB->getTerminator()); |
1756 | const ICmpInst *ICmp = dyn_cast<ICmpInst>(Val: &I); |
1757 | if (TerminatorBI && TerminatorBI->isConditional() && |
1758 | TerminatorBI->getSuccessor(i: 0) != TerminatorBI->getSuccessor(i: 1) && ICmp) { |
1759 | bool LHSIsTrue = TerminatorBI->getSuccessor(i: 0) == PN->getParent(); |
1760 | std::optional<bool> ImpliedCond = |
1761 | isImpliedCondition(LHS: TerminatorBI->getCondition(), RHSPred: ICmp->getPredicate(), |
1762 | RHSOp0: Ops[0], RHSOp1: Ops[1], DL, LHSIsTrue); |
1763 | if (ImpliedCond) |
1764 | return ConstantInt::getBool(Ty: I.getType(), V: ImpliedCond.value()); |
1765 | } |
1766 | |
1767 | return nullptr; |
1768 | } |
1769 | |
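// Fold an operation into its PHI operand by applying it to each incoming
// value, e.g. (illustrative; subject to the legality checks below):
//   %p = phi i32 [1, %bb0], [%x, %bb1]
//   %r = add i32 %p, 4
// -->
//   %p.add = add i32 %x, 4   ; emitted at the end of %bb1
//   %r = phi i32 [5, %bb0], [%p.add, %bb1]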
1770 | Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { |
1771 | unsigned NumPHIValues = PN->getNumIncomingValues(); |
1772 | if (NumPHIValues == 0) |
1773 | return nullptr; |
1774 | |
1775 | // We normally only transform phis with a single use. However, if a PHI has |
1776 | // multiple uses and they are all the same operation, we can fold *all* of the |
1777 | // uses into the PHI. |
1778 | if (!PN->hasOneUse()) { |
1779 | // Walk the use list for the instruction, comparing them to I. |
1780 | for (User *U : PN->users()) { |
1781 | Instruction *UI = cast<Instruction>(Val: U); |
1782 | if (UI != &I && !I.isIdenticalTo(I: UI)) |
1783 | return nullptr; |
1784 | } |
1785 | // Otherwise, we can replace *all* users with the new PHI we form. |
1786 | } |
1787 | |
1788 | // Check to see whether the instruction can be folded into each phi operand. |
1789 | // If there is one operand that does not fold, remember the BB it is in. |
1790 | // If there is more than one or if *it* is a PHI, bail out. |
1791 | SmallVector<Value *> NewPhiValues; |
1792 | BasicBlock *NonSimplifiedBB = nullptr; |
1793 | Value *NonSimplifiedInVal = nullptr; |
1794 | for (unsigned i = 0; i != NumPHIValues; ++i) { |
1795 | Value *InVal = PN->getIncomingValue(i); |
1796 | BasicBlock *InBB = PN->getIncomingBlock(i); |
1797 | |
1798 | if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InValue: InVal, InBB, DL, SQ)) { |
1799 | NewPhiValues.push_back(Elt: NewVal); |
1800 | continue; |
1801 | } |
1802 | |
1803 | if (NonSimplifiedBB) return nullptr; // More than one non-simplified value. |
1804 | |
1805 | NonSimplifiedBB = InBB; |
1806 | NonSimplifiedInVal = InVal; |
1807 | NewPhiValues.push_back(Elt: nullptr); |
1808 | |
1809 | // If the InVal is an invoke at the end of the pred block, then we can't |
1810 | // insert a computation after it without breaking the edge. |
1811 | if (isa<InvokeInst>(Val: InVal)) |
1812 | if (cast<Instruction>(Val: InVal)->getParent() == NonSimplifiedBB) |
1813 | return nullptr; |
1814 | |
1815 | // If the incoming non-constant value is reachable from the phis block, |
1816 | // we'll push the operation across a loop backedge. This could result in |
1817 | // an infinite combine loop, and is generally non-profitable (especially |
1818 | // if the operation was originally outside the loop). |
1819 | if (isPotentiallyReachable(From: PN->getParent(), To: NonSimplifiedBB, ExclusionSet: nullptr, DT: &DT, |
1820 | LI)) |
1821 | return nullptr; |
1822 | } |
1823 | |
1824 | // If there is exactly one non-simplified value, we can insert a copy of the |
1825 | // operation in that block. However, if this is a critical edge, we would be |
1826 | // inserting the computation on some other paths (e.g. inside a loop). Only |
1827 | // do this if the pred block is unconditionally branching into the phi block. |
1828 | // Also, make sure that the pred block is not dead code. |
1829 | if (NonSimplifiedBB != nullptr) { |
1830 | BranchInst *BI = dyn_cast<BranchInst>(Val: NonSimplifiedBB->getTerminator()); |
1831 | if (!BI || !BI->isUnconditional() || |
1832 | !DT.isReachableFromEntry(A: NonSimplifiedBB)) |
1833 | return nullptr; |
1834 | } |
1835 | |
1836 | // Okay, we can do the transformation: create the new PHI node. |
1837 | PHINode *NewPN = PHINode::Create(Ty: I.getType(), NumReservedValues: PN->getNumIncomingValues()); |
1838 | InsertNewInstBefore(New: NewPN, Old: PN->getIterator()); |
1839 | NewPN->takeName(V: PN); |
1840 | NewPN->setDebugLoc(PN->getDebugLoc()); |
1841 | |
1842 | // If we are going to have to insert a new computation, do so right before the |
1843 | // predecessor's terminator. |
1844 | Instruction *Clone = nullptr; |
1845 | if (NonSimplifiedBB) { |
1846 | Clone = I.clone(); |
1847 | for (Use &U : Clone->operands()) { |
1848 | if (U == PN) |
1849 | U = NonSimplifiedInVal; |
1850 | else |
1851 | U = U->DoPHITranslation(CurBB: PN->getParent(), PredBB: NonSimplifiedBB); |
1852 | } |
1853 | InsertNewInstBefore(New: Clone, Old: NonSimplifiedBB->getTerminator()->getIterator()); |
1854 | } |
1855 | |
1856 | for (unsigned i = 0; i != NumPHIValues; ++i) { |
1857 | if (NewPhiValues[i]) |
1858 | NewPN->addIncoming(V: NewPhiValues[i], BB: PN->getIncomingBlock(i)); |
1859 | else |
1860 | NewPN->addIncoming(V: Clone, BB: PN->getIncomingBlock(i)); |
1861 | } |
1862 | |
1863 | for (User *U : make_early_inc_range(Range: PN->users())) { |
1864 | Instruction *User = cast<Instruction>(Val: U); |
1865 | if (User == &I) continue; |
1866 | replaceInstUsesWith(I&: *User, V: NewPN); |
1867 | eraseInstFromFunction(I&: *User); |
1868 | } |
1869 | |
1870 | replaceAllDbgUsesWith(From&: const_cast<PHINode &>(*PN), |
1871 | To&: const_cast<PHINode &>(*NewPN), |
1872 | DomPoint&: const_cast<PHINode &>(*PN), DT); |
1873 | return replaceInstUsesWith(I, V: NewPN); |
1874 | } |
1875 | |
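// Fold a binop whose operands are two single-use phis that live in the binop's
// block: either every predecessor supplies the binop's identity constant
// through one of the phis (see the example below), or one predecessor supplies
// a pair of constants and the binop can be hoisted into the other predecessor.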
1876 | Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) { |
1877 | // TODO: This should be similar to the incoming values check in foldOpIntoPhi: |
1878 | // we are guarding against replicating the binop in >1 predecessor. |
1879 | // This could miss matching a phi with 2 constant incoming values. |
1880 | auto *Phi0 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 0)); |
1881 | auto *Phi1 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 1)); |
1882 | if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() || |
1883 | Phi0->getNumOperands() != Phi1->getNumOperands()) |
1884 | return nullptr; |
1885 | |
1886 | // TODO: Remove the restriction for binop being in the same block as the phis. |
1887 | if (BO.getParent() != Phi0->getParent() || |
1888 | BO.getParent() != Phi1->getParent()) |
1889 | return nullptr; |
1890 | |
1891 | // Fold if, for every predecessor block, one of phi0's or phi1's incoming
1892 | // values from that block is the identity constant of the binary operator;
1893 | // the binop then reduces to the other phi's incoming value for that block.
1894 | // For example: |
1895 | // %phi0 = phi i32 [0, %bb0], [%i, %bb1] |
1896 | // %phi1 = phi i32 [%j, %bb0], [0, %bb1] |
1897 | // %add = add i32 %phi0, %phi1 |
1898 | // ==> |
1899 | // %add = phi i32 [%j, %bb0], [%i, %bb1] |
1900 | Constant *C = ConstantExpr::getBinOpIdentity(Opcode: BO.getOpcode(), Ty: BO.getType(), |
1901 | /*AllowRHSConstant*/ false); |
1902 | if (C) { |
1903 | SmallVector<Value *, 4> NewIncomingValues; |
1904 | auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) { |
1905 | auto &Phi0Use = std::get<0>(t&: T); |
1906 | auto &Phi1Use = std::get<1>(t&: T); |
1907 | if (Phi0->getIncomingBlock(U: Phi0Use) != Phi1->getIncomingBlock(U: Phi1Use)) |
1908 | return false; |
1909 | Value *Phi0UseV = Phi0Use.get(); |
1910 | Value *Phi1UseV = Phi1Use.get(); |
1911 | if (Phi0UseV == C) |
1912 | NewIncomingValues.push_back(Elt: Phi1UseV); |
1913 | else if (Phi1UseV == C) |
1914 | NewIncomingValues.push_back(Elt: Phi0UseV); |
1915 | else |
1916 | return false; |
1917 | return true; |
1918 | }; |
1919 | |
1920 | if (all_of(Range: zip(t: Phi0->operands(), u: Phi1->operands()), |
1921 | P: CanFoldIncomingValuePair)) { |
1922 | PHINode *NewPhi = |
1923 | PHINode::Create(Ty: Phi0->getType(), NumReservedValues: Phi0->getNumOperands()); |
1924 | assert(NewIncomingValues.size() == Phi0->getNumOperands() && |
1925 | "The number of collected incoming values should equal the number " |
1926 | "of the original PHINode operands!" ); |
1927 | for (unsigned I = 0; I < Phi0->getNumOperands(); I++) |
1928 | NewPhi->addIncoming(V: NewIncomingValues[I], BB: Phi0->getIncomingBlock(i: I)); |
1929 | return NewPhi; |
1930 | } |
1931 | } |
1932 | |
1933 | if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2) |
1934 | return nullptr; |
1935 | |
1936 | // Match a pair of incoming constants for one of the predecessor blocks. |
1937 | BasicBlock *ConstBB, *OtherBB; |
1938 | Constant *C0, *C1; |
1939 | if (match(V: Phi0->getIncomingValue(i: 0), P: m_ImmConstant(C&: C0))) { |
1940 | ConstBB = Phi0->getIncomingBlock(i: 0); |
1941 | OtherBB = Phi0->getIncomingBlock(i: 1); |
1942 | } else if (match(V: Phi0->getIncomingValue(i: 1), P: m_ImmConstant(C&: C0))) { |
1943 | ConstBB = Phi0->getIncomingBlock(i: 1); |
1944 | OtherBB = Phi0->getIncomingBlock(i: 0); |
1945 | } else { |
1946 | return nullptr; |
1947 | } |
1948 | if (!match(V: Phi1->getIncomingValueForBlock(BB: ConstBB), P: m_ImmConstant(C&: C1))) |
1949 | return nullptr; |
1950 | |
1951 | // The block that we are hoisting to must reach here unconditionally. |
1952 | // Otherwise, we could be speculatively executing an expensive or |
1953 | // non-speculative op. |
1954 | auto *PredBlockBranch = dyn_cast<BranchInst>(Val: OtherBB->getTerminator()); |
1955 | if (!PredBlockBranch || PredBlockBranch->isConditional() || |
1956 | !DT.isReachableFromEntry(A: OtherBB)) |
1957 | return nullptr; |
1958 | |
1959 | // TODO: This check could be tightened to only apply to binops (div/rem) that |
1960 | // are not safe to speculatively execute. But that could allow hoisting |
1961 | // potentially expensive instructions (fdiv for example). |
1962 | for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter) |
1963 | if (!isGuaranteedToTransferExecutionToSuccessor(I: &*BBIter)) |
1964 | return nullptr; |
1965 | |
1966 | // Fold constants for the predecessor block with constant incoming values. |
1967 | Constant *NewC = ConstantFoldBinaryOpOperands(Opcode: BO.getOpcode(), LHS: C0, RHS: C1, DL); |
1968 | if (!NewC) |
1969 | return nullptr; |
1970 | |
1971 | // Make a new binop in the predecessor block with the non-constant incoming |
1972 | // values. |
1973 | Builder.SetInsertPoint(PredBlockBranch); |
1974 | Value *NewBO = Builder.CreateBinOp(Opc: BO.getOpcode(), |
1975 | LHS: Phi0->getIncomingValueForBlock(BB: OtherBB), |
1976 | RHS: Phi1->getIncomingValueForBlock(BB: OtherBB)); |
1977 | if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(Val: NewBO)) |
1978 | NotFoldedNewBO->copyIRFlags(V: &BO); |
1979 | |
1980 | // Replace the binop with a phi of the new values. The old phis are dead. |
1981 | PHINode *NewPhi = PHINode::Create(Ty: BO.getType(), NumReservedValues: 2); |
1982 | NewPhi->addIncoming(V: NewBO, BB: OtherBB); |
1983 | NewPhi->addIncoming(V: NewC, BB: ConstBB); |
1984 | return NewPhi; |
1985 | } |
1986 | |
1987 | Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) { |
1988 | if (!isa<Constant>(Val: I.getOperand(i_nocapture: 1))) |
1989 | return nullptr; |
1990 | |
1991 | if (auto *Sel = dyn_cast<SelectInst>(Val: I.getOperand(i_nocapture: 0))) { |
1992 | if (Instruction *NewSel = FoldOpIntoSelect(Op&: I, SI: Sel)) |
1993 | return NewSel; |
1994 | } else if (auto *PN = dyn_cast<PHINode>(Val: I.getOperand(i_nocapture: 0))) { |
1995 | if (Instruction *NewPhi = foldOpIntoPhi(I, PN)) |
1996 | return NewPhi; |
1997 | } |
1998 | return nullptr; |
1999 | } |
2000 | |
2001 | static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) { |
2002 | // If this GEP has only 0 indices, it is the same pointer as |
2003 | // Src. If Src is not a trivial GEP too, don't combine |
2004 | // the indices. |
2005 | if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() && |
2006 | !Src.hasOneUse()) |
2007 | return false; |
2008 | return true; |
2009 | } |
2010 | |
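// Canonicalize a binary operator whose vector operands are shuffles: push
// concatenations, vector.reverse calls, identical-mask shuffles, and splat
// shuffles through the binop so that shuffles end up next to other shuffles
// and binops next to other binops, enabling further folds.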
2011 | Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { |
2012 | if (!isa<VectorType>(Val: Inst.getType())) |
2013 | return nullptr; |
2014 | |
2015 | BinaryOperator::BinaryOps Opcode = Inst.getOpcode(); |
2016 | Value *LHS = Inst.getOperand(i_nocapture: 0), *RHS = Inst.getOperand(i_nocapture: 1); |
2017 | assert(cast<VectorType>(LHS->getType())->getElementCount() == |
2018 | cast<VectorType>(Inst.getType())->getElementCount()); |
2019 | assert(cast<VectorType>(RHS->getType())->getElementCount() == |
2020 | cast<VectorType>(Inst.getType())->getElementCount()); |
2021 | |
2022 | // If both operands of the binop are vector concatenations, then perform the |
2023 | // narrow binop on each pair of the source operands followed by concatenation |
2024 | // of the results. |
2025 | Value *L0, *L1, *R0, *R1; |
2026 | ArrayRef<int> Mask; |
2027 | if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: L0), v2: m_Value(V&: L1), mask: m_Mask(Mask))) && |
2028 | match(V: RHS, P: m_Shuffle(v1: m_Value(V&: R0), v2: m_Value(V&: R1), mask: m_SpecificMask(Mask))) && |
2029 | LHS->hasOneUse() && RHS->hasOneUse() && |
2030 | cast<ShuffleVectorInst>(Val: LHS)->isConcat() && |
2031 | cast<ShuffleVectorInst>(Val: RHS)->isConcat()) { |
2032 | // This transform does not have the speculative execution constraint as |
2033 | // below because the shuffle is a concatenation. The new binops are |
2034 | // operating on exactly the same elements as the existing binop. |
2035 | // TODO: We could ease the mask requirement to allow different undef lanes, |
2036 | // but that requires an analysis of the binop-with-undef output value. |
2037 | Value *NewBO0 = Builder.CreateBinOp(Opc: Opcode, LHS: L0, RHS: R0); |
2038 | if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO0)) |
2039 | BO->copyIRFlags(V: &Inst); |
2040 | Value *NewBO1 = Builder.CreateBinOp(Opc: Opcode, LHS: L1, RHS: R1); |
2041 | if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO1)) |
2042 | BO->copyIRFlags(V: &Inst); |
2043 | return new ShuffleVectorInst(NewBO0, NewBO1, Mask); |
2044 | } |
2045 | |
2046 | auto createBinOpReverse = [&](Value *X, Value *Y) { |
2047 | Value *V = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y, Name: Inst.getName()); |
2048 | if (auto *BO = dyn_cast<BinaryOperator>(Val: V)) |
2049 | BO->copyIRFlags(V: &Inst); |
2050 | Module *M = Inst.getModule(); |
2051 | Function *F = |
2052 | Intrinsic::getDeclaration(M, id: Intrinsic::vector_reverse, Tys: V->getType()); |
2053 | return CallInst::Create(Func: F, Args: V); |
2054 | }; |
2055 | |
2056 | // NOTE: Reverse shuffles don't require the speculative execution protection |
2057 | // below because they don't affect which lanes take part in the computation. |
2058 | |
2059 | Value *V1, *V2; |
2060 | if (match(V: LHS, P: m_VecReverse(Op0: m_Value(V&: V1)))) { |
2061 | // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2)) |
2062 | if (match(V: RHS, P: m_VecReverse(Op0: m_Value(V&: V2))) && |
2063 | (LHS->hasOneUse() || RHS->hasOneUse() || |
2064 | (LHS == RHS && LHS->hasNUses(N: 2)))) |
2065 | return createBinOpReverse(V1, V2); |
2066 | |
2067 | // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat)) |
2068 | if (LHS->hasOneUse() && isSplatValue(V: RHS)) |
2069 | return createBinOpReverse(V1, RHS); |
2070 | } |
2071 | // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2)) |
2072 | else if (isSplatValue(V: LHS) && match(V: RHS, P: m_OneUse(SubPattern: m_VecReverse(Op0: m_Value(V&: V2))))) |
2073 | return createBinOpReverse(LHS, V2); |
2074 | |
2075 | // It may not be safe to reorder shuffles and things like div, urem, etc. |
2076 | // because we may trap when executing those ops on unknown vector elements. |
2077 | // See PR20059. |
2078 | if (!isSafeToSpeculativelyExecute(I: &Inst)) |
2079 | return nullptr; |
2080 | |
2081 | auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) { |
2082 | Value *XY = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y); |
2083 | if (auto *BO = dyn_cast<BinaryOperator>(Val: XY)) |
2084 | BO->copyIRFlags(V: &Inst); |
2085 | return new ShuffleVectorInst(XY, M); |
2086 | }; |
2087 | |
2088 | // If both arguments of the binary operation are shuffles that use the same |
2089 | // mask and shuffle within a single vector, move the shuffle after the binop. |
2090 | if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(), mask: m_Mask(Mask))) && |
2091 | match(V: RHS, P: m_Shuffle(v1: m_Value(V&: V2), v2: m_Poison(), mask: m_SpecificMask(Mask))) && |
2092 | V1->getType() == V2->getType() && |
2093 | (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) { |
2094 | // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask) |
2095 | return createBinOpShuffle(V1, V2, Mask); |
2096 | } |
2097 | |
2098 | // If both arguments of a commutative binop are select-shuffles that use the |
2099 | // same mask with commuted operands, the shuffles are unnecessary. |
2100 | if (Inst.isCommutative() && |
2101 | match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Value(V&: V2), mask: m_Mask(Mask))) && |
2102 | match(V: RHS, |
2103 | P: m_Shuffle(v1: m_Specific(V: V2), v2: m_Specific(V: V1), mask: m_SpecificMask(Mask)))) { |
2104 | auto *LShuf = cast<ShuffleVectorInst>(Val: LHS); |
2105 | auto *RShuf = cast<ShuffleVectorInst>(Val: RHS); |
2106 | // TODO: Allow shuffles that contain undefs in the mask? |
2107 | // That is legal, but it reduces undef knowledge. |
2108 | // TODO: Allow arbitrary shuffles by shuffling after binop? |
2109 | // That might be legal, but we have to deal with poison. |
2110 | if (LShuf->isSelect() && |
2111 | !is_contained(Range: LShuf->getShuffleMask(), Element: PoisonMaskElem) && |
2112 | RShuf->isSelect() && |
2113 | !is_contained(Range: RShuf->getShuffleMask(), Element: PoisonMaskElem)) { |
2114 | // Example: |
2115 | // LHS = shuffle V1, V2, <0, 5, 6, 3> |
2116 | // RHS = shuffle V2, V1, <0, 5, 6, 3> |
2117 | // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2 |
2118 | Instruction *NewBO = BinaryOperator::Create(Op: Opcode, S1: V1, S2: V2); |
2119 | NewBO->copyIRFlags(V: &Inst); |
2120 | return NewBO; |
2121 | } |
2122 | } |
2123 | |
2124 | // If one argument is a shuffle within one vector and the other is a constant, |
2125 | // try moving the shuffle after the binary operation. This canonicalization |
2126 | // intends to move shuffles closer to other shuffles and binops closer to |
2127 | // other binops, so they can be folded. It may also enable demanded elements |
2128 | // transforms. |
2129 | Constant *C; |
2130 | auto *InstVTy = dyn_cast<FixedVectorType>(Val: Inst.getType()); |
2131 | if (InstVTy && |
2132 | match(V: &Inst, P: m_c_BinOp(L: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(), |
2133 | mask: m_Mask(Mask))), |
2134 | R: m_ImmConstant(C))) && |
2135 | cast<FixedVectorType>(Val: V1->getType())->getNumElements() <= |
2136 | InstVTy->getNumElements()) { |
2137 | assert(InstVTy->getScalarType() == V1->getType()->getScalarType() && |
2138 | "Shuffle should not change scalar type" ); |
2139 | |
2140 | // Find constant NewC that has property: |
2141 | // shuffle(NewC, ShMask) = C |
2142 | // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>) |
2143 | // reorder is not possible. A 1-to-1 mapping is not required. Example: |
2144 | // ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef> |
2145 | bool ConstOp1 = isa<Constant>(Val: RHS); |
2146 | ArrayRef<int> ShMask = Mask; |
2147 | unsigned SrcVecNumElts = |
2148 | cast<FixedVectorType>(Val: V1->getType())->getNumElements(); |
2149 | PoisonValue *PoisonScalar = PoisonValue::get(T: C->getType()->getScalarType()); |
2150 | SmallVector<Constant *, 16> NewVecC(SrcVecNumElts, PoisonScalar); |
2151 | bool MayChange = true; |
2152 | unsigned NumElts = InstVTy->getNumElements(); |
2153 | for (unsigned I = 0; I < NumElts; ++I) { |
2154 | Constant *CElt = C->getAggregateElement(Elt: I); |
2155 | if (ShMask[I] >= 0) { |
2156 | assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
2157 | Constant *NewCElt = NewVecC[ShMask[I]]; |
2158 | // Bail out if: |
2159 | // 1. The constant vector contains a constant expression. |
2160 | // 2. The shuffle needs an element of the constant vector that can't |
2161 | // be mapped to a new constant vector. |
2162 | // 3. This is a widening shuffle that copies elements of V1 into the |
2163 | // extended elements (extending with poison is allowed). |
2164 | if (!CElt || (!isa<PoisonValue>(Val: NewCElt) && NewCElt != CElt) || |
2165 | I >= SrcVecNumElts) { |
2166 | MayChange = false; |
2167 | break; |
2168 | } |
2169 | NewVecC[ShMask[I]] = CElt; |
2170 | } |
2171 | // If this is a widening shuffle, we must be able to extend with poison |
2172 | // elements. If the original binop does not produce a poison in the high |
2173 | // lanes, then this transform is not safe. |
2174 | // Similarly for poison lanes due to the shuffle mask, we can only |
2175 | // transform binops that preserve poison. |
2176 | // TODO: We could shuffle those non-poison constant values into the |
2177 | //       result by using a constant vector (rather than a poison vector)
2178 | // as operand 1 of the new binop, but that might be too aggressive |
2179 | // for target-independent shuffle creation. |
2180 | if (I >= SrcVecNumElts || ShMask[I] < 0) { |
2181 | Constant *MaybePoison = |
2182 | ConstOp1 |
2183 | ? ConstantFoldBinaryOpOperands(Opcode, LHS: PoisonScalar, RHS: CElt, DL) |
2184 | : ConstantFoldBinaryOpOperands(Opcode, LHS: CElt, RHS: PoisonScalar, DL); |
2185 | if (!MaybePoison || !isa<PoisonValue>(Val: MaybePoison)) { |
2186 | MayChange = false; |
2187 | break; |
2188 | } |
2189 | } |
2190 | } |
2191 | if (MayChange) { |
2192 | Constant *NewC = ConstantVector::get(V: NewVecC); |
2193 | // It may not be safe to execute a binop on a vector with poison elements |
2194 | // because the entire instruction can be folded to undef or create poison |
2195 | // that did not exist in the original code. |
2196 | // TODO: The shift case should not be necessary. |
2197 | if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1)) |
2198 | NewC = getSafeVectorConstantForBinop(Opcode, In: NewC, IsRHSConstant: ConstOp1); |
2199 | |
2200 | // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask) |
2201 | // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask) |
2202 | Value *NewLHS = ConstOp1 ? V1 : NewC; |
2203 | Value *NewRHS = ConstOp1 ? NewC : V1; |
2204 | return createBinOpShuffle(NewLHS, NewRHS, Mask); |
2205 | } |
2206 | } |
2207 | |
2208 | // Try to reassociate to sink a splat shuffle after a binary operation. |
2209 | if (Inst.isAssociative() && Inst.isCommutative()) { |
2210 | // Canonicalize shuffle operand as LHS. |
2211 | if (isa<ShuffleVectorInst>(Val: RHS)) |
2212 | std::swap(a&: LHS, b&: RHS); |
2213 | |
2214 | Value *X; |
2215 | ArrayRef<int> MaskC; |
2216 | int SplatIndex; |
2217 | Value *Y, *OtherOp; |
2218 | if (!match(V: LHS, |
2219 | P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_Mask(MaskC)))) || |
2220 | !match(Mask: MaskC, P: m_SplatOrPoisonMask(SplatIndex)) || |
2221 | X->getType() != Inst.getType() || |
2222 | !match(V: RHS, P: m_OneUse(SubPattern: m_BinOp(Opcode, L: m_Value(V&: Y), R: m_Value(V&: OtherOp))))) |
2223 | return nullptr; |
2224 | |
2225 | // FIXME: This may not be safe if the analysis allows undef elements. By |
2226 | // moving 'Y' before the splat shuffle, we are implicitly assuming |
2227 | // that it is not undef/poison at the splat index. |
2228 | if (isSplatValue(V: OtherOp, Index: SplatIndex)) { |
2229 | std::swap(a&: Y, b&: OtherOp); |
2230 | } else if (!isSplatValue(V: Y, Index: SplatIndex)) { |
2231 | return nullptr; |
2232 | } |
2233 | |
2234 | // X and Y are splatted values, so perform the binary operation on those |
2235 | // values followed by a splat followed by the 2nd binary operation: |
2236 | // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp |
2237 | Value *NewBO = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y); |
2238 | SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex); |
2239 | Value *NewSplat = Builder.CreateShuffleVector(V: NewBO, Mask: NewMask); |
2240 | Instruction *R = BinaryOperator::Create(Op: Opcode, S1: NewSplat, S2: OtherOp); |
2241 | |
2242 | // Intersect FMF on both new binops. Other (poison-generating) flags are |
2243 | // dropped to be safe. |
2244 | if (isa<FPMathOperator>(Val: R)) { |
2245 | R->copyFastMathFlags(I: &Inst); |
2246 | R->andIRFlags(V: RHS); |
2247 | } |
2248 | if (auto *NewInstBO = dyn_cast<BinaryOperator>(Val: NewBO)) |
2249 | NewInstBO->copyIRFlags(V: R); |
2250 | return R; |
2251 | } |
2252 | |
2253 | return nullptr; |
2254 | } |
2255 | |
2256 | /// Try to narrow the width of a binop if at least 1 operand is an extend of |
2257 | /// a value. This requires a potentially expensive known bits check to make
2258 | /// sure the narrow op does not overflow. |
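/// For example (illustrative, when the narrow add is proven not to overflow):
///   add (zext i8 %x to i32), (zext i8 %y to i32)
///     --> zext (add nuw i8 %x, %y) to i32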
2259 | Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) { |
2260 | // We need at least one extended operand. |
2261 | Value *Op0 = BO.getOperand(i_nocapture: 0), *Op1 = BO.getOperand(i_nocapture: 1); |
2262 | |
2263 | // If this is a sub, we swap the operands since we always want an extension |
2264 | // on the RHS. The LHS can be an extension or a constant. |
2265 | if (BO.getOpcode() == Instruction::Sub) |
2266 | std::swap(a&: Op0, b&: Op1); |
2267 | |
2268 | Value *X; |
2269 | bool IsSext = match(V: Op0, P: m_SExt(Op: m_Value(V&: X))); |
2270 | if (!IsSext && !match(V: Op0, P: m_ZExt(Op: m_Value(V&: X)))) |
2271 | return nullptr; |
2272 | |
2273 | // If both operands are the same extension from the same source type and we |
2274 | // can eliminate at least one (hasOneUse), this might work. |
2275 | CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt; |
2276 | Value *Y; |
2277 | if (!(match(V: Op1, P: m_ZExtOrSExt(Op: m_Value(V&: Y))) && X->getType() == Y->getType() && |
2278 | cast<Operator>(Val: Op1)->getOpcode() == CastOpc && |
2279 | (Op0->hasOneUse() || Op1->hasOneUse()))) { |
2280 | // If that did not match, see if we have a suitable constant operand. |
2281 | // Truncating and extending must produce the same constant. |
2282 | Constant *WideC; |
2283 | if (!Op0->hasOneUse() || !match(V: Op1, P: m_Constant(C&: WideC))) |
2284 | return nullptr; |
2285 | Constant *NarrowC = getLosslessTrunc(C: WideC, TruncTy: X->getType(), ExtOp: CastOpc); |
2286 | if (!NarrowC) |
2287 | return nullptr; |
2288 | Y = NarrowC; |
2289 | } |
2290 | |
2291 | // Swap back now that we found our operands. |
2292 | if (BO.getOpcode() == Instruction::Sub) |
2293 | std::swap(a&: X, b&: Y); |
2294 | |
2295 | // Both operands have narrow versions. Last step: the math must not overflow |
2296 | // in the narrow width. |
2297 | if (!willNotOverflow(Opcode: BO.getOpcode(), LHS: X, RHS: Y, CxtI: BO, IsSigned: IsSext)) |
2298 | return nullptr; |
2299 | |
2300 | // bo (ext X), (ext Y) --> ext (bo X, Y) |
2301 | // bo (ext X), C --> ext (bo X, C') |
2302 | Value *NarrowBO = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: X, RHS: Y, Name: "narrow" ); |
2303 | if (auto *NewBinOp = dyn_cast<BinaryOperator>(Val: NarrowBO)) { |
2304 | if (IsSext) |
2305 | NewBinOp->setHasNoSignedWrap(); |
2306 | else |
2307 | NewBinOp->setHasNoUnsignedWrap(); |
2308 | } |
2309 | return CastInst::Create(CastOpc, S: NarrowBO, Ty: BO.getType()); |
2310 | } |
2311 | |
2312 | static bool isMergedGEPInBounds(GEPOperator &GEP1, GEPOperator &GEP2) { |
2313 | return GEP1.isInBounds() && GEP2.isInBounds(); |
2314 | } |
2315 | |
2316 | /// Thread a GEP operation with constant indices through the constant true/false |
2317 | /// arms of a select. |
2318 | static Instruction *foldSelectGEP(GetElementPtrInst &GEP, |
2319 | InstCombiner::BuilderTy &Builder) { |
2320 | if (!GEP.hasAllConstantIndices()) |
2321 | return nullptr; |
2322 | |
2323 | Instruction *Sel; |
2324 | Value *Cond; |
2325 | Constant *TrueC, *FalseC; |
2326 | if (!match(V: GEP.getPointerOperand(), P: m_Instruction(I&: Sel)) || |
2327 | !match(V: Sel, |
2328 | P: m_Select(C: m_Value(V&: Cond), L: m_Constant(C&: TrueC), R: m_Constant(C&: FalseC)))) |
2329 | return nullptr; |
2330 | |
2331 | // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC' |
2332 | // Propagate 'inbounds' and metadata from existing instructions. |
2333 | // Note: using IRBuilder to create the constants for efficiency. |
2334 | SmallVector<Value *, 4> IndexC(GEP.indices()); |
2335 | GEPNoWrapFlags NW = GEP.getNoWrapFlags(); |
2336 | Type *Ty = GEP.getSourceElementType(); |
2337 | Value *NewTrueC = Builder.CreateGEP(Ty, Ptr: TrueC, IdxList: IndexC, Name: "" , NW); |
2338 | Value *NewFalseC = Builder.CreateGEP(Ty, Ptr: FalseC, IdxList: IndexC, Name: "" , NW); |
2339 | return SelectInst::Create(C: Cond, S1: NewTrueC, S2: NewFalseC, NameStr: "" , InsertBefore: nullptr, MDFrom: Sel); |
2340 | } |
2341 | |
2342 | // Canonicalization: |
2343 | // gep T, (gep i8, base, C1), (Index + C2) into |
2344 | // gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index |
2345 | static Instruction *canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP, |
2346 | GEPOperator *Src, |
2347 | InstCombinerImpl &IC) { |
2348 | if (GEP.getNumIndices() != 1) |
2349 | return nullptr; |
2350 | auto &DL = IC.getDataLayout(); |
2351 | Value *Base; |
2352 | const APInt *C1; |
2353 | if (!match(V: Src, P: m_PtrAdd(PointerOp: m_Value(V&: Base), OffsetOp: m_APInt(Res&: C1)))) |
2354 | return nullptr; |
2355 | Value *VarIndex; |
2356 | const APInt *C2; |
2357 | Type *PtrTy = Src->getType()->getScalarType(); |
2358 | unsigned IndexSizeInBits = DL.getIndexTypeSizeInBits(Ty: PtrTy); |
2359 | if (!match(V: GEP.getOperand(i_nocapture: 1), P: m_AddLike(L: m_Value(V&: VarIndex), R: m_APInt(Res&: C2)))) |
2360 | return nullptr; |
2361 | if (C1->getBitWidth() != IndexSizeInBits || |
2362 | C2->getBitWidth() != IndexSizeInBits) |
2363 | return nullptr; |
2364 | Type *BaseType = GEP.getSourceElementType(); |
2365 | if (isa<ScalableVectorType>(Val: BaseType)) |
2366 | return nullptr; |
2367 | APInt TypeSize(IndexSizeInBits, DL.getTypeAllocSize(Ty: BaseType)); |
2368 | APInt NewOffset = TypeSize * *C2 + *C1; |
2369 | if (NewOffset.isZero() || |
2370 | (Src->hasOneUse() && GEP.getOperand(i_nocapture: 1)->hasOneUse())) { |
2371 | Value *GEPConst = |
2372 | IC.Builder.CreatePtrAdd(Ptr: Base, Offset: IC.Builder.getInt(AI: NewOffset)); |
2373 | return GetElementPtrInst::Create(PointeeType: BaseType, Ptr: GEPConst, IdxList: VarIndex); |
2374 | } |
2375 | |
2376 | return nullptr; |
2377 | } |
2378 | |
2379 | Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, |
2380 | GEPOperator *Src) { |
2381 | // Combine Indices - If the source pointer to this getelementptr instruction |
2382 | // is a getelementptr instruction with matching element type, combine the |
2383 | // indices of the two getelementptr instructions into a single instruction. |
2384 | if (!shouldMergeGEPs(GEP&: *cast<GEPOperator>(Val: &GEP), Src&: *Src)) |
2385 | return nullptr; |
2386 | |
2387 | if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, IC&: *this)) |
2388 | return I; |
2389 | |
2390 | // For constant GEPs, use a more general offset-based folding approach. |
2391 | Type *PtrTy = Src->getType()->getScalarType(); |
2392 | if (GEP.hasAllConstantIndices() && |
2393 | (Src->hasOneUse() || Src->hasAllConstantIndices())) { |
2394 | // Split Src into a variable part and a constant suffix. |
2395 | gep_type_iterator GTI = gep_type_begin(GEP: *Src); |
2396 | Type *BaseType = GTI.getIndexedType(); |
2397 | bool IsFirstType = true; |
2398 | unsigned NumVarIndices = 0; |
2399 | for (auto Pair : enumerate(First: Src->indices())) { |
2400 | if (!isa<ConstantInt>(Val: Pair.value())) { |
2401 | BaseType = GTI.getIndexedType(); |
2402 | IsFirstType = false; |
2403 | NumVarIndices = Pair.index() + 1; |
2404 | } |
2405 | ++GTI; |
2406 | } |
2407 | |
2408 | // Determine the offset for the constant suffix of Src. |
2409 | APInt Offset(DL.getIndexTypeSizeInBits(Ty: PtrTy), 0); |
2410 | if (NumVarIndices != Src->getNumIndices()) { |
2411 | // FIXME: getIndexedOffsetInType() does not handle scalable vectors.
2412 | if (BaseType->isScalableTy()) |
2413 | return nullptr; |
2414 | |
2415 | SmallVector<Value *> ConstantIndices; |
2416 | if (!IsFirstType) |
2417 | ConstantIndices.push_back( |
2418 | Elt: Constant::getNullValue(Ty: Type::getInt32Ty(C&: GEP.getContext()))); |
2419 | append_range(C&: ConstantIndices, R: drop_begin(RangeOrContainer: Src->indices(), N: NumVarIndices)); |
2420 | Offset += DL.getIndexedOffsetInType(ElemTy: BaseType, Indices: ConstantIndices); |
2421 | } |
2422 | |
2423 | // Add the offset for GEP (which is fully constant). |
2424 | if (!GEP.accumulateConstantOffset(DL, Offset)) |
2425 | return nullptr; |
2426 | |
2427 | APInt OffsetOld = Offset; |
2428 | // Convert the total offset back into indices. |
2429 | SmallVector<APInt> ConstIndices = |
2430 | DL.getGEPIndicesForOffset(ElemTy&: BaseType, Offset); |
2431 | if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) { |
2432 | // If both GEP are constant-indexed, and cannot be merged in either way, |
2433 | // convert them to a GEP of i8. |
2434 | if (Src->hasAllConstantIndices()) |
2435 | return replaceInstUsesWith( |
2436 | I&: GEP, V: Builder.CreateGEP( |
2437 | Ty: Builder.getInt8Ty(), Ptr: Src->getOperand(i_nocapture: 0), |
2438 | IdxList: Builder.getInt(AI: OffsetOld), Name: "" , |
2439 | NW: isMergedGEPInBounds(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP)))); |
2440 | return nullptr; |
2441 | } |
2442 | |
2443 | bool IsInBounds = isMergedGEPInBounds(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP)); |
2444 | SmallVector<Value *> Indices; |
2445 | append_range(C&: Indices, R: drop_end(RangeOrContainer: Src->indices(), |
2446 | N: Src->getNumIndices() - NumVarIndices)); |
2447 | for (const APInt &Idx : drop_begin(RangeOrContainer&: ConstIndices, N: !IsFirstType)) { |
2448 | Indices.push_back(Elt: ConstantInt::get(Context&: GEP.getContext(), V: Idx)); |
2449 | // Even if the total offset is inbounds, we may end up representing it |
2450 | // by first performing a larger negative offset, and then a smaller |
2451 | // positive one. The large negative offset might go out of bounds. Only |
2452 | // preserve inbounds if all signs are the same. |
2453 | IsInBounds &= Idx.isNonNegative() == ConstIndices[0].isNonNegative(); |
2454 | } |
2455 | |
2456 | return replaceInstUsesWith( |
2457 | I&: GEP, V: Builder.CreateGEP(Ty: Src->getSourceElementType(), Ptr: Src->getOperand(i_nocapture: 0), |
2458 | IdxList: Indices, Name: "" , NW: IsInBounds)); |
2459 | } |
2460 | |
2461 | if (Src->getResultElementType() != GEP.getSourceElementType()) |
2462 | return nullptr; |
2463 | |
2464 | SmallVector<Value*, 8> Indices; |
2465 | |
2466 | // Find out whether the last index in the source GEP is a sequential idx. |
2467 | bool EndsWithSequential = false; |
2468 | for (gep_type_iterator I = gep_type_begin(GEP: *Src), E = gep_type_end(GEP: *Src); |
2469 | I != E; ++I) |
2470 | EndsWithSequential = I.isSequential(); |
2471 | |
2472 | // Can we combine the two pointer arithmetic offsets?
2473 | if (EndsWithSequential) { |
2474 | // Replace: gep (gep %P, long B), long A, ... |
2475 | // With: T = long A+B; gep %P, T, ... |
2476 | Value *SO1 = Src->getOperand(i_nocapture: Src->getNumOperands()-1); |
2477 | Value *GO1 = GEP.getOperand(i_nocapture: 1); |
2478 | |
2479 | // If they aren't the same type, then the input hasn't been processed |
2480 | // by the loop above yet (which canonicalizes sequential index types to |
2481 | // intptr_t). Just avoid transforming this until the input has been |
2482 | // normalized. |
2483 | if (SO1->getType() != GO1->getType()) |
2484 | return nullptr; |
2485 | |
2486 | Value *Sum = |
2487 | simplifyAddInst(LHS: GO1, RHS: SO1, IsNSW: false, IsNUW: false, Q: SQ.getWithInstruction(I: &GEP)); |
2488 | // Only do the combine when we are sure the cost after the |
2489 | // merge is never more than that before the merge. |
2490 | if (Sum == nullptr) |
2491 | return nullptr; |
2492 | |
2493 | // Update the GEP in place if possible. |
2494 | if (Src->getNumOperands() == 2) { |
2495 | GEP.setIsInBounds(isMergedGEPInBounds(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP))); |
2496 | replaceOperand(I&: GEP, OpNum: 0, V: Src->getOperand(i_nocapture: 0)); |
2497 | replaceOperand(I&: GEP, OpNum: 1, V: Sum); |
2498 | return &GEP; |
2499 | } |
2500 | Indices.append(in_start: Src->op_begin()+1, in_end: Src->op_end()-1); |
2501 | Indices.push_back(Elt: Sum); |
2502 | Indices.append(in_start: GEP.op_begin()+2, in_end: GEP.op_end()); |
2503 | } else if (isa<Constant>(Val: *GEP.idx_begin()) && |
2504 | cast<Constant>(Val&: *GEP.idx_begin())->isNullValue() && |
2505 | Src->getNumOperands() != 1) { |
2506 | // Otherwise we can do the fold if the first index of the GEP is a zero |
2507 | Indices.append(in_start: Src->op_begin()+1, in_end: Src->op_end()); |
2508 | Indices.append(in_start: GEP.idx_begin()+1, in_end: GEP.idx_end()); |
2509 | } |
2510 | |
2511 | if (!Indices.empty()) |
2512 | return replaceInstUsesWith( |
2513 | I&: GEP, V: Builder.CreateGEP( |
2514 | Ty: Src->getSourceElementType(), Ptr: Src->getOperand(i_nocapture: 0), IdxList: Indices, Name: "" , |
2515 | NW: isMergedGEPInBounds(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP)))); |
2516 | |
2517 | return nullptr; |
2518 | } |
2519 | |
2520 | Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses, |
2521 | BuilderTy *Builder, |
2522 | bool &DoesConsume, unsigned Depth) { |
2523 | static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1)); |
2524 | // ~(~(X)) -> X. |
2525 | Value *A, *B; |
2526 | if (match(V, P: m_Not(V: m_Value(V&: A)))) { |
2527 | DoesConsume = true; |
2528 | return A; |
2529 | } |
2530 | |
2531 | Constant *C; |
2532 | // Constants can be considered to be not'ed values. |
2533 | if (match(V, P: m_ImmConstant(C))) |
2534 | return ConstantExpr::getNot(C); |
2535 | |
2536 | if (Depth++ >= MaxAnalysisRecursionDepth) |
2537 | return nullptr; |
2538 | |
// The rest of the cases require that we invert all uses, so don't bother
// doing the analysis if we know we can't use the result.
2541 | if (!WillInvertAllUses) |
2542 | return nullptr; |
2543 | |
2544 | // Compares can be inverted if all of their uses are being modified to use |
2545 | // the ~V. |
2546 | if (auto *I = dyn_cast<CmpInst>(Val: V)) { |
2547 | if (Builder != nullptr) |
2548 | return Builder->CreateCmp(Pred: I->getInversePredicate(), LHS: I->getOperand(i_nocapture: 0), |
2549 | RHS: I->getOperand(i_nocapture: 1)); |
2550 | return NonNull; |
2551 | } |
2552 | |
2553 | // If `V` is of the form `A + B` then `-1 - V` can be folded into |
2554 | // `(-1 - B) - A` if we are willing to invert all of the uses. |
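// (To see this: ~(A + B) == -1 - (A + B) == (-1 - B) - A == (~B) - A.)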
2555 | if (match(V, P: m_Add(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
2556 | if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder, |
2557 | DoesConsume, Depth)) |
2558 | return Builder ? Builder->CreateSub(LHS: BV, RHS: A) : NonNull; |
2559 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2560 | DoesConsume, Depth)) |
2561 | return Builder ? Builder->CreateSub(LHS: AV, RHS: B) : NonNull; |
2562 | return nullptr; |
2563 | } |
2564 | |
2565 | // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded |
2566 | // into `A ^ B` if we are willing to invert all of the uses. |
2567 | if (match(V, P: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
2568 | if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder, |
2569 | DoesConsume, Depth)) |
2570 | return Builder ? Builder->CreateXor(LHS: A, RHS: BV) : NonNull; |
2571 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2572 | DoesConsume, Depth)) |
2573 | return Builder ? Builder->CreateXor(LHS: AV, RHS: B) : NonNull; |
2574 | return nullptr; |
2575 | } |
2576 | |
2577 | // If `V` is of the form `B - A` then `-1 - V` can be folded into |
2578 | // `A + (-1 - B)` if we are willing to invert all of the uses. |
2579 | if (match(V, P: m_Sub(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
2580 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2581 | DoesConsume, Depth)) |
2582 | return Builder ? Builder->CreateAdd(LHS: AV, RHS: B) : NonNull; |
2583 | return nullptr; |
2584 | } |
2585 | |
2586 | // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded |
2587 | // into `A s>> B` if we are willing to invert all of the uses. |
2588 | if (match(V, P: m_AShr(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
2589 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2590 | DoesConsume, Depth)) |
2591 | return Builder ? Builder->CreateAShr(LHS: AV, RHS: B) : NonNull; |
2592 | return nullptr; |
2593 | } |
2594 | |
2595 | Value *Cond; |
2596 | // LogicOps are special in that we canonicalize them at the cost of an |
2597 | // instruction. |
2598 | bool IsSelect = match(V, P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: A), R: m_Value(V&: B))) && |
2599 | !shouldAvoidAbsorbingNotIntoSelect(SI: *cast<SelectInst>(Val: V)); |
2600 | // Selects/min/max with invertible operands are freely invertible |
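// (For min/max this relies on the identity ~smax(A, B) == smin(~A, ~B), and
// likewise for the unsigned variants, because x -> ~x reverses the order.)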
2601 | if (IsSelect || match(V, P: m_MaxOrMin(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
2602 | bool LocalDoesConsume = DoesConsume; |
2603 | if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder*/ nullptr, |
2604 | DoesConsume&: LocalDoesConsume, Depth)) |
2605 | return nullptr; |
2606 | if (Value *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2607 | DoesConsume&: LocalDoesConsume, Depth)) { |
2608 | DoesConsume = LocalDoesConsume; |
2609 | if (Builder != nullptr) { |
2610 | Value *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder, |
2611 | DoesConsume, Depth); |
assert(NotB != nullptr &&
       "Unable to build inverted value for known freely invertible op");
2614 | if (auto *II = dyn_cast<IntrinsicInst>(Val: V)) |
2615 | return Builder->CreateBinaryIntrinsic( |
2616 | ID: getInverseMinMaxIntrinsic(MinMaxID: II->getIntrinsicID()), LHS: NotA, RHS: NotB); |
2617 | return Builder->CreateSelect(C: Cond, True: NotA, False: NotB); |
2618 | } |
2619 | return NonNull; |
2620 | } |
2621 | } |
2622 | |
2623 | if (PHINode *PN = dyn_cast<PHINode>(Val: V)) { |
2624 | bool LocalDoesConsume = DoesConsume; |
2625 | SmallVector<std::pair<Value *, BasicBlock *>, 8> IncomingValues; |
2626 | for (Use &U : PN->operands()) { |
2627 | BasicBlock *IncomingBlock = PN->getIncomingBlock(U); |
2628 | Value *NewIncomingVal = getFreelyInvertedImpl( |
2629 | V: U.get(), /*WillInvertAllUses=*/false, |
2630 | /*Builder=*/nullptr, DoesConsume&: LocalDoesConsume, Depth: MaxAnalysisRecursionDepth - 1); |
2631 | if (NewIncomingVal == nullptr) |
2632 | return nullptr; |
2633 | // Make sure that we can safely erase the original PHI node. |
2634 | if (NewIncomingVal == V) |
2635 | return nullptr; |
2636 | if (Builder != nullptr) |
2637 | IncomingValues.emplace_back(Args&: NewIncomingVal, Args&: IncomingBlock); |
2638 | } |
2639 | |
2640 | DoesConsume = LocalDoesConsume; |
2641 | if (Builder != nullptr) { |
2642 | IRBuilderBase::InsertPointGuard Guard(*Builder); |
2643 | Builder->SetInsertPoint(PN); |
2644 | PHINode *NewPN = |
2645 | Builder->CreatePHI(Ty: PN->getType(), NumReservedValues: PN->getNumIncomingValues()); |
2646 | for (auto [Val, Pred] : IncomingValues) |
2647 | NewPN->addIncoming(V: Val, BB: Pred); |
2648 | return NewPN; |
2649 | } |
2650 | return NonNull; |
2651 | } |
2652 | |
2653 | if (match(V, P: m_SExtLike(Op: m_Value(V&: A)))) { |
2654 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2655 | DoesConsume, Depth)) |
2656 | return Builder ? Builder->CreateSExt(V: AV, DestTy: V->getType()) : NonNull; |
2657 | return nullptr; |
2658 | } |
2659 | |
2660 | if (match(V, P: m_Trunc(Op: m_Value(V&: A)))) { |
2661 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2662 | DoesConsume, Depth)) |
2663 | return Builder ? Builder->CreateTrunc(V: AV, DestTy: V->getType()) : NonNull; |
2664 | return nullptr; |
2665 | } |
2666 | |
2667 | // De Morgan's Laws: |
2668 | // (~(A | B)) -> (~A & ~B) |
2669 | // (~(A & B)) -> (~A | ~B) |
2670 | auto TryInvertAndOrUsingDeMorgan = [&](Instruction::BinaryOps Opcode, |
2671 | bool IsLogical, Value *A, |
2672 | Value *B) -> Value * { |
2673 | bool LocalDoesConsume = DoesConsume; |
2674 | if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder=*/nullptr, |
2675 | DoesConsume&: LocalDoesConsume, Depth)) |
2676 | return nullptr; |
2677 | if (auto *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2678 | DoesConsume&: LocalDoesConsume, Depth)) { |
2679 | auto *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder, |
2680 | DoesConsume&: LocalDoesConsume, Depth); |
2681 | DoesConsume = LocalDoesConsume; |
2682 | if (IsLogical) |
2683 | return Builder ? Builder->CreateLogicalOp(Opc: Opcode, Cond1: NotA, Cond2: NotB) : NonNull; |
2684 | return Builder ? Builder->CreateBinOp(Opc: Opcode, LHS: NotA, RHS: NotB) : NonNull; |
2685 | } |
2686 | |
2687 | return nullptr; |
2688 | }; |
2689 | |
2690 | if (match(V, P: m_Or(L: m_Value(V&: A), R: m_Value(V&: B)))) |
2691 | return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/false, A, |
2692 | B); |
2693 | |
2694 | if (match(V, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B)))) |
2695 | return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/false, A, |
2696 | B); |
2697 | |
2698 | if (match(V, P: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B)))) |
2699 | return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/true, A, |
2700 | B); |
2701 | |
2702 | if (match(V, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B)))) |
2703 | return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/true, A, |
2704 | B); |
2705 | |
2706 | return nullptr; |
2707 | } |
2708 | |
2709 | Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { |
2710 | Value *PtrOp = GEP.getOperand(i_nocapture: 0); |
2711 | SmallVector<Value *, 8> Indices(GEP.indices()); |
2712 | Type *GEPType = GEP.getType(); |
2713 | Type *GEPEltType = GEP.getSourceElementType(); |
2714 | if (Value *V = |
2715 | simplifyGEPInst(SrcTy: GEPEltType, Ptr: PtrOp, Indices, NW: GEP.getNoWrapFlags(), |
2716 | Q: SQ.getWithInstruction(I: &GEP))) |
2717 | return replaceInstUsesWith(I&: GEP, V); |
2718 | |
// For vector GEPs, use the generic demanded vector support.
// Skip if the GEP return type is scalable, since the number of elements is
// unknown at compile time.
2722 | if (auto *GEPFVTy = dyn_cast<FixedVectorType>(Val: GEPType)) { |
2723 | auto VWidth = GEPFVTy->getNumElements(); |
2724 | APInt PoisonElts(VWidth, 0); |
2725 | APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth)); |
2726 | if (Value *V = SimplifyDemandedVectorElts(V: &GEP, DemandedElts: AllOnesEltMask, |
2727 | PoisonElts)) { |
2728 | if (V != &GEP) |
2729 | return replaceInstUsesWith(I&: GEP, V); |
2730 | return &GEP; |
2731 | } |
2732 | |
2733 | // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if |
2734 | // possible (decide on canonical form for pointer broadcast), 3) exploit |
2735 | // undef elements to decrease demanded bits |
2736 | } |
2737 | |
// Eliminate unneeded casts for indices, and replace any index over a
// zero-size type with zero, since such an index always displaces by zero.
2740 | bool MadeChange = false; |
2741 | |
2742 | // Index width may not be the same width as pointer width. |
2743 | // Data layout chooses the right type based on supported integer types. |
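// For example (illustrative, assuming a 64-bit index type):
//   getelementptr i32, ptr %p, i16 %i
// becomes:
//   %i.ext = sext i16 %i to i64
//   getelementptr i32, ptr %p, i64 %i.ext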
2744 | Type *NewScalarIndexTy = |
2745 | DL.getIndexType(PtrTy: GEP.getPointerOperandType()->getScalarType()); |
2746 | |
2747 | gep_type_iterator GTI = gep_type_begin(GEP); |
2748 | for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E; |
2749 | ++I, ++GTI) { |
2750 | // Skip indices into struct types. |
2751 | if (GTI.isStruct()) |
2752 | continue; |
2753 | |
2754 | Type *IndexTy = (*I)->getType(); |
2755 | Type *NewIndexType = |
2756 | IndexTy->isVectorTy() |
2757 | ? VectorType::get(ElementType: NewScalarIndexTy, |
2758 | EC: cast<VectorType>(Val: IndexTy)->getElementCount()) |
2759 | : NewScalarIndexTy; |
2760 | |
2761 | // If the element type has zero size then any index over it is equivalent |
2762 | // to an index of zero, so replace it with zero if it is not zero already. |
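// For example (illustrative), in
//   getelementptr [0 x i32], ptr %p, i64 %i, i64 %j
// the first index steps over the zero-size type [0 x i32] and always
// displaces by zero bytes, so %i is replaced with 0.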
2763 | Type *EltTy = GTI.getIndexedType(); |
2764 | if (EltTy->isSized() && DL.getTypeAllocSize(Ty: EltTy).isZero()) |
2765 | if (!isa<Constant>(Val: *I) || !match(V: I->get(), P: m_Zero())) { |
2766 | *I = Constant::getNullValue(Ty: NewIndexType); |
2767 | MadeChange = true; |
2768 | } |
2769 | |
2770 | if (IndexTy != NewIndexType) { |
2771 | // If we are using a wider index than needed for this platform, shrink |
2772 | // it to what we need. If narrower, sign-extend it to what we need. |
2773 | // This explicit cast can make subsequent optimizations more obvious. |
2774 | *I = Builder.CreateIntCast(V: *I, DestTy: NewIndexType, isSigned: true); |
2775 | MadeChange = true; |
2776 | } |
2777 | } |
2778 | if (MadeChange) |
2779 | return &GEP; |
2780 | |
2781 | // Canonicalize constant GEPs to i8 type. |
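// For example (illustrative, assuming 4-byte i32 and a 64-bit index type):
//   getelementptr inbounds i32, ptr %p, i64 3
// becomes:
//   getelementptr inbounds i8, ptr %p, i64 12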
2782 | if (!GEPEltType->isIntegerTy(Bitwidth: 8) && GEP.hasAllConstantIndices()) { |
2783 | APInt Offset(DL.getIndexTypeSizeInBits(Ty: GEPType), 0); |
2784 | if (GEP.accumulateConstantOffset(DL, Offset)) |
2785 | return replaceInstUsesWith( |
2786 | I&: GEP, V: Builder.CreatePtrAdd(Ptr: PtrOp, Offset: Builder.getInt(AI: Offset), Name: "" , |
2787 | NW: GEP.getNoWrapFlags())); |
2788 | } |
2789 | |
2790 | // Canonicalize |
2791 | // - scalable GEPs to an explicit offset using the llvm.vscale intrinsic. |
2792 | // This has better support in BasicAA. |
2793 | // - gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two |
2794 | // multiplies together. |
2795 | if (GEPEltType->isScalableTy() || |
2796 | (!GEPEltType->isIntegerTy(Bitwidth: 8) && GEP.getNumIndices() == 1 && |
2797 | match(V: GEP.getOperand(i_nocapture: 1), |
2798 | P: m_OneUse(SubPattern: m_CombineOr(L: m_Mul(L: m_Value(), R: m_ConstantInt()), |
2799 | R: m_Shl(L: m_Value(), R: m_ConstantInt())))))) { |
2800 | Value *Offset = EmitGEPOffset(GEP: cast<GEPOperator>(Val: &GEP)); |
2801 | return replaceInstUsesWith( |
2802 | I&: GEP, V: Builder.CreatePtrAdd(Ptr: PtrOp, Offset, Name: "" , NW: GEP.getNoWrapFlags())); |
2803 | } |
2804 | |
2805 | // Check to see if the inputs to the PHI node are getelementptr instructions. |
2806 | if (auto *PN = dyn_cast<PHINode>(Val: PtrOp)) { |
2807 | auto *Op1 = dyn_cast<GetElementPtrInst>(Val: PN->getOperand(i_nocapture: 0)); |
2808 | if (!Op1) |
2809 | return nullptr; |
2810 | |
2811 | // Don't fold a GEP into itself through a PHI node. This can only happen |
2812 | // through the back-edge of a loop. Folding a GEP into itself means that |
2813 | // the value of the previous iteration needs to be stored in the meantime, |
2814 | // thus requiring an additional register variable to be live, but not |
2815 | // actually achieving anything (the GEP still needs to be executed once per |
2816 | // loop iteration). |
2817 | if (Op1 == &GEP) |
2818 | return nullptr; |
2819 | |
2820 | int DI = -1; |
2821 | |
2822 | for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) { |
2823 | auto *Op2 = dyn_cast<GetElementPtrInst>(Val&: *I); |
2824 | if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() || |
2825 | Op1->getSourceElementType() != Op2->getSourceElementType()) |
2826 | return nullptr; |
2827 | |
2828 | // As for Op1 above, don't try to fold a GEP into itself. |
2829 | if (Op2 == &GEP) |
2830 | return nullptr; |
2831 | |
2832 | // Keep track of the type as we walk the GEP. |
2833 | Type *CurTy = nullptr; |
2834 | |
2835 | for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) { |
2836 | if (Op1->getOperand(i_nocapture: J)->getType() != Op2->getOperand(i_nocapture: J)->getType()) |
2837 | return nullptr; |
2838 | |
2839 | if (Op1->getOperand(i_nocapture: J) != Op2->getOperand(i_nocapture: J)) { |
2840 | if (DI == -1) { |
// We have not seen any differences in the GEPs feeding the
// PHI yet, so we record this one if it is allowed to be a
// variable.
2844 | |
// The first two arguments can vary for any GEP; the rest have to be
// static for struct slots.
2847 | if (J > 1) { |
2848 | assert(CurTy && "No current type?" ); |
2849 | if (CurTy->isStructTy()) |
2850 | return nullptr; |
2851 | } |
2852 | |
2853 | DI = J; |
2854 | } else { |
// The GEPs differ in more than one operand. While this could be
// extended to support GEPs that vary in more than one variable, it
// doesn't make sense: it greatly increases the complexity and
// would result in an R+R+R addressing mode, which no backend
// supports directly and which would have to be broken into several
// simpler instructions anyway.
2861 | return nullptr; |
2862 | } |
2863 | } |
2864 | |
2865 | // Sink down a layer of the type for the next iteration. |
2866 | if (J > 0) { |
2867 | if (J == 1) { |
2868 | CurTy = Op1->getSourceElementType(); |
2869 | } else { |
2870 | CurTy = |
2871 | GetElementPtrInst::getTypeAtIndex(Ty: CurTy, Idx: Op1->getOperand(i_nocapture: J)); |
2872 | } |
2873 | } |
2874 | } |
2875 | } |
2876 | |
2877 | // If not all GEPs are identical we'll have to create a new PHI node. |
2878 | // Check that the old PHI node has only one use so that it will get |
2879 | // removed. |
2880 | if (DI != -1 && !PN->hasOneUse()) |
2881 | return nullptr; |
2882 | |
2883 | auto *NewGEP = cast<GetElementPtrInst>(Val: Op1->clone()); |
2884 | if (DI == -1) { |
2885 | // All the GEPs feeding the PHI are identical. Clone one down into our |
2886 | // BB so that it can be merged with the current GEP. |
2887 | } else { |
2888 | // All the GEPs feeding the PHI differ at a single offset. Clone a GEP |
2889 | // into the current block so it can be merged, and create a new PHI to |
2890 | // set that index. |
2891 | PHINode *NewPN; |
2892 | { |
2893 | IRBuilderBase::InsertPointGuard Guard(Builder); |
2894 | Builder.SetInsertPoint(PN); |
2895 | NewPN = Builder.CreatePHI(Ty: Op1->getOperand(i_nocapture: DI)->getType(), |
2896 | NumReservedValues: PN->getNumOperands()); |
2897 | } |
2898 | |
2899 | for (auto &I : PN->operands()) |
2900 | NewPN->addIncoming(V: cast<GEPOperator>(Val&: I)->getOperand(i_nocapture: DI), |
2901 | BB: PN->getIncomingBlock(U: I)); |
2902 | |
2903 | NewGEP->setOperand(i_nocapture: DI, Val_nocapture: NewPN); |
2904 | } |
2905 | |
2906 | NewGEP->insertBefore(BB&: *GEP.getParent(), InsertPos: GEP.getParent()->getFirstInsertionPt()); |
2907 | return replaceOperand(I&: GEP, OpNum: 0, V: NewGEP); |
2908 | } |
2909 | |
2910 | if (auto *Src = dyn_cast<GEPOperator>(Val: PtrOp)) |
2911 | if (Instruction *I = visitGEPOfGEP(GEP, Src)) |
2912 | return I; |
2913 | |
2914 | if (GEP.getNumIndices() == 1) { |
2915 | unsigned AS = GEP.getPointerAddressSpace(); |
2916 | if (GEP.getOperand(i_nocapture: 1)->getType()->getScalarSizeInBits() == |
2917 | DL.getIndexSizeInBits(AS)) { |
2918 | uint64_t TyAllocSize = DL.getTypeAllocSize(Ty: GEPEltType).getFixedValue(); |
2919 | |
2920 | if (TyAllocSize == 1) { |
// Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
// but only if the result pointer is used only as if it were an integer,
// or both pointers point to the same underlying object (otherwise
// provenance is not necessarily retained).
2925 | Value *X = GEP.getPointerOperand(); |
2926 | Value *Y; |
2927 | if (match(V: GEP.getOperand(i_nocapture: 1), |
2928 | P: m_Sub(L: m_PtrToInt(Op: m_Value(V&: Y)), R: m_PtrToInt(Op: m_Specific(V: X)))) && |
2929 | GEPType == Y->getType()) { |
2930 | bool HasSameUnderlyingObject = |
2931 | getUnderlyingObject(V: X) == getUnderlyingObject(V: Y); |
2932 | bool Changed = false; |
2933 | GEP.replaceUsesWithIf(New: Y, ShouldReplace: [&](Use &U) { |
2934 | bool ShouldReplace = HasSameUnderlyingObject || |
2935 | isa<ICmpInst>(Val: U.getUser()) || |
2936 | isa<PtrToIntInst>(Val: U.getUser()); |
2937 | Changed |= ShouldReplace; |
2938 | return ShouldReplace; |
2939 | }); |
2940 | return Changed ? &GEP : nullptr; |
2941 | } |
2942 | } else if (auto *ExactIns = |
2943 | dyn_cast<PossiblyExactOperator>(Val: GEP.getOperand(i_nocapture: 1))) { |
2944 | // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V) |
2945 | Value *V; |
2946 | if (ExactIns->isExact()) { |
2947 | if ((has_single_bit(Value: TyAllocSize) && |
2948 | match(V: GEP.getOperand(i_nocapture: 1), |
2949 | P: m_Shr(L: m_Value(V), |
2950 | R: m_SpecificInt(V: countr_zero(Val: TyAllocSize))))) || |
2951 | match(V: GEP.getOperand(i_nocapture: 1), |
2952 | P: m_IDiv(L: m_Value(V), R: m_SpecificInt(V: TyAllocSize)))) { |
2953 | return GetElementPtrInst::Create(PointeeType: Builder.getInt8Ty(), |
2954 | Ptr: GEP.getPointerOperand(), IdxList: V, |
2955 | NW: GEP.getNoWrapFlags()); |
2956 | } |
2957 | } |
2958 | if (ExactIns->isExact() && ExactIns->hasOneUse()) { |
// Try to canonicalize a non-i8 element type to i8 when the index is an
// exact instruction (div/shr) with a constant RHS: the non-i8 element
// scale can then be folded into the div/shr (similar to the mul case,
// just inverted).
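// For example (illustrative, with a 4-byte i32):
//   %idx = lshr exact i64 %v, 3
//   %gep = getelementptr i32, ptr %p, i64 %idx
// becomes:
//   %idx2 = lshr exact i64 %v, 1
//   %gep = getelementptr i8, ptr %p, i64 %idx2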
2963 | const APInt *C; |
2964 | std::optional<APInt> NewC; |
2965 | if (has_single_bit(Value: TyAllocSize) && |
2966 | match(V: ExactIns, P: m_Shr(L: m_Value(V), R: m_APInt(Res&: C))) && |
2967 | C->uge(RHS: countr_zero(Val: TyAllocSize))) |
2968 | NewC = *C - countr_zero(Val: TyAllocSize); |
2969 | else if (match(V: ExactIns, P: m_UDiv(L: m_Value(V), R: m_APInt(Res&: C)))) { |
2970 | APInt Quot; |
2971 | uint64_t Rem; |
2972 | APInt::udivrem(LHS: *C, RHS: TyAllocSize, Quotient&: Quot, Remainder&: Rem); |
2973 | if (Rem == 0) |
2974 | NewC = Quot; |
2975 | } else if (match(V: ExactIns, P: m_SDiv(L: m_Value(V), R: m_APInt(Res&: C)))) { |
2976 | APInt Quot; |
2977 | int64_t Rem; |
2978 | APInt::sdivrem(LHS: *C, RHS: TyAllocSize, Quotient&: Quot, Remainder&: Rem); |
// For sdiv we need to make sure we aren't creating INT_MIN / -1.
2980 | if (!Quot.isAllOnes() && Rem == 0) |
2981 | NewC = Quot; |
2982 | } |
2983 | |
2984 | if (NewC.has_value()) { |
2985 | Value *NewOp = Builder.CreateBinOp( |
2986 | Opc: static_cast<Instruction::BinaryOps>(ExactIns->getOpcode()), LHS: V, |
2987 | RHS: ConstantInt::get(Ty: V->getType(), V: *NewC)); |
2988 | cast<BinaryOperator>(Val: NewOp)->setIsExact(); |
2989 | return GetElementPtrInst::Create(PointeeType: Builder.getInt8Ty(), |
2990 | Ptr: GEP.getPointerOperand(), IdxList: NewOp, |
2991 | NW: GEP.getNoWrapFlags()); |
2992 | } |
2993 | } |
2994 | } |
2995 | } |
2996 | } |
2997 | // We do not handle pointer-vector geps here. |
2998 | if (GEPType->isVectorTy()) |
2999 | return nullptr; |
3000 | |
3001 | if (GEP.getNumIndices() == 1) { |
3002 | // We can only preserve inbounds if the original gep is inbounds, the add |
3003 | // is nsw, and the add operands are non-negative. |
3004 | auto CanPreserveInBounds = [&](bool AddIsNSW, Value *Idx1, Value *Idx2) { |
3005 | SimplifyQuery Q = SQ.getWithInstruction(I: &GEP); |
3006 | return GEP.isInBounds() && AddIsNSW && isKnownNonNegative(V: Idx1, SQ: Q) && |
3007 | isKnownNonNegative(V: Idx2, SQ: Q); |
3008 | }; |
3009 | |
3010 | // Try to replace ADD + GEP with GEP + GEP. |
3011 | Value *Idx1, *Idx2; |
3012 | if (match(V: GEP.getOperand(i_nocapture: 1), |
3013 | P: m_OneUse(SubPattern: m_Add(L: m_Value(V&: Idx1), R: m_Value(V&: Idx2))))) { |
3014 | // %idx = add i64 %idx1, %idx2 |
3015 | // %gep = getelementptr i32, ptr %ptr, i64 %idx |
3016 | // as: |
3017 | // %newptr = getelementptr i32, ptr %ptr, i64 %idx1 |
3018 | // %newgep = getelementptr i32, ptr %newptr, i64 %idx2 |
3019 | bool IsInBounds = CanPreserveInBounds( |
3020 | cast<OverflowingBinaryOperator>(Val: GEP.getOperand(i_nocapture: 1))->hasNoSignedWrap(), |
3021 | Idx1, Idx2); |
3022 | auto *NewPtr = |
3023 | Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: GEP.getPointerOperand(), |
3024 | IdxList: Idx1, Name: "" , NW: IsInBounds); |
3025 | return replaceInstUsesWith( |
3026 | I&: GEP, V: Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: NewPtr, IdxList: Idx2, Name: "" , |
3027 | NW: IsInBounds)); |
3028 | } |
3029 | ConstantInt *C; |
3030 | if (match(V: GEP.getOperand(i_nocapture: 1), P: m_OneUse(SubPattern: m_SExtLike(Op: m_OneUse(SubPattern: m_NSWAdd( |
3031 | L: m_Value(V&: Idx1), R: m_ConstantInt(CI&: C))))))) { |
3032 | // %add = add nsw i32 %idx1, idx2 |
3033 | // %sidx = sext i32 %add to i64 |
3034 | // %gep = getelementptr i32, ptr %ptr, i64 %sidx |
3035 | // as: |
3036 | // %newptr = getelementptr i32, ptr %ptr, i32 %idx1 |
3037 | // %newgep = getelementptr i32, ptr %newptr, i32 idx2 |
3038 | bool IsInBounds = CanPreserveInBounds( |
3039 | /*IsNSW=*/true, Idx1, C); |
3040 | auto *NewPtr = Builder.CreateGEP( |
3041 | Ty: GEP.getSourceElementType(), Ptr: GEP.getPointerOperand(), |
3042 | IdxList: Builder.CreateSExt(V: Idx1, DestTy: GEP.getOperand(i_nocapture: 1)->getType()), Name: "" , |
3043 | NW: IsInBounds); |
3044 | return replaceInstUsesWith( |
3045 | I&: GEP, |
3046 | V: Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: NewPtr, |
3047 | IdxList: Builder.CreateSExt(V: C, DestTy: GEP.getOperand(i_nocapture: 1)->getType()), |
3048 | Name: "" , NW: IsInBounds)); |
3049 | } |
3050 | } |
3051 | |
3052 | if (!GEP.isInBounds()) { |
3053 | unsigned IdxWidth = |
3054 | DL.getIndexSizeInBits(AS: PtrOp->getType()->getPointerAddressSpace()); |
3055 | APInt BasePtrOffset(IdxWidth, 0); |
3056 | Value *UnderlyingPtrOp = |
3057 | PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, |
3058 | Offset&: BasePtrOffset); |
3059 | bool CanBeNull, CanBeFreed; |
3060 | uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes( |
3061 | DL, CanBeNull, CanBeFreed); |
3062 | if (!CanBeNull && !CanBeFreed && DerefBytes != 0) { |
3063 | if (GEP.accumulateConstantOffset(DL, Offset&: BasePtrOffset) && |
3064 | BasePtrOffset.isNonNegative()) { |
3065 | APInt AllocSize(IdxWidth, DerefBytes); |
3066 | if (BasePtrOffset.ule(RHS: AllocSize)) { |
3067 | return GetElementPtrInst::CreateInBounds( |
3068 | PointeeType: GEP.getSourceElementType(), Ptr: PtrOp, IdxList: Indices, NameStr: GEP.getName()); |
3069 | } |
3070 | } |
3071 | } |
3072 | } |
3073 | |
3074 | if (Instruction *R = foldSelectGEP(GEP, Builder)) |
3075 | return R; |
3076 | |
3077 | return nullptr; |
3078 | } |
3079 | |
3080 | static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI, |
3081 | Instruction *AI) { |
3082 | if (isa<ConstantPointerNull>(Val: V)) |
3083 | return true; |
3084 | if (auto *LI = dyn_cast<LoadInst>(Val: V)) |
3085 | return isa<GlobalVariable>(Val: LI->getPointerOperand()); |
3086 | // Two distinct allocations will never be equal. |
3087 | return isAllocLikeFn(V, TLI: &TLI) && V != AI; |
3088 | } |
3089 | |
/// Given a call CB which uses an address UsedV, return true if we can prove
/// the call's only possible effect is storing to UsedV.
3092 | static bool isRemovableWrite(CallBase &CB, Value *UsedV, |
3093 | const TargetLibraryInfo &TLI) { |
3094 | if (!CB.use_empty()) |
3095 | // TODO: add recursion if returned attribute is present |
3096 | return false; |
3097 | |
3098 | if (CB.isTerminator()) |
3099 | // TODO: remove implementation restriction |
3100 | return false; |
3101 | |
3102 | if (!CB.willReturn() || !CB.doesNotThrow()) |
3103 | return false; |
3104 | |
3105 | // If the only possible side effect of the call is writing to the alloca, |
3106 | // and the result isn't used, we can safely remove any reads implied by the |
3107 | // call including those which might read the alloca itself. |
3108 | std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: &CB, TLI); |
3109 | return Dest && Dest->Ptr == UsedV; |
3110 | } |
3111 | |
3112 | static bool isAllocSiteRemovable(Instruction *AI, |
3113 | SmallVectorImpl<WeakTrackingVH> &Users, |
3114 | const TargetLibraryInfo &TLI) { |
3115 | SmallVector<Instruction*, 4> Worklist; |
3116 | const std::optional<StringRef> Family = getAllocationFamily(I: AI, TLI: &TLI); |
3117 | Worklist.push_back(Elt: AI); |
3118 | |
3119 | do { |
3120 | Instruction *PI = Worklist.pop_back_val(); |
3121 | for (User *U : PI->users()) { |
3122 | Instruction *I = cast<Instruction>(Val: U); |
3123 | switch (I->getOpcode()) { |
3124 | default: |
3125 | // Give up the moment we see something we can't handle. |
3126 | return false; |
3127 | |
3128 | case Instruction::AddrSpaceCast: |
3129 | case Instruction::BitCast: |
3130 | case Instruction::GetElementPtr: |
3131 | Users.emplace_back(Args&: I); |
3132 | Worklist.push_back(Elt: I); |
3133 | continue; |
3134 | |
3135 | case Instruction::ICmp: { |
3136 | ICmpInst *ICI = cast<ICmpInst>(Val: I); |
3137 | // We can fold eq/ne comparisons with null to false/true, respectively. |
3138 | // We also fold comparisons in some conditions provided the alloc has |
3139 | // not escaped (see isNeverEqualToUnescapedAlloc). |
3140 | if (!ICI->isEquality()) |
3141 | return false; |
3142 | unsigned OtherIndex = (ICI->getOperand(i_nocapture: 0) == PI) ? 1 : 0; |
3143 | if (!isNeverEqualToUnescapedAlloc(V: ICI->getOperand(i_nocapture: OtherIndex), TLI, AI)) |
3144 | return false; |
3145 | |
3146 | // Do not fold compares to aligned_alloc calls, as they may have to |
3147 | // return null in case the required alignment cannot be satisfied, |
3148 | // unless we can prove that both alignment and size are valid. |
3149 | auto AlignmentAndSizeKnownValid = [](CallBase *CB) { |
// Check if the alignment and size of a call to aligned_alloc are valid,
// that is, the alignment is a power of 2 and the size is a multiple of
// the alignment.
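// For example (illustrative), aligned_alloc(32, 64) has a valid
// alignment/size pair, while aligned_alloc(32, 40) does not and may
// legitimately return null.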
3153 | const APInt *Alignment; |
3154 | const APInt *Size; |
3155 | return match(V: CB->getArgOperand(i: 0), P: m_APInt(Res&: Alignment)) && |
3156 | match(V: CB->getArgOperand(i: 1), P: m_APInt(Res&: Size)) && |
3157 | Alignment->isPowerOf2() && Size->urem(RHS: *Alignment).isZero(); |
3158 | }; |
3159 | auto *CB = dyn_cast<CallBase>(Val: AI); |
3160 | LibFunc TheLibFunc; |
3161 | if (CB && TLI.getLibFunc(FDecl: *CB->getCalledFunction(), F&: TheLibFunc) && |
3162 | TLI.has(F: TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc && |
3163 | !AlignmentAndSizeKnownValid(CB)) |
3164 | return false; |
3165 | Users.emplace_back(Args&: I); |
3166 | continue; |
3167 | } |
3168 | |
3169 | case Instruction::Call: |
3170 | // Ignore no-op and store intrinsics. |
3171 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
3172 | switch (II->getIntrinsicID()) { |
3173 | default: |
3174 | return false; |
3175 | |
3176 | case Intrinsic::memmove: |
3177 | case Intrinsic::memcpy: |
3178 | case Intrinsic::memset: { |
3179 | MemIntrinsic *MI = cast<MemIntrinsic>(Val: II); |
3180 | if (MI->isVolatile() || MI->getRawDest() != PI) |
3181 | return false; |
3182 | [[fallthrough]]; |
3183 | } |
3184 | case Intrinsic::assume: |
3185 | case Intrinsic::invariant_start: |
3186 | case Intrinsic::invariant_end: |
3187 | case Intrinsic::lifetime_start: |
3188 | case Intrinsic::lifetime_end: |
3189 | case Intrinsic::objectsize: |
3190 | Users.emplace_back(Args&: I); |
3191 | continue; |
3192 | case Intrinsic::launder_invariant_group: |
3193 | case Intrinsic::strip_invariant_group: |
3194 | Users.emplace_back(Args&: I); |
3195 | Worklist.push_back(Elt: I); |
3196 | continue; |
3197 | } |
3198 | } |
3199 | |
3200 | if (isRemovableWrite(CB&: *cast<CallBase>(Val: I), UsedV: PI, TLI)) { |
3201 | Users.emplace_back(Args&: I); |
3202 | continue; |
3203 | } |
3204 | |
3205 | if (getFreedOperand(CB: cast<CallBase>(Val: I), TLI: &TLI) == PI && |
3206 | getAllocationFamily(I, TLI: &TLI) == Family) { |
3207 | assert(Family); |
3208 | Users.emplace_back(Args&: I); |
3209 | continue; |
3210 | } |
3211 | |
3212 | if (getReallocatedOperand(CB: cast<CallBase>(Val: I)) == PI && |
3213 | getAllocationFamily(I, TLI: &TLI) == Family) { |
3214 | assert(Family); |
3215 | Users.emplace_back(Args&: I); |
3216 | Worklist.push_back(Elt: I); |
3217 | continue; |
3218 | } |
3219 | |
3220 | return false; |
3221 | |
3222 | case Instruction::Store: { |
3223 | StoreInst *SI = cast<StoreInst>(Val: I); |
3224 | if (SI->isVolatile() || SI->getPointerOperand() != PI) |
3225 | return false; |
3226 | Users.emplace_back(Args&: I); |
3227 | continue; |
3228 | } |
3229 | } |
3230 | llvm_unreachable("missing a return?" ); |
3231 | } |
3232 | } while (!Worklist.empty()); |
3233 | return true; |
3234 | } |
3235 | |
3236 | Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) { |
3237 | assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI)); |
3238 | |
// If we have a malloc call which is only used in any number of comparisons to
// null and free calls, delete the calls and replace the comparisons with true
// or false as appropriate.
3242 | |
// This is based on the principle that we can substitute our own allocation
// function (which will never return null) rather than relying on knowledge of
// the specific function being called. In some sense this can change the
// permitted outputs of a program (when we convert a malloc to an alloca, the
// fact that the allocation is now on the stack is potentially visible, for
// example), but we believe it does so in a permissible manner.
3249 | SmallVector<WeakTrackingVH, 64> Users; |
3250 | |
3251 | // If we are removing an alloca with a dbg.declare, insert dbg.value calls |
3252 | // before each store. |
3253 | SmallVector<DbgVariableIntrinsic *, 8> DVIs; |
3254 | SmallVector<DbgVariableRecord *, 8> DVRs; |
3255 | std::unique_ptr<DIBuilder> DIB; |
3256 | if (isa<AllocaInst>(Val: MI)) { |
3257 | findDbgUsers(DbgInsts&: DVIs, V: &MI, DbgVariableRecords: &DVRs); |
3258 | DIB.reset(p: new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false)); |
3259 | } |
3260 | |
3261 | if (isAllocSiteRemovable(AI: &MI, Users, TLI)) { |
3262 | for (unsigned i = 0, e = Users.size(); i != e; ++i) { |
// Lower all @llvm.objectsize calls first because they may
// use a bitcast/GEP of the alloca we are removing.
3265 | if (!Users[i]) |
3266 | continue; |
3267 | |
3268 | Instruction *I = cast<Instruction>(Val: &*Users[i]); |
3269 | |
3270 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
3271 | if (II->getIntrinsicID() == Intrinsic::objectsize) { |
3272 | SmallVector<Instruction *> InsertedInstructions; |
3273 | Value *Result = lowerObjectSizeCall( |
3274 | ObjectSize: II, DL, TLI: &TLI, AA, /*MustSucceed=*/true, InsertedInstructions: &InsertedInstructions); |
3275 | for (Instruction *Inserted : InsertedInstructions) |
3276 | Worklist.add(I: Inserted); |
3277 | replaceInstUsesWith(I&: *I, V: Result); |
3278 | eraseInstFromFunction(I&: *I); |
3279 | Users[i] = nullptr; // Skip examining in the next loop. |
3280 | } |
3281 | } |
3282 | } |
3283 | for (unsigned i = 0, e = Users.size(); i != e; ++i) { |
3284 | if (!Users[i]) |
3285 | continue; |
3286 | |
3287 | Instruction *I = cast<Instruction>(Val: &*Users[i]); |
3288 | |
3289 | if (ICmpInst *C = dyn_cast<ICmpInst>(Val: I)) { |
3290 | replaceInstUsesWith(I&: *C, |
3291 | V: ConstantInt::get(Ty: Type::getInt1Ty(C&: C->getContext()), |
3292 | V: C->isFalseWhenEqual())); |
3293 | } else if (auto *SI = dyn_cast<StoreInst>(Val: I)) { |
3294 | for (auto *DVI : DVIs) |
3295 | if (DVI->isAddressOfVariable()) |
3296 | ConvertDebugDeclareToDebugValue(DII: DVI, SI, Builder&: *DIB); |
3297 | for (auto *DVR : DVRs) |
3298 | if (DVR->isAddressOfVariable()) |
3299 | ConvertDebugDeclareToDebugValue(DVR, SI, Builder&: *DIB); |
3300 | } else { |
// Casts, GEPs, or anything else: we're about to delete this instruction,
// so it cannot have any valid uses.
3303 | replaceInstUsesWith(I&: *I, V: PoisonValue::get(T: I->getType())); |
3304 | } |
3305 | eraseInstFromFunction(I&: *I); |
3306 | } |
3307 | |
3308 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &MI)) { |
3309 | // Replace invoke with a NOP intrinsic to maintain the original CFG |
3310 | Module *M = II->getModule(); |
3311 | Function *F = Intrinsic::getDeclaration(M, id: Intrinsic::donothing); |
3312 | InvokeInst::Create(Func: F, IfNormal: II->getNormalDest(), IfException: II->getUnwindDest(), |
3313 | Args: std::nullopt, NameStr: "" , InsertBefore: II->getParent()); |
3314 | } |
3315 | |
3316 | // Remove debug intrinsics which describe the value contained within the |
3317 | // alloca. In addition to removing dbg.{declare,addr} which simply point to |
3318 | // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.: |
3319 | // |
3320 | // ``` |
3321 | // define void @foo(i32 %0) { |
3322 | // %a = alloca i32 ; Deleted. |
3323 | // store i32 %0, i32* %a |
3324 | // dbg.value(i32 %0, "arg0") ; Not deleted. |
3325 | // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted. |
3326 | // call void @trivially_inlinable_no_op(i32* %a) |
3327 | // ret void |
3328 | // } |
3329 | // ``` |
3330 | // |
3331 | // This may not be required if we stop describing the contents of allocas |
3332 | // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in |
3333 | // the LowerDbgDeclare utility. |
3334 | // |
3335 | // If there is a dead store to `%a` in @trivially_inlinable_no_op, the |
3336 | // "arg0" dbg.value may be stale after the call. However, failing to remove |
3337 | // the DW_OP_deref dbg.value causes large gaps in location coverage. |
3338 | // |
3339 | // FIXME: the Assignment Tracking project has now likely made this |
3340 | // redundant (and it's sometimes harmful). |
3341 | for (auto *DVI : DVIs) |
3342 | if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref()) |
3343 | DVI->eraseFromParent(); |
3344 | for (auto *DVR : DVRs) |
3345 | if (DVR->isAddressOfVariable() || DVR->getExpression()->startsWithDeref()) |
3346 | DVR->eraseFromParent(); |
3347 | |
3348 | return eraseInstFromFunction(I&: MI); |
3349 | } |
3350 | return nullptr; |
3351 | } |
3352 | |
3353 | /// Move the call to free before a NULL test. |
3354 | /// |
/// Check if this free is accessed after its argument has been tested
/// against NULL (property 0).
/// If so, it is legal to move this call into its predecessor block.
3358 | /// |
3359 | /// The move is performed only if the block containing the call to free |
3360 | /// will be removed, i.e.: |
3361 | /// 1. it has only one predecessor P, and P has two successors |
3362 | /// 2. it contains the call, noops, and an unconditional branch |
3363 | /// 3. its successor is the same as its predecessor's successor |
3364 | /// |
/// Profitability is not a concern here; this function should be called
/// only if the caller knows this transformation would be profitable
/// (e.g., for code size).
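///
/// An illustrative sketch of the pattern being matched (value names are
/// made up for the example):
///   pred:
///     %cmp = icmp eq ptr %p, null
///     br i1 %cmp, label %succ, label %free_bb
///   free_bb:
///     call void @free(ptr %p)
///     br label %succ
/// The call to free is hoisted into %pred; %free_bb then becomes empty and
/// can later be removed by SimplifyCFG.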
3368 | static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI, |
3369 | const DataLayout &DL) { |
3370 | Value *Op = FI.getArgOperand(i: 0); |
3371 | BasicBlock *FreeInstrBB = FI.getParent(); |
3372 | BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor(); |
3373 | |
3374 | // Validate part of constraint #1: Only one predecessor |
// FIXME: We could handle multiple predecessors, but in that case we
// would duplicate the call to free in each predecessor, and it may
// not be profitable even for code size.
3378 | if (!PredBB) |
3379 | return nullptr; |
3380 | |
// Validate constraint #2: Does this block contain only the call to
// free, noops, and an unconditional branch?
3383 | BasicBlock *SuccBB; |
3384 | Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator(); |
3385 | if (!match(V: FreeInstrBBTerminator, P: m_UnconditionalBr(Succ&: SuccBB))) |
3386 | return nullptr; |
3387 | |
// If there are only 2 instructions in the block at this point,
// they are the call to free and the unconditional branch.
// If there are more than 2 instructions, check that the extra ones are
// noops, i.e., they won't hurt the performance of the generated code.
3392 | if (FreeInstrBB->size() != 2) { |
3393 | for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) { |
3394 | if (&Inst == &FI || &Inst == FreeInstrBBTerminator) |
3395 | continue; |
3396 | auto *Cast = dyn_cast<CastInst>(Val: &Inst); |
3397 | if (!Cast || !Cast->isNoopCast(DL)) |
3398 | return nullptr; |
3399 | } |
3400 | } |
3401 | // Validate the rest of constraint #1 by matching on the pred branch. |
3402 | Instruction *TI = PredBB->getTerminator(); |
3403 | BasicBlock *TrueBB, *FalseBB; |
3404 | ICmpInst::Predicate Pred; |
3405 | if (!match(V: TI, P: m_Br(C: m_ICmp(Pred, |
3406 | L: m_CombineOr(L: m_Specific(V: Op), |
3407 | R: m_Specific(V: Op->stripPointerCasts())), |
3408 | R: m_Zero()), |
3409 | T&: TrueBB, F&: FalseBB))) |
3410 | return nullptr; |
3411 | if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE) |
3412 | return nullptr; |
3413 | |
3414 | // Validate constraint #3: Ensure the null case just falls through. |
3415 | if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB)) |
3416 | return nullptr; |
3417 | assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) && |
3418 | "Broken CFG: missing edge from predecessor to successor" ); |
3419 | |
3420 | // At this point, we know that everything in FreeInstrBB can be moved |
3421 | // before TI. |
3422 | for (Instruction &Instr : llvm::make_early_inc_range(Range&: *FreeInstrBB)) { |
3423 | if (&Instr == FreeInstrBBTerminator) |
3424 | break; |
3425 | Instr.moveBeforePreserving(MovePos: TI); |
3426 | } |
3427 | assert(FreeInstrBB->size() == 1 && |
3428 | "Only the branch instruction should remain" ); |
3429 | |
3430 | // Now that we've moved the call to free before the NULL check, we have to |
3431 | // remove any attributes on its parameter that imply it's non-null, because |
3432 | // those attributes might have only been valid because of the NULL check, and |
3433 | // we can get miscompiles if we keep them. This is conservative if non-null is |
3434 | // also implied by something other than the NULL check, but it's guaranteed to |
3435 | // be correct, and the conservativeness won't matter in practice, since the |
3436 | // attributes are irrelevant for the call to free itself and the pointer |
3437 | // shouldn't be used after the call. |
3438 | AttributeList Attrs = FI.getAttributes(); |
3439 | Attrs = Attrs.removeParamAttribute(C&: FI.getContext(), ArgNo: 0, Kind: Attribute::NonNull); |
3440 | Attribute Dereferenceable = Attrs.getParamAttr(ArgNo: 0, Kind: Attribute::Dereferenceable); |
3441 | if (Dereferenceable.isValid()) { |
3442 | uint64_t Bytes = Dereferenceable.getDereferenceableBytes(); |
3443 | Attrs = Attrs.removeParamAttribute(C&: FI.getContext(), ArgNo: 0, |
3444 | Kind: Attribute::Dereferenceable); |
3445 | Attrs = Attrs.addDereferenceableOrNullParamAttr(C&: FI.getContext(), ArgNo: 0, Bytes); |
3446 | } |
3447 | FI.setAttributes(Attrs); |
3448 | |
3449 | return &FI; |
3450 | } |
3451 | |
3452 | Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) { |
3453 | // free undef -> unreachable. |
3454 | if (isa<UndefValue>(Val: Op)) { |
3455 | // Leave a marker since we can't modify the CFG here. |
3456 | CreateNonTerminatorUnreachable(InsertAt: &FI); |
3457 | return eraseInstFromFunction(I&: FI); |
3458 | } |
3459 | |
// If we have 'free null', delete the instruction. This can happen in STL code
// when lots of inlining happens.
3462 | if (isa<ConstantPointerNull>(Val: Op)) |
3463 | return eraseInstFromFunction(I&: FI); |
3464 | |
3465 | // If we had free(realloc(...)) with no intervening uses, then eliminate the |
3466 | // realloc() entirely. |
3467 | CallInst *CI = dyn_cast<CallInst>(Val: Op); |
3468 | if (CI && CI->hasOneUse()) |
3469 | if (Value *ReallocatedOp = getReallocatedOperand(CB: CI)) |
3470 | return eraseInstFromFunction(I&: *replaceInstUsesWith(I&: *CI, V: ReallocatedOp)); |
3471 | |
// If we optimize for code size, try to move the call to free before the null
// test so that SimplifyCFG can remove the empty block and dead code
// elimination can remove the branch. I.e., this helps to turn something like:
3475 | // if (foo) free(foo); |
3476 | // into |
3477 | // free(foo); |
3478 | // |
3479 | // Note that we can only do this for 'free' and not for any flavor of |
3480 | // 'operator delete'; there is no 'operator delete' symbol for which we are |
3481 | // permitted to invent a call, even if we're passing in a null pointer. |
3482 | if (MinimizeSize) { |
3483 | LibFunc Func; |
3484 | if (TLI.getLibFunc(CB: FI, F&: Func) && TLI.has(F: Func) && Func == LibFunc_free) |
3485 | if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL)) |
3486 | return I; |
3487 | } |
3488 | |
3489 | return nullptr; |
3490 | } |
3491 | |
3492 | Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) { |
3493 | Value *RetVal = RI.getReturnValue(); |
3494 | if (!RetVal || !AttributeFuncs::isNoFPClassCompatibleType(Ty: RetVal->getType())) |
3495 | return nullptr; |
3496 | |
3497 | Function *F = RI.getFunction(); |
3498 | FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass(); |
3499 | if (ReturnClass == fcNone) |
3500 | return nullptr; |
3501 | |
3502 | KnownFPClass KnownClass; |
3503 | Value *Simplified = |
3504 | SimplifyDemandedUseFPClass(V: RetVal, DemandedMask: ~ReturnClass, Known&: KnownClass, Depth: 0, CxtI: &RI); |
3505 | if (!Simplified) |
3506 | return nullptr; |
3507 | |
3508 | return ReturnInst::Create(C&: RI.getContext(), retVal: Simplified); |
3509 | } |
3510 | |
3511 | // WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()! |
3512 | bool InstCombinerImpl::removeInstructionsBeforeUnreachable(Instruction &I) { |
3513 | // Try to remove the previous instruction if it must lead to unreachable. |
3514 | // This includes instructions like stores and "llvm.assume" that may not get |
3515 | // removed by simple dead code elimination. |
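// For example (illustrative), in a block that ends with
//   store i32 1, ptr %p
//   call void @llvm.assume(i1 %c)
//   unreachable
// both the store and the assume are erased, because each is guaranteed to
// transfer execution to the unreachable terminator.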
3516 | bool Changed = false; |
3517 | while (Instruction *Prev = I.getPrevNonDebugInstruction()) { |
3518 | // While we theoretically can erase EH, that would result in a block that |
3519 | // used to start with an EH no longer starting with EH, which is invalid. |
3520 | // To make it valid, we'd need to fixup predecessors to no longer refer to |
3521 | // this block, but that changes CFG, which is not allowed in InstCombine. |
3522 | if (Prev->isEHPad()) |
3523 | break; // Can not drop any more instructions. We're done here. |
3524 | |
3525 | if (!isGuaranteedToTransferExecutionToSuccessor(I: Prev)) |
3526 | break; // Can not drop any more instructions. We're done here. |
3527 | // Otherwise, this instruction can be freely erased, |
3528 | // even if it is not side-effect free. |
3529 | |
3530 | // A value may still have uses before we process it here (for example, in |
3531 | // another unreachable block), so convert those to poison. |
3532 | replaceInstUsesWith(I&: *Prev, V: PoisonValue::get(T: Prev->getType())); |
3533 | eraseInstFromFunction(I&: *Prev); |
3534 | Changed = true; |
3535 | } |
3536 | return Changed; |
3537 | } |
3538 | |
3539 | Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) { |
3540 | removeInstructionsBeforeUnreachable(I); |
3541 | return nullptr; |
3542 | } |
3543 | |
3544 | Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) { |
3545 | assert(BI.isUnconditional() && "Only for unconditional branches." ); |
3546 | |
// If the second-to-last instruction in the basic block is a store (excluding
// debug info and bitcasts of pointers) and the block ends with an
// unconditional branch, try to move the store to the successor block.
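//
// For example (illustrative IR): if this block ends with
//   store i32 %v, ptr %p
//   br label %succ
// and the other predecessor of %succ also stores to %p, the two stores may
// be merged by mergeStoreIntoSuccessor into a single store in %succ fed by a
// phi of the stored values.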
3550 | |
3551 | auto GetLastSinkableStore = [](BasicBlock::iterator BBI) { |
3552 | auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) { |
3553 | return BBI->isDebugOrPseudoInst() || |
3554 | (isa<BitCastInst>(Val: BBI) && BBI->getType()->isPointerTy()); |
3555 | }; |
3556 | |
3557 | BasicBlock::iterator FirstInstr = BBI->getParent()->begin(); |
3558 | do { |
3559 | if (BBI != FirstInstr) |
3560 | --BBI; |
3561 | } while (BBI != FirstInstr && IsNoopInstrForStoreMerging(BBI)); |
3562 | |
3563 | return dyn_cast<StoreInst>(Val&: BBI); |
3564 | }; |
3565 | |
3566 | if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI))) |
3567 | if (mergeStoreIntoSuccessor(SI&: *SI)) |
3568 | return &BI; |
3569 | |
3570 | return nullptr; |
3571 | } |
3572 | |
3573 | void InstCombinerImpl::addDeadEdge(BasicBlock *From, BasicBlock *To, |
3574 | SmallVectorImpl<BasicBlock *> &Worklist) { |
3575 | if (!DeadEdges.insert(V: {From, To}).second) |
3576 | return; |
3577 | |
3578 | // Replace phi node operands in successor with poison. |
3579 | for (PHINode &PN : To->phis()) |
3580 | for (Use &U : PN.incoming_values()) |
3581 | if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(Val: U)) { |
3582 | replaceUse(U, NewValue: PoisonValue::get(T: PN.getType())); |
3583 | addToWorklist(I: &PN); |
3584 | MadeIRChange = true; |
3585 | } |
3586 | |
3587 | Worklist.push_back(Elt: To); |
3588 | } |
3589 | |
// Under the assumption that I is unreachable, remove it and the following
// instructions. Changes are reported directly to MadeIRChange.
3592 | void InstCombinerImpl::handleUnreachableFrom( |
3593 | Instruction *I, SmallVectorImpl<BasicBlock *> &Worklist) { |
3594 | BasicBlock *BB = I->getParent(); |
3595 | for (Instruction &Inst : make_early_inc_range( |
3596 | Range: make_range(x: std::next(x: BB->getTerminator()->getReverseIterator()), |
3597 | y: std::next(x: I->getReverseIterator())))) { |
3598 | if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) { |
3599 | replaceInstUsesWith(I&: Inst, V: PoisonValue::get(T: Inst.getType())); |
3600 | MadeIRChange = true; |
3601 | } |
3602 | if (Inst.isEHPad() || Inst.getType()->isTokenTy()) |
3603 | continue; |
3604 | // RemoveDIs: erase debug-info on this instruction manually. |
3605 | Inst.dropDbgRecords(); |
3606 | eraseInstFromFunction(I&: Inst); |
3607 | MadeIRChange = true; |
3608 | } |
3609 | |
3610 | SmallVector<Value *> Changed; |
3611 | if (handleUnreachableTerminator(I: BB->getTerminator(), PoisonedValues&: Changed)) { |
3612 | MadeIRChange = true; |
3613 | for (Value *V : Changed) |
3614 | addToWorklist(I: cast<Instruction>(Val: V)); |
3615 | } |
3616 | |
3617 | // Handle potentially dead successors. |
3618 | for (BasicBlock *Succ : successors(BB)) |
3619 | addDeadEdge(From: BB, To: Succ, Worklist); |
3620 | } |
3621 | |
3622 | void InstCombinerImpl::handlePotentiallyDeadBlocks( |
3623 | SmallVectorImpl<BasicBlock *> &Worklist) { |
3624 | while (!Worklist.empty()) { |
3625 | BasicBlock *BB = Worklist.pop_back_val(); |
3626 | if (!all_of(Range: predecessors(BB), P: [&](BasicBlock *Pred) { |
3627 | return DeadEdges.contains(V: {Pred, BB}) || DT.dominates(A: BB, B: Pred); |
3628 | })) |
3629 | continue; |
3630 | |
3631 | handleUnreachableFrom(I: &BB->front(), Worklist); |
3632 | } |
3633 | } |
3634 | |
3635 | void InstCombinerImpl::handlePotentiallyDeadSuccessors(BasicBlock *BB, |
3636 | BasicBlock *LiveSucc) { |
3637 | SmallVector<BasicBlock *> Worklist; |
3638 | for (BasicBlock *Succ : successors(BB)) { |
3639 | // The live successor isn't dead. |
3640 | if (Succ == LiveSucc) |
3641 | continue; |
3642 | |
3643 | addDeadEdge(From: BB, To: Succ, Worklist); |
3644 | } |
3645 | |
3646 | handlePotentiallyDeadBlocks(Worklist); |
3647 | } |
3648 | |
3649 | Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) { |
3650 | if (BI.isUnconditional()) |
3651 | return visitUnconditionalBranchInst(BI); |
3652 | |
3653 | // Change br (not X), label True, label False to: br X, label False, True |
3654 | Value *Cond = BI.getCondition(); |
3655 | Value *X; |
3656 | if (match(V: Cond, P: m_Not(V: m_Value(V&: X))) && !isa<Constant>(Val: X)) { |
3657 | // Swap Destinations and condition... |
3658 | BI.swapSuccessors(); |
3659 | if (BPI) |
3660 | BPI->swapSuccEdgesProbabilities(Src: BI.getParent()); |
3661 | return replaceOperand(I&: BI, OpNum: 0, V: X); |
3662 | } |
3663 | |
3664 | // Canonicalize logical-and-with-invert as logical-or-with-invert. |
3665 | // This is done by inverting the condition and swapping successors: |
3666 | // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T |
3667 | Value *Y; |
3668 | if (isa<SelectInst>(Val: Cond) && |
3669 | match(V: Cond, |
3670 | P: m_OneUse(SubPattern: m_LogicalAnd(L: m_Value(V&: X), R: m_OneUse(SubPattern: m_Not(V: m_Value(V&: Y))))))) { |
3671 | Value *NotX = Builder.CreateNot(V: X, Name: "not." + X->getName()); |
3672 | Value *Or = Builder.CreateLogicalOr(Cond1: NotX, Cond2: Y); |
3673 | BI.swapSuccessors(); |
3674 | if (BPI) |
3675 | BPI->swapSuccEdgesProbabilities(Src: BI.getParent()); |
3676 | return replaceOperand(I&: BI, OpNum: 0, V: Or); |
3677 | } |
3678 | |
3679 | // If the condition is irrelevant, remove the use so that other |
3680 | // transforms on the condition become more effective. |
3681 | if (!isa<ConstantInt>(Val: Cond) && BI.getSuccessor(i: 0) == BI.getSuccessor(i: 1)) |
3682 | return replaceOperand(I&: BI, OpNum: 0, V: ConstantInt::getFalse(Ty: Cond->getType())); |
3683 | |
3684 | // Canonicalize, for example, fcmp_one -> fcmp_oeq. |
3685 | CmpInst::Predicate Pred; |
3686 | if (match(V: Cond, P: m_OneUse(SubPattern: m_FCmp(Pred, L: m_Value(), R: m_Value()))) && |
3687 | !isCanonicalPredicate(Pred)) { |
3688 | // Swap destinations and condition. |
3689 | auto *Cmp = cast<CmpInst>(Val: Cond); |
3690 | Cmp->setPredicate(CmpInst::getInversePredicate(pred: Pred)); |
3691 | BI.swapSuccessors(); |
3692 | if (BPI) |
3693 | BPI->swapSuccEdgesProbabilities(Src: BI.getParent()); |
3694 | Worklist.push(I: Cmp); |
3695 | return &BI; |
3696 | } |
3697 | |
3698 | if (isa<UndefValue>(Val: Cond)) { |
3699 | handlePotentiallyDeadSuccessors(BB: BI.getParent(), /*LiveSucc*/ nullptr); |
3700 | return nullptr; |
3701 | } |
3702 | if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) { |
3703 | handlePotentiallyDeadSuccessors(BB: BI.getParent(), |
3704 | LiveSucc: BI.getSuccessor(i: !CI->getZExtValue())); |
3705 | return nullptr; |
3706 | } |
3707 | |
3708 | DC.registerBranch(BI: &BI); |
3709 | return nullptr; |
3710 | } |
3711 | |
3712 | // Replaces (switch (select cond, X, C)/(select cond, C, X)) with (switch X) if |
3713 | // we can prove that both (switch C) and (switch X) go to the default when cond |
3714 | // is false/true. |
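//
// For example (illustrative IR, with made-up value names):
//   %cond = icmp uge i32 %x, 4
//   %sel  = select i1 %cond, i32 7, i32 %x
//   switch i32 %sel, label %default [
//     i32 0, label %bb0
//     i32 1, label %bb1
//     i32 2, label %bb2
//   ]
// Case 7 is not handled, so whenever %cond is true the switch on %sel goes
// to %default, and any %x >= 4 goes to %default as well; the switch can
// therefore use %x directly.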
3715 | static Value *simplifySwitchOnSelectUsingRanges(SwitchInst &SI, |
3716 | SelectInst *Select, |
3717 | bool IsTrueArm) { |
3718 | unsigned CstOpIdx = IsTrueArm ? 1 : 2; |
3719 | auto *C = dyn_cast<ConstantInt>(Val: Select->getOperand(i_nocapture: CstOpIdx)); |
3720 | if (!C) |
3721 | return nullptr; |
3722 | |
3723 | BasicBlock *CstBB = SI.findCaseValue(C)->getCaseSuccessor(); |
3724 | if (CstBB != SI.getDefaultDest()) |
3725 | return nullptr; |
3726 | Value *X = Select->getOperand(i_nocapture: 3 - CstOpIdx); |
3727 | ICmpInst::Predicate Pred; |
3728 | const APInt *RHSC; |
3729 | if (!match(V: Select->getCondition(), |
3730 | P: m_ICmp(Pred, L: m_Specific(V: X), R: m_APInt(Res&: RHSC)))) |
3731 | return nullptr; |
3732 | if (IsTrueArm) |
3733 | Pred = ICmpInst::getInversePredicate(pred: Pred); |
3734 | |
3735 | // See whether we can replace the select with X |
3736 | ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, Other: *RHSC); |
3737 | for (auto Case : SI.cases()) |
3738 | if (!CR.contains(Val: Case.getCaseValue()->getValue())) |
3739 | return nullptr; |
3740 | |
3741 | return X; |
3742 | } |
3743 | |
3744 | Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) { |
3745 | Value *Cond = SI.getCondition(); |
3746 | Value *Op0; |
3747 | ConstantInt *AddRHS; |
3748 | if (match(V: Cond, P: m_Add(L: m_Value(V&: Op0), R: m_ConstantInt(CI&: AddRHS)))) { |
3749 | // Change 'switch (X+4) case 1:' into 'switch (X) case -3'. |
3750 | for (auto Case : SI.cases()) { |
3751 | Constant *NewCase = ConstantExpr::getSub(C1: Case.getCaseValue(), C2: AddRHS); |
3752 | assert(isa<ConstantInt>(NewCase) && |
3753 | "Result of expression should be constant" ); |
3754 | Case.setValue(cast<ConstantInt>(Val: NewCase)); |
3755 | } |
3756 | return replaceOperand(I&: SI, OpNum: 0, V: Op0); |
3757 | } |
3758 | |
3759 | ConstantInt *SubLHS; |
3760 | if (match(V: Cond, P: m_Sub(L: m_ConstantInt(CI&: SubLHS), R: m_Value(V&: Op0)))) { |
3761 | // Change 'switch (1-X) case 1:' into 'switch (X) case 0'. |
3762 | for (auto Case : SI.cases()) { |
3763 | Constant *NewCase = ConstantExpr::getSub(C1: SubLHS, C2: Case.getCaseValue()); |
3764 | assert(isa<ConstantInt>(NewCase) && |
3765 | "Result of expression should be constant" ); |
3766 | Case.setValue(cast<ConstantInt>(Val: NewCase)); |
3767 | } |
3768 | return replaceOperand(I&: SI, OpNum: 0, V: Op0); |
3769 | } |
3770 | |
3771 | uint64_t ShiftAmt; |
3772 | if (match(V: Cond, P: m_Shl(L: m_Value(V&: Op0), R: m_ConstantInt(V&: ShiftAmt))) && |
3773 | ShiftAmt < Op0->getType()->getScalarSizeInBits() && |
3774 | all_of(Range: SI.cases(), P: [&](const auto &Case) { |
3775 | return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt; |
3776 | })) { |
3777 | // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'. |
3778 | OverflowingBinaryOperator *Shl = cast<OverflowingBinaryOperator>(Val: Cond); |
3779 | if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() || |
3780 | Shl->hasOneUse()) { |
3781 | Value *NewCond = Op0; |
3782 | if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) { |
3783 | // If the shift may wrap, we need to mask off the shifted bits. |
3784 | unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); |
3785 | NewCond = Builder.CreateAnd( |
3786 | LHS: Op0, RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - ShiftAmt)); |
3787 | } |
3788 | for (auto Case : SI.cases()) { |
3789 | const APInt &CaseVal = Case.getCaseValue()->getValue(); |
3790 | APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt) |
3791 | : CaseVal.lshr(shiftAmt: ShiftAmt); |
3792 | Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: ShiftedCase)); |
3793 | } |
3794 | return replaceOperand(I&: SI, OpNum: 0, V: NewCond); |
3795 | } |
3796 | } |
3797 | |
3798 | // Fold switch(zext/sext(X)) into switch(X) if possible. |
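// For instance (an illustrative case): with
//   %wide = zext i8 %x to i32
//   switch i32 %wide, ...
// every case value in [0, 255] can be truncated back to i8 so the switch can
// test %x directly, whereas a case value such as 300 blocks the fold because
// it can never be produced by the zext.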
3799 | if (match(V: Cond, P: m_ZExtOrSExt(Op: m_Value(V&: Op0)))) { |
3800 | bool IsZExt = isa<ZExtInst>(Val: Cond); |
3801 | Type *SrcTy = Op0->getType(); |
3802 | unsigned NewWidth = SrcTy->getScalarSizeInBits(); |
3803 | |
3804 | if (all_of(Range: SI.cases(), P: [&](const auto &Case) { |
3805 | const APInt &CaseVal = Case.getCaseValue()->getValue(); |
3806 | return IsZExt ? CaseVal.isIntN(N: NewWidth) |
3807 | : CaseVal.isSignedIntN(N: NewWidth); |
3808 | })) { |
3809 | for (auto &Case : SI.cases()) { |
3810 | APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth); |
3811 | Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase)); |
3812 | } |
3813 | return replaceOperand(I&: SI, OpNum: 0, V: Op0); |
3814 | } |
3815 | } |
3816 | |
3817 | // Fold switch(select cond, X, Y) into switch(X/Y) if possible |
3818 | if (auto *Select = dyn_cast<SelectInst>(Val: Cond)) { |
3819 | if (Value *V = |
3820 | simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/true)) |
3821 | return replaceOperand(I&: SI, OpNum: 0, V); |
3822 | if (Value *V = |
3823 | simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/false)) |
3824 | return replaceOperand(I&: SI, OpNum: 0, V); |
3825 | } |
3826 | |
3827 | KnownBits Known = computeKnownBits(V: Cond, Depth: 0, CxtI: &SI); |
3828 | unsigned LeadingKnownZeros = Known.countMinLeadingZeros(); |
3829 | unsigned LeadingKnownOnes = Known.countMinLeadingOnes(); |
3830 | |
3831 | // Compute the number of leading bits we can ignore. |
3832 | // TODO: A better way to determine this would use ComputeNumSignBits(). |
3833 | for (const auto &C : SI.cases()) { |
3834 | LeadingKnownZeros = |
3835 | std::min(a: LeadingKnownZeros, b: C.getCaseValue()->getValue().countl_zero()); |
3836 | LeadingKnownOnes = |
3837 | std::min(a: LeadingKnownOnes, b: C.getCaseValue()->getValue().countl_one()); |
3838 | } |
3839 | |
3840 | unsigned NewWidth = Known.getBitWidth() - std::max(a: LeadingKnownZeros, b: LeadingKnownOnes); |
3841 | |
3842 | // Shrink the condition operand if the new type is smaller than the old type. |
3843 | // But do not shrink to a non-standard type, because the backend can't generate |
3844 | // good code for that yet. |
3845 | // TODO: We can make it aggressive again after fixing PR39569. |
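// As an illustrative example: if the condition is an i32 whose 24 high bits
// are known zero and every case constant also fits in 8 bits, the switch can
// be rewritten to test 'trunc i32 %cond to i8' against the correspondingly
// truncated case values (assuming i8 is an acceptable type here).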
3846 | if (NewWidth > 0 && NewWidth < Known.getBitWidth() && |
3847 | shouldChangeType(FromWidth: Known.getBitWidth(), ToWidth: NewWidth)) { |
3848 | IntegerType *Ty = IntegerType::get(C&: SI.getContext(), NumBits: NewWidth); |
3849 | Builder.SetInsertPoint(&SI); |
3850 | Value *NewCond = Builder.CreateTrunc(V: Cond, DestTy: Ty, Name: "trunc" ); |
3851 | |
3852 | for (auto Case : SI.cases()) { |
3853 | APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth); |
3854 | Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase)); |
3855 | } |
3856 | return replaceOperand(I&: SI, OpNum: 0, V: NewCond); |
3857 | } |
3858 | |
3859 | if (isa<UndefValue>(Val: Cond)) { |
3860 | handlePotentiallyDeadSuccessors(BB: SI.getParent(), /*LiveSucc*/ nullptr); |
3861 | return nullptr; |
3862 | } |
3863 | if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) { |
3864 | handlePotentiallyDeadSuccessors(BB: SI.getParent(), |
3865 | LiveSucc: SI.findCaseValue(C: CI)->getCaseSuccessor()); |
3866 | return nullptr; |
3867 | } |
3868 | |
3869 | return nullptr; |
3870 | } |
3871 | |
3872 | Instruction * |
3873 | InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) { |
3874 | auto *WO = dyn_cast<WithOverflowInst>(Val: EV.getAggregateOperand()); |
3875 | if (!WO) |
3876 | return nullptr; |
3877 | |
3878 | Intrinsic::ID OvID = WO->getIntrinsicID(); |
3879 | const APInt *C = nullptr; |
3880 | if (match(V: WO->getRHS(), P: m_APIntAllowPoison(Res&: C))) { |
3881 | if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow || |
3882 | OvID == Intrinsic::umul_with_overflow)) { |
3883 | // extractvalue (any_mul_with_overflow X, -1), 0 --> -X |
3884 | if (C->isAllOnes()) |
3885 | return BinaryOperator::CreateNeg(Op: WO->getLHS()); |
3886 | // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n |
3887 | if (C->isPowerOf2()) { |
3888 | return BinaryOperator::CreateShl( |
3889 | V1: WO->getLHS(), |
3890 | V2: ConstantInt::get(Ty: WO->getLHS()->getType(), V: C->logBase2())); |
3891 | } |
3892 | } |
3893 | } |
3894 | |
3895 | // We're extracting from an overflow intrinsic. See if we're the only user. |
3896 | // That allows us to simplify multiple result intrinsics to simpler things |
3897 | // that just get one value. |
3898 | if (!WO->hasOneUse()) |
3899 | return nullptr; |
3900 | |
3901 | // Check if we're grabbing only the result of a 'with overflow' intrinsic |
3902 | // and replace it with a traditional binary instruction. |
3903 | if (*EV.idx_begin() == 0) { |
3904 | Instruction::BinaryOps BinOp = WO->getBinaryOp(); |
3905 | Value *LHS = WO->getLHS(), *RHS = WO->getRHS(); |
3906 | // Replace the old instruction's uses with poison. |
3907 | replaceInstUsesWith(I&: *WO, V: PoisonValue::get(T: WO->getType())); |
3908 | eraseInstFromFunction(I&: *WO); |
3909 | return BinaryOperator::Create(Op: BinOp, S1: LHS, S2: RHS); |
3910 | } |
3911 | |
3912 | assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst" ); |
3913 | |
3914 | // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS. |
3915 | if (OvID == Intrinsic::usub_with_overflow) |
3916 | return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS()); |
3917 | |
3918 | // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but |
3919 | // +1 is not possible because we assume signed values. |
3920 | if (OvID == Intrinsic::smul_with_overflow && |
3921 | WO->getLHS()->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
3922 | return BinaryOperator::CreateAnd(V1: WO->getLHS(), V2: WO->getRHS()); |
3923 | |
3924 | // extractvalue (umul_with_overflow X, X), 1 -> X u> 2^(N/2)-1 |
3925 | if (OvID == Intrinsic::umul_with_overflow && WO->getLHS() == WO->getRHS()) { |
3926 | unsigned BitWidth = WO->getLHS()->getType()->getScalarSizeInBits(); |
3927 | // Only handle even bitwidths for performance reasons. |
3928 | if (BitWidth % 2 == 0) |
3929 | return new ICmpInst( |
3930 | ICmpInst::ICMP_UGT, WO->getLHS(), |
3931 | ConstantInt::get(Ty: WO->getLHS()->getType(), |
3932 | V: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth / 2))); |
3933 | } |
3934 | |
3935 | // If only the overflow result is used, and the right hand side is a |
3936 | // constant (or constant splat), we can remove the intrinsic by directly |
3937 | // checking for overflow. |
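// An illustrative instance (constants made up): for
//   extractvalue (sadd_with_overflow i8 %x, 100), 1
// the no-wrap range for %x is [-128, 28), so the overflow bit is equivalent to
// a signed comparison checking that %x is at least 28.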
3938 | if (C) { |
3939 | // Compute the no-wrap range for LHS given RHS=C, then construct an |
3940 | // equivalent icmp, potentially using an offset. |
3941 | ConstantRange NWR = ConstantRange::makeExactNoWrapRegion( |
3942 | BinOp: WO->getBinaryOp(), Other: *C, NoWrapKind: WO->getNoWrapKind()); |
3943 | |
3944 | CmpInst::Predicate Pred; |
3945 | APInt NewRHSC, Offset; |
3946 | NWR.getEquivalentICmp(Pred, RHS&: NewRHSC, Offset); |
3947 | auto *OpTy = WO->getRHS()->getType(); |
3948 | auto *NewLHS = WO->getLHS(); |
3949 | if (Offset != 0) |
3950 | NewLHS = Builder.CreateAdd(LHS: NewLHS, RHS: ConstantInt::get(Ty: OpTy, V: Offset)); |
3951 | return new ICmpInst(ICmpInst::getInversePredicate(pred: Pred), NewLHS, |
3952 | ConstantInt::get(Ty: OpTy, V: NewRHSC)); |
3953 | } |
3954 | |
3955 | return nullptr; |
3956 | } |
3957 | |
3958 | Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) { |
3959 | Value *Agg = EV.getAggregateOperand(); |
3960 | |
3961 | if (!EV.hasIndices()) |
3962 | return replaceInstUsesWith(I&: EV, V: Agg); |
3963 | |
3964 | if (Value *V = simplifyExtractValueInst(Agg, Idxs: EV.getIndices(), |
3965 | Q: SQ.getWithInstruction(I: &EV))) |
3966 | return replaceInstUsesWith(I&: EV, V); |
3967 | |
3968 | if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Val: Agg)) { |
3969 | // We're extracting from an insertvalue instruction, compare the indices |
3970 | const unsigned *exti, *exte, *insi, *inse; |
3971 | for (exti = EV.idx_begin(), insi = IV->idx_begin(), |
3972 | exte = EV.idx_end(), inse = IV->idx_end(); |
3973 | exti != exte && insi != inse; |
3974 | ++exti, ++insi) { |
3975 | if (*insi != *exti) |
3976 | // The insert and extract both reference distinctly different elements. |
3977 | // This means the extract is not influenced by the insert, and we can |
3978 | // replace the aggregate operand of the extract with the aggregate |
3979 | // operand of the insert. i.e., replace |
3980 | // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 |
3981 | // %E = extractvalue { i32, { i32 } } %I, 0 |
3982 | // with |
3983 | // %E = extractvalue { i32, { i32 } } %A, 0 |
3984 | return ExtractValueInst::Create(Agg: IV->getAggregateOperand(), |
3985 | Idxs: EV.getIndices()); |
3986 | } |
3987 | if (exti == exte && insi == inse) |
3988 | // Both iterators are at the end: Index lists are identical. Replace |
3989 | // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 |
3990 | // %C = extractvalue { i32, { i32 } } %B, 1, 0 |
3991 | // with "i32 42" |
3992 | return replaceInstUsesWith(I&: EV, V: IV->getInsertedValueOperand()); |
3993 | if (exti == exte) { |
3994 | // The extract list is a prefix of the insert list. i.e. replace |
3995 | // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 |
3996 | // %E = extractvalue { i32, { i32 } } %I, 1 |
3997 | // with |
3998 | // %X = extractvalue { i32, { i32 } } %A, 1 |
3999 | // %E = insertvalue { i32 } %X, i32 42, 0 |
4000 | // by switching the order of the insert and extract (though the |
4001 | // insertvalue should be left in, since it may have other uses). |
4002 | Value *NewEV = Builder.CreateExtractValue(Agg: IV->getAggregateOperand(), |
4003 | Idxs: EV.getIndices()); |
4004 | return InsertValueInst::Create(Agg: NewEV, Val: IV->getInsertedValueOperand(), |
4005 | Idxs: ArrayRef(insi, inse)); |
4006 | } |
4007 | if (insi == inse) |
4008 | // The insert list is a prefix of the extract list |
4009 | // We can simply remove the common indices from the extract and make it |
4010 | // operate on the inserted value instead of the insertvalue result. |
4011 | // i.e., replace |
4012 | // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 |
4013 | // %E = extractvalue { i32, { i32 } } %I, 1, 0 |
4014 | // with |
4015 | // %E = extractvalue { i32 } { i32 42 }, 0 |
4016 | return ExtractValueInst::Create(Agg: IV->getInsertedValueOperand(), |
4017 | Idxs: ArrayRef(exti, exte)); |
4018 | } |
4019 | |
4020 | if (Instruction *R = foldExtractOfOverflowIntrinsic(EV)) |
4021 | return R; |
4022 | |
4023 | if (LoadInst *L = dyn_cast<LoadInst>(Val: Agg)) { |
4024 | // Bail out if the aggregate contains scalable vector type |
4025 | if (auto *STy = dyn_cast<StructType>(Val: Agg->getType()); |
4026 | STy && STy->containsScalableVectorType()) |
4027 | return nullptr; |
4028 | |
4029 | // If the (non-volatile) load only has one use, we can rewrite this to a |
4030 | // load from a GEP. This reduces the size of the load. If a load is used |
4031 | // only by extractvalue instructions then this either must have been |
4032 | // optimized before, or it is a struct with padding, in which case we |
4033 | // don't want to do the transformation as it loses padding knowledge. |
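// An illustrative sketch (types made up): extracting index 1 from a
// single-use simple load of a {i32, i64} struct becomes a load of just the
// i64 member through 'getelementptr {i32, i64}, ptr %p, i32 0, i32 1'.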
4034 | if (L->isSimple() && L->hasOneUse()) { |
4035 | // extractvalue has integer indices, getelementptr has Value*s. Convert. |
4036 | SmallVector<Value*, 4> Indices; |
4037 | // Prefix an i32 0 since we need the first element. |
4038 | Indices.push_back(Elt: Builder.getInt32(C: 0)); |
4039 | for (unsigned Idx : EV.indices()) |
4040 | Indices.push_back(Elt: Builder.getInt32(C: Idx)); |
4041 | |
4042 | // We need to insert these at the location of the old load, not at that of |
4043 | // the extractvalue. |
4044 | Builder.SetInsertPoint(L); |
4045 | Value *GEP = Builder.CreateInBoundsGEP(Ty: L->getType(), |
4046 | Ptr: L->getPointerOperand(), IdxList: Indices); |
4047 | Instruction *NL = Builder.CreateLoad(Ty: EV.getType(), Ptr: GEP); |
4048 | // Whatever aliasing information we had for the original load must also |
4049 | // hold for the smaller load, so propagate the annotations. |
4050 | NL->setAAMetadata(L->getAAMetadata()); |
4051 | // Returning the load directly will cause the main loop to insert it in |
4052 | // the wrong spot, so use replaceInstUsesWith(). |
4053 | return replaceInstUsesWith(I&: EV, V: NL); |
4054 | } |
4055 | } |
4056 | |
4057 | if (auto *PN = dyn_cast<PHINode>(Val: Agg)) |
4058 | if (Instruction *Res = foldOpIntoPhi(I&: EV, PN)) |
4059 | return Res; |
4060 | |
4061 | // Canonicalize extract (select Cond, TV, FV) |
4062 | // -> select cond, (extract TV), (extract FV) |
4063 | if (auto *SI = dyn_cast<SelectInst>(Val: Agg)) |
4064 | if (Instruction *R = FoldOpIntoSelect(Op&: EV, SI, /*FoldWithMultiUse=*/true)) |
4065 | return R; |
4066 | |
4067 | // We could simplify extracts from other values. Note that nested extracts may |
4068 | // already be simplified implicitly by the above: extract (extract (insert) ) |
4069 | // will be translated into extract ( insert ( extract ) ) first and then just |
4070 | // the value inserted, if appropriate. Similarly for extracts from single-use |
4071 | // loads: extract (extract (load)) will be translated to extract (load (gep)) |
4072 | // and if again single-use then via load (gep (gep)) to load (gep). |
4073 | // However, double extracts from e.g. function arguments or return values |
4074 | // aren't handled yet. |
4075 | return nullptr; |
4076 | } |
4077 | |
4078 | /// Return 'true' if the given typeinfo will match anything. |
4079 | static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) { |
4080 | switch (Personality) { |
4081 | case EHPersonality::GNU_C: |
4082 | case EHPersonality::GNU_C_SjLj: |
4083 | case EHPersonality::Rust: |
4084 | // The GCC C EH and Rust personalities exist only to support cleanups, so |
4085 | // it's not clear what the semantics of catch clauses are. |
4086 | return false; |
4087 | case EHPersonality::Unknown: |
4088 | return false; |
4089 | case EHPersonality::GNU_Ada: |
4090 | // While __gnat_all_others_value will match any Ada exception, it doesn't |
4091 | // match foreign exceptions (or didn't, before gcc-4.7). |
4092 | return false; |
4093 | case EHPersonality::GNU_CXX: |
4094 | case EHPersonality::GNU_CXX_SjLj: |
4095 | case EHPersonality::GNU_ObjC: |
4096 | case EHPersonality::MSVC_X86SEH: |
4097 | case EHPersonality::MSVC_TableSEH: |
4098 | case EHPersonality::MSVC_CXX: |
4099 | case EHPersonality::CoreCLR: |
4100 | case EHPersonality::Wasm_CXX: |
4101 | case EHPersonality::XL_CXX: |
4102 | case EHPersonality::ZOS_CXX: |
4103 | return TypeInfo->isNullValue(); |
4104 | } |
4105 | llvm_unreachable("invalid enum" ); |
4106 | } |
4107 | |
4108 | static bool shorter_filter(const Value *LHS, const Value *RHS) { |
4109 | return |
4110 | cast<ArrayType>(Val: LHS->getType())->getNumElements() |
4111 | < |
4112 | cast<ArrayType>(Val: RHS->getType())->getNumElements(); |
4113 | } |
4114 | |
4115 | Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) { |
4116 | // The logic here should be correct for any real-world personality function. |
4117 | // However if that turns out not to be true, the offending logic can always |
4118 | // be conditioned on the personality function, like the catch-all logic is. |
4119 | EHPersonality Personality = |
4120 | classifyEHPersonality(Pers: LI.getParent()->getParent()->getPersonalityFn()); |
4121 | |
4122 | // Simplify the list of clauses, e.g. by removing repeated catch clauses |
4123 | // (these are often created by inlining). |
4124 | bool MakeNewInstruction = false; // If true, recreate using the following: |
4125 | SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction; |
4126 | bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup. |
4127 | |
4128 | SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already. |
4129 | for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) { |
4130 | bool isLastClause = i + 1 == e; |
4131 | if (LI.isCatch(Idx: i)) { |
4132 | // A catch clause. |
4133 | Constant *CatchClause = LI.getClause(Idx: i); |
4134 | Constant *TypeInfo = CatchClause->stripPointerCasts(); |
4135 | |
4136 | // If we already saw this clause, there is no point in having a second |
4137 | // copy of it. |
4138 | if (AlreadyCaught.insert(Ptr: TypeInfo).second) { |
4139 | // This catch clause was not already seen. |
4140 | NewClauses.push_back(Elt: CatchClause); |
4141 | } else { |
4142 | // Repeated catch clause - drop the redundant copy. |
4143 | MakeNewInstruction = true; |
4144 | } |
4145 | |
4146 | // If this is a catch-all then there is no point in keeping any following |
4147 | // clauses or marking the landingpad as having a cleanup. |
4148 | if (isCatchAll(Personality, TypeInfo)) { |
4149 | if (!isLastClause) |
4150 | MakeNewInstruction = true; |
4151 | CleanupFlag = false; |
4152 | break; |
4153 | } |
4154 | } else { |
4155 | // A filter clause. If any of the filter elements were already caught |
4156 | // then they can be dropped from the filter. It is tempting to try to |
4157 | // exploit the filter further by saying that any typeinfo that does not |
4158 | // occur in the filter can't be caught later (and thus can be dropped). |
4159 | // However this would be wrong, since typeinfos can match without being |
4160 | // equal (for example if one represents a C++ class, and the other some |
4161 | // class derived from it). |
4162 | assert(LI.isFilter(i) && "Unsupported landingpad clause!" ); |
4163 | Constant *FilterClause = LI.getClause(Idx: i); |
4164 | ArrayType *FilterType = cast<ArrayType>(Val: FilterClause->getType()); |
4165 | unsigned NumTypeInfos = FilterType->getNumElements(); |
4166 | |
4167 | // An empty filter catches everything, so there is no point in keeping any |
4168 | // following clauses or marking the landingpad as having a cleanup. By |
4169 | // dealing with this case here the following code is made a bit simpler. |
4170 | if (!NumTypeInfos) { |
4171 | NewClauses.push_back(Elt: FilterClause); |
4172 | if (!isLastClause) |
4173 | MakeNewInstruction = true; |
4174 | CleanupFlag = false; |
4175 | break; |
4176 | } |
4177 | |
4178 | bool MakeNewFilter = false; // If true, make a new filter. |
4179 | SmallVector<Constant *, 16> NewFilterElts; // New elements. |
4180 | if (isa<ConstantAggregateZero>(Val: FilterClause)) { |
4181 | // Not an empty filter - it contains at least one null typeinfo. |
4182 | assert(NumTypeInfos > 0 && "Should have handled empty filter already!" ); |
4183 | Constant *TypeInfo = |
4184 | Constant::getNullValue(Ty: FilterType->getElementType()); |
4185 | // If this typeinfo is a catch-all then the filter can never match. |
4186 | if (isCatchAll(Personality, TypeInfo)) { |
4187 | // Throw the filter away. |
4188 | MakeNewInstruction = true; |
4189 | continue; |
4190 | } |
4191 | |
4192 | // There is no point in having multiple copies of this typeinfo, so |
4193 | // discard all but the first copy if there is more than one. |
4194 | NewFilterElts.push_back(Elt: TypeInfo); |
4195 | if (NumTypeInfos > 1) |
4196 | MakeNewFilter = true; |
4197 | } else { |
4198 | ConstantArray *Filter = cast<ConstantArray>(Val: FilterClause); |
4199 | SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements. |
4200 | NewFilterElts.reserve(N: NumTypeInfos); |
4201 | |
4202 | // Remove any filter elements that were already caught or that already |
4203 | // occurred in the filter. While there, see if any of the elements are |
4204 | // catch-alls. If so, the filter can be discarded. |
4205 | bool SawCatchAll = false; |
4206 | for (unsigned j = 0; j != NumTypeInfos; ++j) { |
4207 | Constant *Elt = Filter->getOperand(i_nocapture: j); |
4208 | Constant *TypeInfo = Elt->stripPointerCasts(); |
4209 | if (isCatchAll(Personality, TypeInfo)) { |
4210 | // This element is a catch-all. Bail out, noting this fact. |
4211 | SawCatchAll = true; |
4212 | break; |
4213 | } |
4214 | |
4215 | // Even if we've seen a type in a catch clause, we don't want to |
4216 | // remove it from the filter. An unexpected type handler may be |
4217 | // set up for a call site which throws an exception of the same |
4218 | // type caught. In order for the exception thrown by the unexpected |
4219 | // handler to propagate correctly, the filter must be correctly |
4220 | // described for the call site. |
4221 | // |
4222 | // Example: |
4223 | // |
4224 | // void unexpected() { throw 1;} |
4225 | // void foo() throw (int) { |
4226 | // std::set_unexpected(unexpected); |
4227 | // try { |
4228 | // throw 2.0; |
4229 | // } catch (int i) {} |
4230 | // } |
4231 | |
4232 | // There is no point in having multiple copies of the same typeinfo in |
4233 | // a filter, so only add it if we didn't already. |
4234 | if (SeenInFilter.insert(Ptr: TypeInfo).second) |
4235 | NewFilterElts.push_back(Elt: cast<Constant>(Val: Elt)); |
4236 | } |
4237 | // A filter containing a catch-all cannot match anything by definition. |
4238 | if (SawCatchAll) { |
4239 | // Throw the filter away. |
4240 | MakeNewInstruction = true; |
4241 | continue; |
4242 | } |
4243 | |
4244 | // If we dropped something from the filter, make a new one. |
4245 | if (NewFilterElts.size() < NumTypeInfos) |
4246 | MakeNewFilter = true; |
4247 | } |
4248 | if (MakeNewFilter) { |
4249 | FilterType = ArrayType::get(ElementType: FilterType->getElementType(), |
4250 | NumElements: NewFilterElts.size()); |
4251 | FilterClause = ConstantArray::get(T: FilterType, V: NewFilterElts); |
4252 | MakeNewInstruction = true; |
4253 | } |
4254 | |
4255 | NewClauses.push_back(Elt: FilterClause); |
4256 | |
4257 | // If the new filter is empty then it will catch everything so there is |
4258 | // no point in keeping any following clauses or marking the landingpad |
4259 | // as having a cleanup. The case of the original filter being empty was |
4260 | // already handled above. |
4261 | if (MakeNewFilter && !NewFilterElts.size()) { |
4262 | assert(MakeNewInstruction && "New filter but not a new instruction!" ); |
4263 | CleanupFlag = false; |
4264 | break; |
4265 | } |
4266 | } |
4267 | } |
4268 | |
4269 | // If several filters occur in a row then reorder them so that the shortest |
4270 | // filters come first (those with the smallest number of elements). This is |
4271 | // advantageous because shorter filters are more likely to match, speeding up |
4272 | // unwinding, but mostly because it increases the effectiveness of the other |
4273 | // filter optimizations below. |
4274 | for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) { |
4275 | unsigned j; |
4276 | // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters. |
4277 | for (j = i; j != e; ++j) |
4278 | if (!isa<ArrayType>(Val: NewClauses[j]->getType())) |
4279 | break; |
4280 | |
4281 | // Check whether the filters are already sorted by length. We need to know |
4282 | // if sorting them is actually going to do anything so that we only make a |
4283 | // new landingpad instruction if it does. |
4284 | for (unsigned k = i; k + 1 < j; ++k) |
4285 | if (shorter_filter(LHS: NewClauses[k+1], RHS: NewClauses[k])) { |
4286 | // Not sorted, so sort the filters now. Doing an unstable sort would be |
4287 | // correct too but reordering filters pointlessly might confuse users. |
4288 | std::stable_sort(first: NewClauses.begin() + i, last: NewClauses.begin() + j, |
4289 | comp: shorter_filter); |
4290 | MakeNewInstruction = true; |
4291 | break; |
4292 | } |
4293 | |
4294 | // Look for the next batch of filters. |
4295 | i = j + 1; |
4296 | } |
4297 | |
4298 | // If typeinfos matched if and only if equal, then the elements of a filter L |
4299 | // that occurs later than a filter F could be replaced by the intersection of |
4300 | // the elements of F and L. In reality two typeinfos can match without being |
4301 | // equal (for example if one represents a C++ class, and the other some class |
4302 | // derived from it) so it would be wrong to perform this transform in general. |
4303 | // However the transform is correct and useful if F is a subset of L. In that |
4304 | // case L can be replaced by F, and thus removed altogether since repeating a |
4305 | // filter is pointless. So here we look at all pairs of filters F and L where |
4306 | // L follows F in the list of clauses, and remove L if every element of F is |
4307 | // an element of L. This can occur when inlining C++ functions with exception |
4308 | // specifications. |
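// As an illustrative example, given the clauses
//   filter [1 x ptr] [ptr @TypeA]
//   filter [2 x ptr] [ptr @TypeA, ptr @TypeB]
// every element of the first filter is also in the second, so the second
// (longer) filter is redundant and can be dropped.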
4309 | for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) { |
4310 | // Examine each filter in turn. |
4311 | Value *Filter = NewClauses[i]; |
4312 | ArrayType *FTy = dyn_cast<ArrayType>(Val: Filter->getType()); |
4313 | if (!FTy) |
4314 | // Not a filter - skip it. |
4315 | continue; |
4316 | unsigned FElts = FTy->getNumElements(); |
4317 | // Examine each filter following this one. Doing this backwards means that |
4318 | // we don't have to worry about filters disappearing under us when removed. |
4319 | for (unsigned j = NewClauses.size() - 1; j != i; --j) { |
4320 | Value *LFilter = NewClauses[j]; |
4321 | ArrayType *LTy = dyn_cast<ArrayType>(Val: LFilter->getType()); |
4322 | if (!LTy) |
4323 | // Not a filter - skip it. |
4324 | continue; |
4325 | // If Filter is a subset of LFilter, i.e. every element of Filter is also |
4326 | // an element of LFilter, then discard LFilter. |
4327 | SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j; |
4328 | // If Filter is empty then it is a subset of LFilter. |
4329 | if (!FElts) { |
4330 | // Discard LFilter. |
4331 | NewClauses.erase(CI: J); |
4332 | MakeNewInstruction = true; |
4333 | // Move on to the next filter. |
4334 | continue; |
4335 | } |
4336 | unsigned LElts = LTy->getNumElements(); |
4337 | // If Filter is longer than LFilter then it cannot be a subset of it. |
4338 | if (FElts > LElts) |
4339 | // Move on to the next filter. |
4340 | continue; |
4341 | // At this point we know that LFilter has at least one element. |
4342 | if (isa<ConstantAggregateZero>(Val: LFilter)) { // LFilter only contains zeros. |
4343 | // Filter is a subset of LFilter iff Filter contains only zeros (as we |
4344 | // already know that Filter is not longer than LFilter). |
4345 | if (isa<ConstantAggregateZero>(Val: Filter)) { |
4346 | assert(FElts <= LElts && "Should have handled this case earlier!" ); |
4347 | // Discard LFilter. |
4348 | NewClauses.erase(CI: J); |
4349 | MakeNewInstruction = true; |
4350 | } |
4351 | // Move on to the next filter. |
4352 | continue; |
4353 | } |
4354 | ConstantArray *LArray = cast<ConstantArray>(Val: LFilter); |
4355 | if (isa<ConstantAggregateZero>(Val: Filter)) { // Filter only contains zeros. |
4356 | // Since Filter is non-empty and contains only zeros, it is a subset of |
4357 | // LFilter iff LFilter contains a zero. |
4358 | assert(FElts > 0 && "Should have eliminated the empty filter earlier!" ); |
4359 | for (unsigned l = 0; l != LElts; ++l) |
4360 | if (LArray->getOperand(i_nocapture: l)->isNullValue()) { |
4361 | // LFilter contains a zero - discard it. |
4362 | NewClauses.erase(CI: J); |
4363 | MakeNewInstruction = true; |
4364 | break; |
4365 | } |
4366 | // Move on to the next filter. |
4367 | continue; |
4368 | } |
4369 | // At this point we know that both filters are ConstantArrays. Loop over |
4370 | // operands to see whether every element of Filter is also an element of |
4371 | // LFilter. Since filters tend to be short this is probably faster than |
4372 | // using a method that scales nicely. |
4373 | ConstantArray *FArray = cast<ConstantArray>(Val: Filter); |
4374 | bool AllFound = true; |
4375 | for (unsigned f = 0; f != FElts; ++f) { |
4376 | Value *FTypeInfo = FArray->getOperand(i_nocapture: f)->stripPointerCasts(); |
4377 | AllFound = false; |
4378 | for (unsigned l = 0; l != LElts; ++l) { |
4379 | Value *LTypeInfo = LArray->getOperand(i_nocapture: l)->stripPointerCasts(); |
4380 | if (LTypeInfo == FTypeInfo) { |
4381 | AllFound = true; |
4382 | break; |
4383 | } |
4384 | } |
4385 | if (!AllFound) |
4386 | break; |
4387 | } |
4388 | if (AllFound) { |
4389 | // Discard LFilter. |
4390 | NewClauses.erase(CI: J); |
4391 | MakeNewInstruction = true; |
4392 | } |
4393 | // Move on to the next filter. |
4394 | } |
4395 | } |
4396 | |
4397 | // If we changed any of the clauses, replace the old landingpad instruction |
4398 | // with a new one. |
4399 | if (MakeNewInstruction) { |
4400 | LandingPadInst *NLI = LandingPadInst::Create(RetTy: LI.getType(), |
4401 | NumReservedClauses: NewClauses.size()); |
4402 | for (Constant *C : NewClauses) |
4403 | NLI->addClause(ClauseVal: C); |
4404 | // A landing pad with no clauses must have the cleanup flag set. It is |
4405 | // theoretically possible, though highly unlikely, that we eliminated all |
4406 | // clauses. If so, force the cleanup flag to true. |
4407 | if (NewClauses.empty()) |
4408 | CleanupFlag = true; |
4409 | NLI->setCleanup(CleanupFlag); |
4410 | return NLI; |
4411 | } |
4412 | |
4413 | // Even if none of the clauses changed, we may nonetheless have understood |
4414 | // that the cleanup flag is pointless. Clear it if so. |
4415 | if (LI.isCleanup() != CleanupFlag) { |
4416 | assert(!CleanupFlag && "Adding a cleanup, not removing one?!" ); |
4417 | LI.setCleanup(CleanupFlag); |
4418 | return &LI; |
4419 | } |
4420 | |
4421 | return nullptr; |
4422 | } |
4423 | |
4424 | Value * |
4425 | InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) { |
4426 | // Try to push freeze through instructions that propagate but don't produce |
4427 | // poison as far as possible. If an operand of the freeze satisfies three |
4428 | // conditions: 1) it has one use, 2) it does not produce poison, and 3) all |
4429 | // but one of its operands are guaranteed non-poison, then push the freeze |
4430 | // through to the single operand that is not guaranteed non-poison. The |
4431 | // actual transform is as follows. |
4432 | // Op1 = ... ; Op1 can be poison |
4433 | // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and Op1 is its |
4434 | // ; only operand that may be poison |
4435 | // ... = Freeze(Op0) |
4436 | // => |
4437 | // Op1 = ... |
4438 | // Op1.fr = Freeze(Op1) |
4439 | // ... = Inst(Op1.fr, NonPoisonOps...) |
4440 | auto *OrigOp = OrigFI.getOperand(i_nocapture: 0); |
4441 | auto *OrigOpInst = dyn_cast<Instruction>(Val: OrigOp); |
4442 | |
4443 | // While we could change the other users of OrigOp to use freeze(OrigOp), that |
4444 | // potentially reduces their optimization potential, so let's only do this iff |
4445 | // the OrigOp is only used by the freeze. |
4446 | if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(Val: OrigOp)) |
4447 | return nullptr; |
4448 | |
4449 | // We can't push the freeze through an instruction which can itself create |
4450 | // poison. If the only source of new poison is flags, we can simply |
4451 | // strip them (since we know the only use is the freeze and nothing can |
4452 | // benefit from them.) |
4453 | if (canCreateUndefOrPoison(Op: cast<Operator>(Val: OrigOp), |
4454 | /*ConsiderFlagsAndMetadata*/ false)) |
4455 | return nullptr; |
4456 | |
4457 | // If operand is guaranteed not to be poison, there is no need to add freeze |
4458 | // to the operand. So we first find the operand that is not guaranteed to be |
4459 | // poison. |
4460 | Use *MaybePoisonOperand = nullptr; |
4461 | for (Use &U : OrigOpInst->operands()) { |
4462 | if (isa<MetadataAsValue>(Val: U.get()) || |
4463 | isGuaranteedNotToBeUndefOrPoison(V: U.get())) |
4464 | continue; |
4465 | if (!MaybePoisonOperand) |
4466 | MaybePoisonOperand = &U; |
4467 | else |
4468 | return nullptr; |
4469 | } |
4470 | |
4471 | OrigOpInst->dropPoisonGeneratingAnnotations(); |
4472 | |
4473 | // If all operands are guaranteed to be non-poison, we can drop freeze. |
4474 | if (!MaybePoisonOperand) |
4475 | return OrigOp; |
4476 | |
4477 | Builder.SetInsertPoint(OrigOpInst); |
4478 | auto *FrozenMaybePoisonOperand = Builder.CreateFreeze( |
4479 | V: MaybePoisonOperand->get(), Name: MaybePoisonOperand->get()->getName() + ".fr" ); |
4480 | |
4481 | replaceUse(U&: *MaybePoisonOperand, NewValue: FrozenMaybePoisonOperand); |
4482 | return OrigOp; |
4483 | } |
4484 | |
4485 | Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI, |
4486 | PHINode *PN) { |
4487 | // Detect whether this is a recurrence with a start value and some number of |
4488 | // backedge values. We'll check whether we can push the freeze through the |
4489 | // backedge values (possibly dropping poison flags along the way) until we |
4490 | // reach the phi again. In that case, we can move the freeze to the start |
4491 | // value. |
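// An illustrative sketch of the rewrite (names made up):
//   %iv  = phi i32 [ %start, %entry ], [ %inc, %loop ]
//   %inc = add nuw i32 %iv, 1
//   %fr  = freeze i32 %iv
// becomes
//   %start.fr = freeze i32 %start        ; inserted at the end of %entry
//   %iv  = phi i32 [ %start.fr, %entry ], [ %inc, %loop ]
//   %inc = add i32 %iv, 1                ; poison-generating flags dropped
// with every use of %fr replaced by %iv.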
4492 | Use *StartU = nullptr; |
4493 | SmallVector<Value *> Worklist; |
4494 | for (Use &U : PN->incoming_values()) { |
4495 | if (DT.dominates(A: PN->getParent(), B: PN->getIncomingBlock(U))) { |
4496 | // Add backedge value to worklist. |
4497 | Worklist.push_back(Elt: U.get()); |
4498 | continue; |
4499 | } |
4500 | |
4501 | // Don't bother handling multiple start values. |
4502 | if (StartU) |
4503 | return nullptr; |
4504 | StartU = &U; |
4505 | } |
4506 | |
4507 | if (!StartU || Worklist.empty()) |
4508 | return nullptr; // Not a recurrence. |
4509 | |
4510 | Value *StartV = StartU->get(); |
4511 | BasicBlock *StartBB = PN->getIncomingBlock(U: *StartU); |
4512 | bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(V: StartV); |
4513 | // We can't insert freeze if the start value is the result of the |
4514 | // terminator (e.g. an invoke). |
4515 | if (StartNeedsFreeze && StartBB->getTerminator() == StartV) |
4516 | return nullptr; |
4517 | |
4518 | SmallPtrSet<Value *, 32> Visited; |
4519 | SmallVector<Instruction *> DropFlags; |
4520 | while (!Worklist.empty()) { |
4521 | Value *V = Worklist.pop_back_val(); |
4522 | if (!Visited.insert(Ptr: V).second) |
4523 | continue; |
4524 | |
4525 | if (Visited.size() > 32) |
4526 | return nullptr; // Limit the total number of values we inspect. |
4527 | |
4528 | // Assume that PN is non-poison, because it will be after the transform. |
4529 | if (V == PN || isGuaranteedNotToBeUndefOrPoison(V)) |
4530 | continue; |
4531 | |
4532 | Instruction *I = dyn_cast<Instruction>(Val: V); |
4533 | if (!I || canCreateUndefOrPoison(Op: cast<Operator>(Val: I), |
4534 | /*ConsiderFlagsAndMetadata*/ false)) |
4535 | return nullptr; |
4536 | |
4537 | DropFlags.push_back(Elt: I); |
4538 | append_range(C&: Worklist, R: I->operands()); |
4539 | } |
4540 | |
4541 | for (Instruction *I : DropFlags) |
4542 | I->dropPoisonGeneratingAnnotations(); |
4543 | |
4544 | if (StartNeedsFreeze) { |
4545 | Builder.SetInsertPoint(StartBB->getTerminator()); |
4546 | Value *FrozenStartV = Builder.CreateFreeze(V: StartV, |
4547 | Name: StartV->getName() + ".fr" ); |
4548 | replaceUse(U&: *StartU, NewValue: FrozenStartV); |
4549 | } |
4550 | return replaceInstUsesWith(I&: FI, V: PN); |
4551 | } |
4552 | |
4553 | bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) { |
4554 | Value *Op = FI.getOperand(i_nocapture: 0); |
4555 | |
4556 | if (isa<Constant>(Val: Op) || Op->hasOneUse()) |
4557 | return false; |
4558 | |
4559 | // Move the freeze directly after the definition of its operand, so that |
4560 | // it dominates the maximum number of uses. Note that it may not dominate |
4561 | // *all* uses if the operand is an invoke/callbr and the use is in a phi on |
4562 | // the normal/default destination. This is why the domination check in the |
4563 | // replacement below is still necessary. |
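// For example (illustrative): if %op is defined near the top of a block but
// the freeze currently sits several blocks later, hoisting the freeze to just
// after the definition of %op lets every dominated use of %op be rewritten to
// use the frozen value below.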
4564 | BasicBlock::iterator MoveBefore; |
4565 | if (isa<Argument>(Val: Op)) { |
4566 | MoveBefore = |
4567 | FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca(); |
4568 | } else { |
4569 | auto MoveBeforeOpt = cast<Instruction>(Val: Op)->getInsertionPointAfterDef(); |
4570 | if (!MoveBeforeOpt) |
4571 | return false; |
4572 | MoveBefore = *MoveBeforeOpt; |
4573 | } |
4574 | |
4575 | // Don't move to the position of a debug intrinsic. |
4576 | if (isa<DbgInfoIntrinsic>(Val: MoveBefore)) |
4577 | MoveBefore = MoveBefore->getNextNonDebugInstruction()->getIterator(); |
4578 | // Re-point iterator to come after any debug-info records, if we're |
4579 | // running in "RemoveDIs" mode |
4580 | MoveBefore.setHeadBit(false); |
4581 | |
4582 | bool Changed = false; |
4583 | if (&FI != &*MoveBefore) { |
4584 | FI.moveBefore(BB&: *MoveBefore->getParent(), I: MoveBefore); |
4585 | Changed = true; |
4586 | } |
4587 | |
4588 | Op->replaceUsesWithIf(New: &FI, ShouldReplace: [&](Use &U) -> bool { |
4589 | bool Dominates = DT.dominates(Def: &FI, U); |
4590 | Changed |= Dominates; |
4591 | return Dominates; |
4592 | }); |
4593 | |
4594 | return Changed; |
4595 | } |
4596 | |
4597 | // Check if any direct or bitcast user of this value is a shuffle instruction. |
4598 | static bool isUsedWithinShuffleVector(Value *V) { |
4599 | for (auto *U : V->users()) { |
4600 | if (isa<ShuffleVectorInst>(Val: U)) |
4601 | return true; |
4602 | else if (match(V: U, P: m_BitCast(Op: m_Specific(V))) && isUsedWithinShuffleVector(V: U)) |
4603 | return true; |
4604 | } |
4605 | return false; |
4606 | } |
4607 | |
4608 | Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) { |
4609 | Value *Op0 = I.getOperand(i_nocapture: 0); |
4610 | |
4611 | if (Value *V = simplifyFreezeInst(Op: Op0, Q: SQ.getWithInstruction(I: &I))) |
4612 | return replaceInstUsesWith(I, V); |
4613 | |
4614 | // freeze (phi const, x) --> phi const, (freeze x) |
4615 | if (auto *PN = dyn_cast<PHINode>(Val: Op0)) { |
4616 | if (Instruction *NV = foldOpIntoPhi(I, PN)) |
4617 | return NV; |
4618 | if (Instruction *NV = foldFreezeIntoRecurrence(FI&: I, PN)) |
4619 | return NV; |
4620 | } |
4621 | |
4622 | if (Value *NI = pushFreezeToPreventPoisonFromPropagating(OrigFI&: I)) |
4623 | return replaceInstUsesWith(I, V: NI); |
4624 | |
4625 | // If I is freeze(undef), check its uses and fold it to a fixed constant. |
4626 | // - or: pick -1 |
4627 | // - select's condition: if the true value is constant, choose it by making |
4628 | // the condition true. |
4629 | // - default: pick 0 |
4630 | // |
4631 | // Note that this transform is intentionally done here rather than |
4632 | // via an analysis in InstSimplify or at individual user sites. That is |
4633 | // because we must produce the same value for all uses of the freeze - |
4634 | // it's the reason "freeze" exists! |
4635 | // |
4636 | // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid |
4637 | // duplicating logic for binops at least. |
4638 | auto getUndefReplacement = [&I](Type *Ty) { |
4639 | Constant *BestValue = nullptr; |
4640 | Constant *NullValue = Constant::getNullValue(Ty); |
4641 | for (const auto *U : I.users()) { |
4642 | Constant *C = NullValue; |
4643 | if (match(V: U, P: m_Or(L: m_Value(), R: m_Value()))) |
4644 | C = ConstantInt::getAllOnesValue(Ty); |
4645 | else if (match(V: U, P: m_Select(C: m_Specific(V: &I), L: m_Constant(), R: m_Value()))) |
4646 | C = ConstantInt::getTrue(Ty); |
4647 | |
4648 | if (!BestValue) |
4649 | BestValue = C; |
4650 | else if (BestValue != C) |
4651 | BestValue = NullValue; |
4652 | } |
4653 | assert(BestValue && "Must have at least one use" ); |
4654 | return BestValue; |
4655 | }; |
4656 | |
4657 | if (match(V: Op0, P: m_Undef())) { |
4658 | // Don't fold freeze(undef/poison) if it's used as a vector operand in |
4659 | // a shuffle. This may improve codegen for shuffles that allow |
4660 | // unspecified inputs. |
4661 | if (isUsedWithinShuffleVector(V: &I)) |
4662 | return nullptr; |
4663 | return replaceInstUsesWith(I, V: getUndefReplacement(I.getType())); |
4664 | } |
4665 | |
4666 | Constant *C; |
4667 | if (match(V: Op0, P: m_Constant(C)) && C->containsUndefOrPoisonElement()) { |
4668 | Constant *ReplaceC = getUndefReplacement(I.getType()->getScalarType()); |
4669 | return replaceInstUsesWith(I, V: Constant::replaceUndefsWith(C, Replacement: ReplaceC)); |
4670 | } |
4671 | |
4672 | // Replace uses of Op with freeze(Op). |
4673 | if (freezeOtherUses(FI&: I)) |
4674 | return &I; |
4675 | |
4676 | return nullptr; |
4677 | } |
4678 | |
4679 | /// Check for case where the call writes to an otherwise dead alloca. This |
4680 | /// shows up for unused out-params in idiomatic C/C++ code. Note that this |
4681 | /// helper *only* analyzes the write; doesn't check any other legality aspect. |
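/// An illustrative sketch (the callee name is made up):
///   %out = alloca i32
///   call void @fill_out_param(ptr %out) ; assumed to only write through %out
/// Since %out has no users other than the call, the written value can never
/// be observed.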
4682 | static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) { |
4683 | auto *CB = dyn_cast<CallBase>(Val: I); |
4684 | if (!CB) |
4685 | // TODO: handle e.g. store to alloca here - only worth doing if we extend |
4686 | // to allow reload along used path as described below. Otherwise, this |
4687 | // is simply a store to a dead allocation which will be removed. |
4688 | return false; |
4689 | std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: CB, TLI); |
4690 | if (!Dest) |
4691 | return false; |
4692 | auto *AI = dyn_cast<AllocaInst>(Val: getUnderlyingObject(V: Dest->Ptr)); |
4693 | if (!AI) |
4694 | // TODO: allow malloc? |
4695 | return false; |
4696 | // TODO: allow memory access dominated by move point? Note that since AI |
4697 | // could have a reference to itself captured by the call, we would need to |
4698 | // account for cycles in doing so. |
4699 | SmallVector<const User *> AllocaUsers; |
4700 | SmallPtrSet<const User *, 4> Visited; |
4701 | auto pushUsers = [&](const Instruction &I) { |
4702 | for (const User *U : I.users()) { |
4703 | if (Visited.insert(Ptr: U).second) |
4704 | AllocaUsers.push_back(Elt: U); |
4705 | } |
4706 | }; |
4707 | pushUsers(*AI); |
4708 | while (!AllocaUsers.empty()) { |
4709 | auto *UserI = cast<Instruction>(Val: AllocaUsers.pop_back_val()); |
4710 | if (isa<BitCastInst>(Val: UserI) || isa<GetElementPtrInst>(Val: UserI) || |
4711 | isa<AddrSpaceCastInst>(Val: UserI)) { |
4712 | pushUsers(*UserI); |
4713 | continue; |
4714 | } |
4715 | if (UserI == CB) |
4716 | continue; |
4717 | // TODO: support lifetime.start/end here |
4718 | return false; |
4719 | } |
4720 | return true; |
4721 | } |
4722 | |
4723 | /// Try to move the specified instruction from its current block into the |
4724 | /// beginning of DestBlock, which can only happen if it's safe to move the |
4725 | /// instruction past all of the instructions between it and the end of its |
4726 | /// block. |
4727 | bool InstCombinerImpl::tryToSinkInstruction(Instruction *I, |
4728 | BasicBlock *DestBlock) { |
4729 | BasicBlock *SrcBlock = I->getParent(); |
4730 | |
4731 | // Cannot move control-flow-involving, volatile loads, vaarg, etc. |
4732 | if (isa<PHINode>(Val: I) || I->isEHPad() || I->mayThrow() || !I->willReturn() || |
4733 | I->isTerminator()) |
4734 | return false; |
4735 | |
4736 | // Do not sink static or dynamic alloca instructions. Static allocas must |
4737 | // remain in the entry block, and dynamic allocas must not be sunk in between |
4738 | // a stacksave / stackrestore pair, which would incorrectly shorten its |
4739 | // lifetime. |
4740 | if (isa<AllocaInst>(Val: I)) |
4741 | return false; |
4742 | |
4743 | // Do not sink into catchswitch blocks. |
4744 | if (isa<CatchSwitchInst>(Val: DestBlock->getTerminator())) |
4745 | return false; |
4746 | |
4747 | // Do not sink convergent call instructions. |
4748 | if (auto *CI = dyn_cast<CallInst>(Val: I)) { |
4749 | if (CI->isConvergent()) |
4750 | return false; |
4751 | } |
4752 | |
4753 | // Unless we can prove that the memory write isn't visible except on the |
4754 | // path we're sinking to, we must bail. |
4755 | if (I->mayWriteToMemory()) { |
4756 | if (!SoleWriteToDeadLocal(I, TLI)) |
4757 | return false; |
4758 | } |
4759 | |
4760 | // We can only sink load instructions if there is nothing between the load and |
4761 | // the end of block that could change the value. |
4762 | if (I->mayReadFromMemory()) { |
4763 | // We don't want to do any sophisticated alias analysis, so we only check |
4764 | // the instructions after I in I's parent block if we try to sink to its |
4765 | // successor block. |
4766 | if (DestBlock->getUniquePredecessor() != I->getParent()) |
4767 | return false; |
4768 | for (BasicBlock::iterator Scan = std::next(x: I->getIterator()), |
4769 | E = I->getParent()->end(); |
4770 | Scan != E; ++Scan) |
4771 | if (Scan->mayWriteToMemory()) |
4772 | return false; |
4773 | } |
4774 | |
4775 | I->dropDroppableUses(ShouldDrop: [&](const Use *U) { |
4776 | auto *I = dyn_cast<Instruction>(Val: U->getUser()); |
4777 | if (I && I->getParent() != DestBlock) { |
4778 | Worklist.add(I); |
4779 | return true; |
4780 | } |
4781 | return false; |
4782 | }); |
4783 | /// FIXME: We could remove droppable uses that are not dominated by |
4784 | /// the new position. |
4785 | |
4786 | BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt(); |
4787 | I->moveBefore(BB&: *DestBlock, I: InsertPos); |
4788 | ++NumSunkInst; |
4789 | |
4790 | // Also sink all related debug uses from the source basic block. Otherwise we |
4791 | // get debug use before the def. Attempt to salvage debug uses first, to |
4792 | // maximise the range variables have location for. If we cannot salvage, then |
4793 | // mark the location undef: we know it was supposed to receive a new location |
4794 | // here, but that computation has been sunk. |
4795 | SmallVector<DbgVariableIntrinsic *, 2> DbgUsers; |
4796 | SmallVector<DbgVariableRecord *, 2> DbgVariableRecords; |
4797 | findDbgUsers(DbgInsts&: DbgUsers, V: I, DbgVariableRecords: &DbgVariableRecords); |
4798 | if (!DbgUsers.empty()) |
4799 | tryToSinkInstructionDbgValues(I, InsertPos, SrcBlock, DestBlock, DbgUsers); |
4800 | if (!DbgVariableRecords.empty()) |
4801 | tryToSinkInstructionDbgVariableRecords(I, InsertPos, SrcBlock, DestBlock, |
4802 | DPUsers&: DbgVariableRecords); |
4803 | |
4804 | // PS: there are numerous flaws with this behaviour, not least that right now |
4805 | // assignments can be re-ordered past other assignments to the same variable |
4806 | // if they use different Values. Creating more undef assignments can never be |
4807 | // undone. And salvaging all users outside of this block can unnecessarily |
4808 | // alter the lifetime of the live-value that the variable refers to. |
4809 | // Some of these things can be resolved by tolerating debug use-before-defs in |
4810 | // LLVM-IR, however it depends on the instruction-referencing CodeGen backend |
4811 | // being used for more architectures. |
4812 | |
4813 | return true; |
4814 | } |
4815 | |
4816 | void InstCombinerImpl::tryToSinkInstructionDbgValues( |
4817 | Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, |
4818 | BasicBlock *DestBlock, SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers) { |
4819 | // For all debug values in the destination block, the sunk instruction |
4820 | // will still be available, so they do not need to be dropped. |
4821 | SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSalvage; |
4822 | for (auto &DbgUser : DbgUsers) |
4823 | if (DbgUser->getParent() != DestBlock) |
4824 | DbgUsersToSalvage.push_back(Elt: DbgUser); |
4825 | |
4826 | // Process the sinking DbgUsersToSalvage in reverse order, as we only want |
4827 | // to clone the last appearing debug intrinsic for each given variable. |
4828 | SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSink; |
4829 | for (DbgVariableIntrinsic *DVI : DbgUsersToSalvage) |
4830 | if (DVI->getParent() == SrcBlock) |
4831 | DbgUsersToSink.push_back(Elt: DVI); |
4832 | llvm::sort(C&: DbgUsersToSink, |
4833 | Comp: [](auto *A, auto *B) { return B->comesBefore(A); }); |
4834 | |
4835 | SmallVector<DbgVariableIntrinsic *, 2> DIIClones; |
4836 | SmallSet<DebugVariable, 4> SunkVariables; |
4837 | for (auto *User : DbgUsersToSink) { |
4838 | // A dbg.declare instruction should not be cloned, since there can only be |
4839 | // one per variable fragment. It should be left in the original place |
4840 | // because the sunk instruction is not an alloca (otherwise we could not be |
4841 | // here). |
4842 | if (isa<DbgDeclareInst>(Val: User)) |
4843 | continue; |
4844 | |
4845 | DebugVariable DbgUserVariable = |
4846 | DebugVariable(User->getVariable(), User->getExpression(), |
4847 | User->getDebugLoc()->getInlinedAt()); |
4848 | |
4849 | if (!SunkVariables.insert(V: DbgUserVariable).second) |
4850 | continue; |
4851 | |
4852 | // Leave dbg.assign intrinsics in their original positions and there should |
4853 | // be no need to insert a clone. |
4854 | if (isa<DbgAssignIntrinsic>(Val: User)) |
4855 | continue; |
4856 | |
4857 | DIIClones.emplace_back(Args: cast<DbgVariableIntrinsic>(Val: User->clone())); |
4858 | if (isa<DbgDeclareInst>(Val: User) && isa<CastInst>(Val: I)) |
4859 | DIIClones.back()->replaceVariableLocationOp(OldValue: I, NewValue: I->getOperand(i: 0)); |
4860 | LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n'); |
4861 | } |
4862 | |
4863 | // Perform salvaging without the clones, then sink the clones. |
4864 | if (!DIIClones.empty()) { |
4865 | salvageDebugInfoForDbgValues(I&: *I, Insns: DbgUsersToSalvage, DPInsns: {}); |
4866 | // The clones are in reverse order of original appearance, reverse again to |
4867 | // maintain the original order. |
4868 | for (auto &DIIClone : llvm::reverse(C&: DIIClones)) { |
4869 | DIIClone->insertBefore(InsertPos: &*InsertPos); |
4870 | LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n'); |
4871 | } |
4872 | } |
4873 | } |
4874 | |
4875 | void InstCombinerImpl::tryToSinkInstructionDbgVariableRecords( |
4876 | Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, |
4877 | BasicBlock *DestBlock, |
4878 | SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords) { |
4879 | // Implementation of tryToSinkInstructionDbgValues, but for the |
4880 | // DbgVariableRecord of variable assignments rather than dbg.values. |
4881 | |
4882 | // Fetch all DbgVariableRecords not already in the destination. |
4883 | SmallVector<DbgVariableRecord *, 2> DbgVariableRecordsToSalvage; |
4884 | for (auto &DVR : DbgVariableRecords) |
4885 | if (DVR->getParent() != DestBlock) |
4886 | DbgVariableRecordsToSalvage.push_back(Elt: DVR); |
4887 | |
4888 | // Fetch a second collection, of DbgVariableRecords in the source block that |
4889 | // we're going to sink. |
4890 | SmallVector<DbgVariableRecord *> DbgVariableRecordsToSink; |
4891 | for (DbgVariableRecord *DVR : DbgVariableRecordsToSalvage) |
4892 | if (DVR->getParent() == SrcBlock) |
4893 | DbgVariableRecordsToSink.push_back(Elt: DVR); |
4894 | |
4895 | // Sort DbgVariableRecords according to their position in the block. This is a |
4896 | // partial order: DbgVariableRecords attached to different instructions will |
4897 | // be ordered by the instruction order, but DbgVariableRecords attached to the |
4898 | // same instruction won't have an order. |
4899 | auto Order = [](DbgVariableRecord *A, DbgVariableRecord *B) -> bool { |
4900 | return B->getInstruction()->comesBefore(Other: A->getInstruction()); |
4901 | }; |
4902 | llvm::stable_sort(Range&: DbgVariableRecordsToSink, C: Order); |
4903 | |
4904 | // If there are two assignments to the same variable attached to the same |
4905 | // instruction, the ordering between the two assignments is important. Scan |
4906 | // for this (rare) case and establish which is the last assignment. |
4907 | using InstVarPair = std::pair<const Instruction *, DebugVariable>; |
4908 | SmallDenseMap<InstVarPair, DbgVariableRecord *> FilterOutMap; |
4909 | if (DbgVariableRecordsToSink.size() > 1) { |
4910 | SmallDenseMap<InstVarPair, unsigned> CountMap; |
4911 | // Count how many assignments to each variable there is per instruction. |
4912 | for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) { |
4913 | DebugVariable DbgUserVariable = |
4914 | DebugVariable(DVR->getVariable(), DVR->getExpression(), |
4915 | DVR->getDebugLoc()->getInlinedAt()); |
4916 | CountMap[std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable)] += 1; |
4917 | } |
4918 | |
4919 | // If there are any instructions with two assignments, add them to the |
4920 | // FilterOutMap to record that they need extra filtering. |
4921 | SmallPtrSet<const Instruction *, 4> DupSet; |
4922 | for (auto It : CountMap) { |
4923 | if (It.second > 1) { |
4924 | FilterOutMap[It.first] = nullptr; |
4925 | DupSet.insert(Ptr: It.first.first); |
4926 | } |
4927 | } |
4928 | |
4929 | // For all instruction/variable pairs needing extra filtering, find the |
4930 | // latest assignment. |
4931 | for (const Instruction *Inst : DupSet) { |
4932 | for (DbgVariableRecord &DVR : |
4933 | llvm::reverse(C: filterDbgVars(R: Inst->getDbgRecordRange()))) { |
4934 | DebugVariable DbgUserVariable = |
4935 | DebugVariable(DVR.getVariable(), DVR.getExpression(), |
4936 | DVR.getDebugLoc()->getInlinedAt()); |
4937 | auto FilterIt = |
4938 | FilterOutMap.find(Val: std::make_pair(x&: Inst, y&: DbgUserVariable)); |
4939 | if (FilterIt == FilterOutMap.end()) |
4940 | continue; |
4941 | if (FilterIt->second != nullptr) |
4942 | continue; |
4943 | FilterIt->second = &DVR; |
4944 | } |
4945 | } |
4946 | } |
4947 | |
4948 | // Perform cloning of the DbgVariableRecords that we plan on sinking, filter |
4949 | // out any duplicate assignments identified above. |
4950 | SmallVector<DbgVariableRecord *, 2> DVRClones; |
4951 | SmallSet<DebugVariable, 4> SunkVariables; |
4952 | for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) { |
4953 | if (DVR->Type == DbgVariableRecord::LocationType::Declare) |
4954 | continue; |
4955 | |
4956 | DebugVariable DbgUserVariable = |
4957 | DebugVariable(DVR->getVariable(), DVR->getExpression(), |
4958 | DVR->getDebugLoc()->getInlinedAt()); |
4959 | |
4960 | // For any variable where there were multiple assignments in the same place, |
4961 | // ignore all but the last assignment. |
4962 | if (!FilterOutMap.empty()) { |
4963 | InstVarPair IVP = std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable); |
4964 | auto It = FilterOutMap.find(Val: IVP); |
4965 | |
4966 | // Filter out. |
4967 | if (It != FilterOutMap.end() && It->second != DVR) |
4968 | continue; |
4969 | } |
4970 | |
4971 | if (!SunkVariables.insert(V: DbgUserVariable).second) |
4972 | continue; |
4973 | |
4974 | if (DVR->isDbgAssign()) |
4975 | continue; |
4976 | |
4977 | DVRClones.emplace_back(Args: DVR->clone()); |
4978 | LLVM_DEBUG(dbgs() << "CLONE: " << *DVRClones.back() << '\n'); |
4979 | } |
4980 | |
4981 | // Perform salvaging without the clones, then sink the clones. |
4982 | if (DVRClones.empty()) |
4983 | return; |
4984 | |
4985 | salvageDebugInfoForDbgValues(I&: *I, Insns: {}, DPInsns: DbgVariableRecordsToSalvage); |
4986 | |
4987 | // The clones are in reverse order of original appearance. Assert that the |
4988 | // head bit is set on the iterator as we _should_ have received it via |
4989 | // getFirstInsertionPt. Inserting like this will reverse the clone order as |
4990 | // we'll repeatedly insert at the head, such as: |
4991 | // DVR-3 (third insertion goes here) |
4992 | // DVR-2 (second insertion goes here) |
4993 | // DVR-1 (first insertion goes here) |
4994 | // Any-Prior-DVRs |
4995 | // InsertPtInst |
4996 | assert(InsertPos.getHeadBit()); |
4997 | for (DbgVariableRecord *DVRClone : DVRClones) { |
    InsertPos->getParent()->insertDbgRecordBefore(DVRClone, InsertPos);
4999 | LLVM_DEBUG(dbgs() << "SINK: " << *DVRClone << '\n'); |
5000 | } |
5001 | } |
5002 | |
5003 | bool InstCombinerImpl::run() { |
5004 | while (!Worklist.isEmpty()) { |
5005 | // Walk deferred instructions in reverse order, and push them to the |
5006 | // worklist, which means they'll end up popped from the worklist in-order. |
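    // For example, a deferred list [A, B, C] is re-pushed as C, B, A, so
    // removeOne() below returns A, then B, then C.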
5007 | while (Instruction *I = Worklist.popDeferred()) { |
      // Check to see if we can DCE the instruction. We already do this here to
5009 | // reduce the number of uses and thus allow other folds to trigger. |
5010 | // Note that eraseInstFromFunction() may push additional instructions on |
5011 | // the deferred worklist, so this will DCE whole instruction chains. |
      if (isInstructionTriviallyDead(I, &TLI)) {
        eraseInstFromFunction(*I);
5014 | ++NumDeadInst; |
5015 | continue; |
5016 | } |
5017 | |
5018 | Worklist.push(I); |
5019 | } |
5020 | |
5021 | Instruction *I = Worklist.removeOne(); |
5022 | if (I == nullptr) continue; // skip null values. |
5023 | |
5024 | // Check to see if we can DCE the instruction. |
    if (isInstructionTriviallyDead(I, &TLI)) {
      eraseInstFromFunction(*I);
5027 | ++NumDeadInst; |
5028 | continue; |
5029 | } |
5030 | |
    if (!DebugCounter::shouldExecute(VisitCounter))
5032 | continue; |
5033 | |
5034 | // See if we can trivially sink this instruction to its user if we can |
5035 | // prove that the successor is not executed more frequently than our block. |
5036 | // Return the UserBlock if successful. |
5037 | auto getOptionalSinkBlockForInst = |
5038 | [this](Instruction *I) -> std::optional<BasicBlock *> { |
5039 | if (!EnableCodeSinking) |
5040 | return std::nullopt; |
5041 | |
5042 | BasicBlock *BB = I->getParent(); |
5043 | BasicBlock *UserParent = nullptr; |
5044 | unsigned NumUsers = 0; |
5045 | |
5046 | for (Use &U : I->uses()) { |
5047 | User *User = U.getUser(); |
5048 | if (User->isDroppable()) |
5049 | continue; |
5050 | if (NumUsers > MaxSinkNumUsers) |
5051 | return std::nullopt; |
5052 | |
        Instruction *UserInst = cast<Instruction>(User);
5054 | // Special handling for Phi nodes - get the block the use occurs in. |
5055 | BasicBlock *UserBB = UserInst->getParent(); |
        if (PHINode *PN = dyn_cast<PHINode>(UserInst))
5057 | UserBB = PN->getIncomingBlock(U); |
        // Bail out if we have uses in different blocks. We don't do any
        // sophisticated analysis (i.e. finding the NearestCommonDominator of
        // these use blocks).
5061 | if (UserParent && UserParent != UserBB) |
5062 | return std::nullopt; |
5063 | UserParent = UserBB; |
5064 | |
        // Make sure these checks are done only once; naturally, we do them
        // the first time we get the UserParent, which saves compile time.
5067 | if (NumUsers == 0) { |
5068 | // Try sinking to another block. If that block is unreachable, then do |
5069 | // not bother. SimplifyCFG should handle it. |
          if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
5071 | return std::nullopt; |
5072 | |
5073 | auto *Term = UserParent->getTerminator(); |
5074 | // See if the user is one of our successors that has only one |
5075 | // predecessor, so that we don't have to split the critical edge. |
          // Another block we can sink to is one that ends with a terminator
          // that does not pass control to another block (such as return,
          // unreachable, or resume). In this case:
5079 | // - I dominates the User (by SSA form); |
5080 | // - the User will be executed at most once. |
5081 | // So sinking I down to User is always profitable or neutral. |
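          // Illustrative sketch (hypothetical blocks):
          //   BB:         %v = add i32 %a, %b
          //               br i1 %c, label %UserParent, label %Other
          //   UserParent: ; unique predecessor is BB
          //               %u = mul i32 %v, 2
          // Sinking %v into UserParent is fine: it then only executes when
          // that path is actually taken.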
          if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
5083 | return std::nullopt; |
5084 | |
          assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
5086 | } |
5087 | |
5088 | NumUsers++; |
5089 | } |
5090 | |
5091 | // No user or only has droppable users. |
5092 | if (!UserParent) |
5093 | return std::nullopt; |
5094 | |
5095 | return UserParent; |
5096 | }; |
5097 | |
5098 | auto OptBB = getOptionalSinkBlockForInst(I); |
5099 | if (OptBB) { |
5100 | auto *UserParent = *OptBB; |
5101 | // Okay, the CFG is simple enough, try to sink this instruction. |
      if (tryToSinkInstruction(I, UserParent)) {
5103 | LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n'); |
5104 | MadeIRChange = true; |
        // We'll add uses of the sunk instruction below, but since
        // sinking can expose opportunities for its *operands*, add
        // them to the worklist.
5108 | for (Use &U : I->operands()) |
          if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
            Worklist.push(OpI);
5111 | } |
5112 | } |
5113 | |
5114 | // Now that we have an instruction, try combining it to simplify it. |
5115 | Builder.SetInsertPoint(I); |
    Builder.CollectMetadataToCopy(
        I, {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
5118 | |
5119 | #ifndef NDEBUG |
5120 | std::string OrigI; |
5121 | #endif |
5122 | LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS);); |
5123 | LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n'); |
5124 | |
    if (Instruction *Result = visit(*I)) {
5126 | ++NumCombined; |
5127 | // Should we replace the old instruction with a new one? |
5128 | if (Result != I) { |
5129 | LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n' |
5130 | << " New = " << *Result << '\n'); |
5131 | |
        Result->copyMetadata(*I,
                             {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
5134 | // Everything uses the new instruction now. |
        I->replaceAllUsesWith(Result);
5136 | |
5137 | // Move the name to the new instruction first. |
        Result->takeName(I);
5139 | |
5140 | // Insert the new instruction into the basic block... |
5141 | BasicBlock *InstParent = I->getParent(); |
5142 | BasicBlock::iterator InsertPos = I->getIterator(); |
5143 | |
        // Are we replacing a PHI with something that isn't a PHI, or vice
        // versa?
        if (isa<PHINode>(Result) != isa<PHINode>(I)) {
          // We need to fix up the insertion point.
          if (isa<PHINode>(I)) // PHI -> Non-PHI
5148 | InsertPos = InstParent->getFirstInsertionPt(); |
5149 | else // Non-PHI -> PHI |
5150 | InsertPos = InstParent->getFirstNonPHIIt(); |
5151 | } |
5152 | |
        Result->insertInto(InstParent, InsertPos);
5154 | |
5155 | // Push the new instruction and any users onto the worklist. |
        Worklist.pushUsersToWorkList(*Result);
        Worklist.push(Result);
5158 | |
        eraseInstFromFunction(*I);
5160 | } else { |
5161 | LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n' |
5162 | << " New = " << *I << '\n'); |
5163 | |
        // If the instruction was modified, it's possible that it is now dead.
        // If so, remove it.
        if (isInstructionTriviallyDead(I, &TLI)) {
          eraseInstFromFunction(*I);
5168 | } else { |
          Worklist.pushUsersToWorkList(*I);
5170 | Worklist.push(I); |
5171 | } |
5172 | } |
5173 | MadeIRChange = true; |
5174 | } |
5175 | } |
5176 | |
5177 | Worklist.zap(); |
5178 | return MadeIRChange; |
5179 | } |
5180 | |
5181 | // Track the scopes used by !alias.scope and !noalias. In a function, a |
5182 | // @llvm.experimental.noalias.scope.decl is only useful if that scope is used |
5183 | // by both sets. If not, the declaration of the scope can be safely omitted. |
5184 | // The MDNode of the scope can be omitted as well for the instructions that are |
5185 | // part of this function. We do not do that at this point, as this might become |
// too time-consuming.
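//
// Illustrative sketch (hypothetical IR, for orientation only):
//   call void @llvm.experimental.noalias.scope.decl(metadata !2)
//   store i32 0, ptr %p, !alias.scope !2
//   %v = load i32, ptr %q, !noalias !2
// The scope declaration stays useful only while some instruction still lists
// the scope under !alias.scope and some other instruction lists it under
// !noalias.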
5187 | class AliasScopeTracker { |
5188 | SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists; |
5189 | SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists; |
5190 | |
5191 | public: |
5192 | void analyse(Instruction *I) { |
5193 | // This seems to be faster than checking 'mayReadOrWriteMemory()'. |
5194 | if (!I->hasMetadataOtherThanDebugLoc()) |
5195 | return; |
5196 | |
5197 | auto Track = [](Metadata *ScopeList, auto &Container) { |
      const auto *MDScopeList = dyn_cast_or_null<MDNode>(ScopeList);
5199 | if (!MDScopeList || !Container.insert(MDScopeList).second) |
5200 | return; |
5201 | for (const auto &MDOperand : MDScopeList->operands()) |
        if (auto *MDScope = dyn_cast<MDNode>(MDOperand))
5203 | Container.insert(MDScope); |
5204 | }; |
5205 | |
    Track(I->getMetadata(LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
    Track(I->getMetadata(LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
5208 | } |
5209 | |
5210 | bool isNoAliasScopeDeclDead(Instruction *Inst) { |
    NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Inst);
5212 | if (!Decl) |
5213 | return false; |
5214 | |
5215 | assert(Decl->use_empty() && |
5216 | "llvm.experimental.noalias.scope.decl in use ?" ); |
5217 | const MDNode *MDSL = Decl->getScopeList(); |
5218 | assert(MDSL->getNumOperands() == 1 && |
5219 | "llvm.experimental.noalias.scope should refer to a single scope" ); |
5220 | auto &MDOperand = MDSL->getOperand(I: 0); |
5221 | if (auto *MD = dyn_cast<MDNode>(Val: MDOperand)) |
5222 | return !UsedAliasScopesAndLists.contains(Ptr: MD) || |
5223 | !UsedNoAliasScopesAndLists.contains(Ptr: MD); |
5224 | |
    // Not an MDNode? Throw it away.
5226 | return true; |
5227 | } |
5228 | }; |
5229 | |
5230 | /// Populate the IC worklist from a function, by walking it in reverse |
5231 | /// post-order and adding all reachable code to the worklist. |
5232 | /// |
5233 | /// This has a couple of tricks to make the code faster and more powerful. In |
5234 | /// particular, we constant fold and DCE instructions as we go, to avoid adding |
5235 | /// them to the worklist (this significantly speeds up instcombine on code where |
5236 | /// many instructions are dead or constant). Additionally, if we find a branch |
5237 | /// whose condition is a known constant, we only visit the reachable successors. |
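///
/// For example (illustrative IR), given
///   br i1 true, label %live, label %dead
/// only %live's instructions are queued; the edge to %dead is marked dead and
/// %dead's PHI inputs along that edge are replaced with poison.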
5238 | bool InstCombinerImpl::prepareWorklist( |
5239 | Function &F, ReversePostOrderTraversal<BasicBlock *> &RPOT) { |
5240 | bool MadeIRChange = false; |
5241 | SmallPtrSet<BasicBlock *, 32> LiveBlocks; |
5242 | SmallVector<Instruction *, 128> InstrsForInstructionWorklist; |
5243 | DenseMap<Constant *, Constant *> FoldedConstants; |
5244 | AliasScopeTracker SeenAliasScopes; |
5245 | |
  // Mark every edge from BB to a successor other than LiveSucc as dead, and
  // replace the matching PHI incoming values in those successors with poison.
  // Passing a null LiveSucc marks all outgoing edges of BB as dead.
  auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) {
5247 | for (BasicBlock *Succ : successors(BB)) |
      if (Succ != LiveSucc && DeadEdges.insert({BB, Succ}).second)
5249 | for (PHINode &PN : Succ->phis()) |
5250 | for (Use &U : PN.incoming_values()) |
            if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(U)) {
              U.set(PoisonValue::get(PN.getType()));
5253 | MadeIRChange = true; |
5254 | } |
5255 | }; |
5256 | |
5257 | for (BasicBlock *BB : RPOT) { |
    if (!BB->isEntryBlock() && all_of(predecessors(BB), [&](BasicBlock *Pred) {
          return DeadEdges.contains({Pred, BB}) || DT.dominates(BB, Pred);
5260 | })) { |
5261 | HandleOnlyLiveSuccessor(BB, nullptr); |
5262 | continue; |
5263 | } |
    LiveBlocks.insert(BB);
5265 | |
    for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
      // Constant-fold the instruction if it is trivially constant.
5268 | if (!Inst.use_empty() && |
          (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
        if (Constant *C = ConstantFoldInstruction(&Inst, DL, &TLI)) {
5271 | LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst |
5272 | << '\n'); |
          Inst.replaceAllUsesWith(C);
          ++NumConstProp;
          if (isInstructionTriviallyDead(&Inst, &TLI))
5276 | Inst.eraseFromParent(); |
5277 | MadeIRChange = true; |
5278 | continue; |
5279 | } |
5280 | |
5281 | // See if we can constant fold its operands. |
5282 | for (Use &U : Inst.operands()) { |
        if (!isa<ConstantVector>(U) && !isa<ConstantExpr>(U))
5284 | continue; |
5285 | |
        auto *C = cast<Constant>(U);
        Constant *&FoldRes = FoldedConstants[C];
        if (!FoldRes)
          FoldRes = ConstantFoldConstant(C, DL, &TLI);
5290 | |
5291 | if (FoldRes != C) { |
5292 | LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst |
5293 | << "\n Old = " << *C |
5294 | << "\n New = " << *FoldRes << '\n'); |
5295 | U = FoldRes; |
5296 | MadeIRChange = true; |
5297 | } |
5298 | } |
5299 | |
      // Skip processing debug and pseudo intrinsics in InstCombine. Processing
      // these call instructions consumes a non-trivial amount of time and
      // provides no value for the optimization.
5303 | if (!Inst.isDebugOrPseudoInst()) { |
        InstrsForInstructionWorklist.push_back(&Inst);
        SeenAliasScopes.analyse(&Inst);
5306 | } |
5307 | } |
5308 | |
5309 | // If this is a branch or switch on a constant, mark only the single |
5310 | // live successor. Otherwise assume all successors are live. |
5311 | Instruction *TI = BB->getTerminator(); |
    if (BranchInst *BI = dyn_cast<BranchInst>(TI); BI && BI->isConditional()) {
      if (isa<UndefValue>(BI->getCondition())) {
5314 | // Branch on undef is UB. |
5315 | HandleOnlyLiveSuccessor(BB, nullptr); |
5316 | continue; |
5317 | } |
      if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
        bool CondVal = Cond->getZExtValue();
        HandleOnlyLiveSuccessor(BB, BI->getSuccessor(!CondVal));
5321 | continue; |
5322 | } |
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
      if (isa<UndefValue>(SI->getCondition())) {
5325 | // Switch on undef is UB. |
5326 | HandleOnlyLiveSuccessor(BB, nullptr); |
5327 | continue; |
5328 | } |
      if (auto *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
        HandleOnlyLiveSuccessor(BB,
                                SI->findCaseValue(Cond)->getCaseSuccessor());
5332 | continue; |
5333 | } |
5334 | } |
5335 | } |
5336 | |
5337 | // Remove instructions inside unreachable blocks. This prevents the |
5338 | // instcombine code from having to deal with some bad special cases, and |
5339 | // reduces use counts of instructions. |
5340 | for (BasicBlock &BB : F) { |
    if (LiveBlocks.count(&BB))
5342 | continue; |
5343 | |
5344 | unsigned NumDeadInstInBB; |
5345 | unsigned NumDeadDbgInstInBB; |
    std::tie(NumDeadInstInBB, NumDeadDbgInstInBB) =
        removeAllNonTerminatorAndEHPadInstructions(&BB);
5348 | |
5349 | MadeIRChange |= NumDeadInstInBB + NumDeadDbgInstInBB > 0; |
5350 | NumDeadInst += NumDeadInstInBB; |
5351 | } |
5352 | |
5353 | // Once we've found all of the instructions to add to instcombine's worklist, |
5354 | // add them in reverse order. This way instcombine will visit from the top |
  // of the function down. This jibes well with the way that it adds all uses
5356 | // of instructions to the worklist after doing a transformation, thus avoiding |
5357 | // some N^2 behavior in pathological cases. |
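  // For example, with program order [a, b = f(a), c = g(b)], we push c, b, a,
  // so removeOne() visits a first while b and c are still queued behind it.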
  Worklist.reserve(InstrsForInstructionWorklist.size());
  for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
5360 | // DCE instruction if trivially dead. As we iterate in reverse program |
5361 | // order here, we will clean up whole chains of dead instructions. |
    if (isInstructionTriviallyDead(Inst, &TLI) ||
5363 | SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) { |
5364 | ++NumDeadInst; |
5365 | LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n'); |
      salvageDebugInfo(*Inst);
5367 | Inst->eraseFromParent(); |
5368 | MadeIRChange = true; |
5369 | continue; |
5370 | } |
5371 | |
    Worklist.push(Inst);
5373 | } |
5374 | |
5375 | return MadeIRChange; |
5376 | } |
5377 | |
5378 | static bool combineInstructionsOverFunction( |
5379 | Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA, |
5380 | AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, |
5381 | DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, |
5382 | BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, LoopInfo *LI, |
5383 | const InstCombineOptions &Opts) { |
5384 | auto &DL = F.getDataLayout(); |
5385 | |
5386 | /// Builder - This is an IRBuilder that automatically inserts new |
5387 | /// instructions into the worklist when they are created. |
5388 | IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder( |
5389 | F.getContext(), TargetFolder(DL), |
5390 | IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) { |
5391 | Worklist.add(I); |
        if (auto *Assume = dyn_cast<AssumeInst>(I))
          AC.registerAssumption(Assume);
5394 | })); |
5395 | |
5396 | ReversePostOrderTraversal<BasicBlock *> RPOT(&F.front()); |
5397 | |
  // Lower dbg.declare intrinsics; otherwise their value may be clobbered by
  // the instcombiner.
5400 | bool MadeIRChange = false; |
5401 | if (ShouldLowerDbgDeclare) |
5402 | MadeIRChange = LowerDbgDeclare(F); |
5403 | |
5404 | // Iterate while there is work to do. |
5405 | unsigned Iteration = 0; |
5406 | while (true) { |
5407 | ++Iteration; |
5408 | |
5409 | if (Iteration > Opts.MaxIterations && !Opts.VerifyFixpoint) { |
5410 | LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations |
5411 | << " on " << F.getName() |
5412 | << " reached; stopping without verifying fixpoint\n" ); |
5413 | break; |
5414 | } |
5415 | |
5416 | ++NumWorklistIterations; |
5417 | LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " |
5418 | << F.getName() << "\n" ); |
5419 | |
5420 | InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT, |
5421 | ORE, BFI, BPI, PSI, DL, LI); |
5422 | IC.MaxArraySizeForCombine = MaxArraySize; |
5423 | bool MadeChangeInThisIteration = IC.prepareWorklist(F, RPOT); |
5424 | MadeChangeInThisIteration |= IC.run(); |
5425 | if (!MadeChangeInThisIteration) |
5426 | break; |
5427 | |
5428 | MadeIRChange = true; |
5429 | if (Iteration > Opts.MaxIterations) { |
5430 | report_fatal_error( |
5431 | reason: "Instruction Combining did not reach a fixpoint after " + |
5432 | Twine(Opts.MaxIterations) + " iterations" , |
5433 | /*GenCrashDiag=*/gen_crash_diag: false); |
5434 | } |
5435 | } |
5436 | |
5437 | if (Iteration == 1) |
5438 | ++NumOneIteration; |
5439 | else if (Iteration == 2) |
5440 | ++NumTwoIterations; |
5441 | else if (Iteration == 3) |
5442 | ++NumThreeIterations; |
5443 | else |
5444 | ++NumFourOrMoreIterations; |
5445 | |
5446 | return MadeIRChange; |
5447 | } |
5448 | |
5449 | InstCombinePass::InstCombinePass(InstCombineOptions Opts) : Options(Opts) {} |
5450 | |
5451 | void InstCombinePass::printPipeline( |
5452 | raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { |
5453 | static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline( |
5454 | OS, MapClassName2PassName); |
5455 | OS << '<'; |
5456 | OS << "max-iterations=" << Options.MaxIterations << ";" ; |
5457 | OS << (Options.UseLoopInfo ? "" : "no-" ) << "use-loop-info;" ; |
5458 | OS << (Options.VerifyFixpoint ? "" : "no-" ) << "verify-fixpoint" ; |
5459 | OS << '>'; |
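  // With hypothetical option values, the printed form looks like:
  //   instcombine<max-iterations=1;no-use-loop-info;verify-fixpoint>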
5460 | } |
5461 | |
5462 | PreservedAnalyses InstCombinePass::run(Function &F, |
5463 | FunctionAnalysisManager &AM) { |
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
5469 | |
5470 | // TODO: Only use LoopInfo when the option is set. This requires that the |
5471 | // callers in the pass pipeline explicitly set the option. |
  auto *LI = AM.getCachedResult<LoopAnalysis>(F);
  if (!LI && Options.UseLoopInfo)
    LI = &AM.getResult<LoopAnalysis>(F);
5475 | |
  auto *AA = &AM.getResult<AAManager>(F);
  auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
  ProfileSummaryInfo *PSI =
      MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
      &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
  auto *BPI = AM.getCachedResult<BranchProbabilityAnalysis>(F);
5483 | |
5484 | if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE, |
                                       BFI, BPI, PSI, LI, Options))
5486 | // No changes, all analyses are preserved. |
5487 | return PreservedAnalyses::all(); |
5488 | |
5489 | // Mark all the analyses that instcombine updates as preserved. |
5490 | PreservedAnalyses PA; |
5491 | PA.preserveSet<CFGAnalyses>(); |
5492 | return PA; |
5493 | } |
5494 | |
5495 | void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const { |
5496 | AU.setPreservesCFG(); |
5497 | AU.addRequired<AAResultsWrapperPass>(); |
5498 | AU.addRequired<AssumptionCacheTracker>(); |
5499 | AU.addRequired<TargetLibraryInfoWrapperPass>(); |
5500 | AU.addRequired<TargetTransformInfoWrapperPass>(); |
5501 | AU.addRequired<DominatorTreeWrapperPass>(); |
5502 | AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); |
5503 | AU.addPreserved<DominatorTreeWrapperPass>(); |
5504 | AU.addPreserved<AAResultsWrapperPass>(); |
5505 | AU.addPreserved<BasicAAWrapperPass>(); |
5506 | AU.addPreserved<GlobalsAAWrapperPass>(); |
5507 | AU.addRequired<ProfileSummaryInfoWrapperPass>(); |
5508 | LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); |
5509 | } |
5510 | |
5511 | bool InstructionCombiningPass::runOnFunction(Function &F) { |
5512 | if (skipFunction(F)) |
5513 | return false; |
5514 | |
5515 | // Required analyses. |
5516 | auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); |
5517 | auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); |
5518 | auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); |
5519 | auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); |
5520 | auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
5521 | auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); |
5522 | |
5523 | // Optional analyses. |
5524 | auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); |
5525 | auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; |
5526 | ProfileSummaryInfo *PSI = |
5527 | &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); |
5528 | BlockFrequencyInfo *BFI = |
5529 | (PSI && PSI->hasProfileSummary()) ? |
5530 | &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() : |
5531 | nullptr; |
5532 | BranchProbabilityInfo *BPI = nullptr; |
5533 | if (auto *WrapperPass = |
5534 | getAnalysisIfAvailable<BranchProbabilityInfoWrapperPass>()) |
5535 | BPI = &WrapperPass->getBPI(); |
5536 | |
5537 | return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE, |
5538 | BFI, BPI, PSI, LI, |
                                         InstCombineOptions());
5540 | } |
5541 | |
5542 | char InstructionCombiningPass::ID = 0; |
5543 | |
5544 | InstructionCombiningPass::InstructionCombiningPass() : FunctionPass(ID) { |
5545 | initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry()); |
5546 | } |
5547 | |
INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
                      "Combine redundant instructions", false, false)
5550 | INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) |
5551 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) |
5552 | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) |
5553 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
5554 | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) |
5555 | INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) |
5556 | INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) |
5557 | INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass) |
5558 | INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) |
INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
                    "Combine redundant instructions", false, false)
5561 | |
5562 | // Initialization Routines |
5563 | void llvm::initializeInstCombine(PassRegistry &Registry) { |
5564 | initializeInstructionCombiningPassPass(Registry); |
5565 | } |
5566 | |
5567 | FunctionPass *llvm::createInstructionCombiningPass() { |
5568 | return new InstructionCombiningPass(); |
5569 | } |
5570 | |