1 | //===- InstructionCombining.cpp - Combine multiple instructions -----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // InstructionCombining - Combine instructions to form fewer, simple |
10 | // instructions. This pass does not modify the CFG. This pass is where |
11 | // algebraic simplification happens. |
12 | // |
13 | // This pass combines things like: |
14 | // %Y = add i32 %X, 1 |
15 | // %Z = add i32 %Y, 1 |
16 | // into: |
17 | // %Z = add i32 %X, 2 |
18 | // |
19 | // This is a simple worklist driven algorithm. |
20 | // |
21 | // This pass guarantees that the following canonicalizations are performed on |
22 | // the program: |
23 | // 1. If a binary operator has a constant operand, it is moved to the RHS |
24 | // 2. Bitwise operators with constant operands are always grouped so that |
25 | // shifts are performed first, then or's, then and's, then xor's. |
26 | // 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible |
27 | // 4. All cmp instructions on boolean values are replaced with logical ops |
28 | // 5. add X, X is represented as (X*2) => (X << 1) |
29 | // 6. Multiplies with a power-of-two constant argument are transformed into |
30 | // shifts. |
31 | // ... etc. |
32 | // |
33 | //===----------------------------------------------------------------------===// |
34 | |
35 | #include "InstCombineInternal.h" |
36 | #include "llvm/ADT/APFloat.h" |
37 | #include "llvm/ADT/APInt.h" |
38 | #include "llvm/ADT/ArrayRef.h" |
39 | #include "llvm/ADT/DenseMap.h" |
40 | #include "llvm/ADT/SmallPtrSet.h" |
41 | #include "llvm/ADT/SmallVector.h" |
42 | #include "llvm/ADT/Statistic.h" |
43 | #include "llvm/Analysis/AliasAnalysis.h" |
44 | #include "llvm/Analysis/AssumptionCache.h" |
45 | #include "llvm/Analysis/BasicAliasAnalysis.h" |
46 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
47 | #include "llvm/Analysis/CFG.h" |
48 | #include "llvm/Analysis/ConstantFolding.h" |
49 | #include "llvm/Analysis/GlobalsModRef.h" |
50 | #include "llvm/Analysis/InstructionSimplify.h" |
51 | #include "llvm/Analysis/LastRunTrackingAnalysis.h" |
52 | #include "llvm/Analysis/LazyBlockFrequencyInfo.h" |
53 | #include "llvm/Analysis/MemoryBuiltins.h" |
54 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
55 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
56 | #include "llvm/Analysis/TargetFolder.h" |
57 | #include "llvm/Analysis/TargetLibraryInfo.h" |
58 | #include "llvm/Analysis/TargetTransformInfo.h" |
59 | #include "llvm/Analysis/Utils/Local.h" |
60 | #include "llvm/Analysis/ValueTracking.h" |
61 | #include "llvm/Analysis/VectorUtils.h" |
62 | #include "llvm/IR/BasicBlock.h" |
63 | #include "llvm/IR/CFG.h" |
64 | #include "llvm/IR/Constant.h" |
65 | #include "llvm/IR/Constants.h" |
66 | #include "llvm/IR/DIBuilder.h" |
67 | #include "llvm/IR/DataLayout.h" |
68 | #include "llvm/IR/DebugInfo.h" |
69 | #include "llvm/IR/DerivedTypes.h" |
70 | #include "llvm/IR/Dominators.h" |
71 | #include "llvm/IR/EHPersonalities.h" |
72 | #include "llvm/IR/Function.h" |
73 | #include "llvm/IR/GetElementPtrTypeIterator.h" |
74 | #include "llvm/IR/IRBuilder.h" |
75 | #include "llvm/IR/InstrTypes.h" |
76 | #include "llvm/IR/Instruction.h" |
77 | #include "llvm/IR/Instructions.h" |
78 | #include "llvm/IR/IntrinsicInst.h" |
79 | #include "llvm/IR/Intrinsics.h" |
80 | #include "llvm/IR/Metadata.h" |
81 | #include "llvm/IR/Operator.h" |
82 | #include "llvm/IR/PassManager.h" |
83 | #include "llvm/IR/PatternMatch.h" |
84 | #include "llvm/IR/Type.h" |
85 | #include "llvm/IR/Use.h" |
86 | #include "llvm/IR/User.h" |
87 | #include "llvm/IR/Value.h" |
88 | #include "llvm/IR/ValueHandle.h" |
89 | #include "llvm/InitializePasses.h" |
90 | #include "llvm/Support/Casting.h" |
91 | #include "llvm/Support/CommandLine.h" |
92 | #include "llvm/Support/Compiler.h" |
93 | #include "llvm/Support/Debug.h" |
94 | #include "llvm/Support/DebugCounter.h" |
95 | #include "llvm/Support/ErrorHandling.h" |
96 | #include "llvm/Support/KnownBits.h" |
97 | #include "llvm/Support/KnownFPClass.h" |
98 | #include "llvm/Support/raw_ostream.h" |
99 | #include "llvm/Transforms/InstCombine/InstCombine.h" |
100 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
101 | #include "llvm/Transforms/Utils/Local.h" |
102 | #include <algorithm> |
103 | #include <cassert> |
104 | #include <cstdint> |
105 | #include <memory> |
106 | #include <optional> |
107 | #include <string> |
108 | #include <utility> |
109 | |
110 | #define DEBUG_TYPE "instcombine" |
111 | #include "llvm/Transforms/Utils/InstructionWorklist.h" |
113 | |
114 | using namespace llvm; |
115 | using namespace llvm::PatternMatch; |
116 | |
117 | STATISTIC(NumWorklistIterations, |
118 | "Number of instruction combining iterations performed" ); |
119 | STATISTIC(NumOneIteration, "Number of functions with one iteration" ); |
120 | STATISTIC(NumTwoIterations, "Number of functions with two iterations" ); |
121 | STATISTIC(NumThreeIterations, "Number of functions with three iterations" ); |
122 | STATISTIC(NumFourOrMoreIterations, |
123 | "Number of functions with four or more iterations" ); |
124 | |
125 | STATISTIC(NumCombined , "Number of insts combined" ); |
126 | STATISTIC(NumConstProp, "Number of constant folds" ); |
127 | STATISTIC(NumDeadInst , "Number of dead inst eliminated" ); |
128 | STATISTIC(NumSunkInst , "Number of instructions sunk" ); |
129 | STATISTIC(NumExpand, "Number of expansions" ); |
130 | STATISTIC(NumFactor , "Number of factorizations" ); |
131 | STATISTIC(NumReassoc , "Number of reassociations" ); |
132 | DEBUG_COUNTER(VisitCounter, "instcombine-visit" , |
133 | "Controls which instructions are visited" ); |
134 | |
135 | static cl::opt<bool> |
136 | EnableCodeSinking("instcombine-code-sinking" , cl::desc("Enable code sinking" ), |
137 | cl::init(Val: true)); |
138 | |
139 | static cl::opt<unsigned> MaxSinkNumUsers( |
140 | "instcombine-max-sink-users" , cl::init(Val: 32), |
141 | cl::desc("Maximum number of undroppable users for instruction sinking" )); |
142 | |
143 | static cl::opt<unsigned> |
144 | MaxArraySize("instcombine-maxarray-size" , cl::init(Val: 1024), |
145 | cl::desc("Maximum array size considered when doing a combine" )); |
146 | |
147 | // FIXME: Remove this flag when it is no longer necessary to convert |
148 | // llvm.dbg.declare to avoid inaccurate debug info. Setting this to false |
149 | // increases variable availability at the cost of accuracy. Variables that |
150 | // cannot be promoted by mem2reg or SROA will be described as living in memory |
151 | // for their entire lifetime. However, passes like DSE and instcombine can |
152 | // delete stores to the alloca, leading to misleading and inaccurate debug |
153 | // information. This flag can be removed when those passes are fixed. |
154 | static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare" , |
155 | cl::Hidden, cl::init(Val: true)); |
156 | |
157 | std::optional<Instruction *> |
158 | InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) { |
159 | // Handle target specific intrinsics |
160 | if (II.getCalledFunction()->isTargetIntrinsic()) { |
161 | return TTIForTargetIntrinsicsOnly.instCombineIntrinsic(IC&: *this, II); |
162 | } |
163 | return std::nullopt; |
164 | } |
165 | |
166 | std::optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic( |
167 | IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, |
168 | bool &KnownBitsComputed) { |
169 | // Handle target specific intrinsics |
170 | if (II.getCalledFunction()->isTargetIntrinsic()) { |
171 | return TTIForTargetIntrinsicsOnly.simplifyDemandedUseBitsIntrinsic( |
172 | IC&: *this, II, DemandedMask, Known, KnownBitsComputed); |
173 | } |
174 | return std::nullopt; |
175 | } |
176 | |
177 | std::optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic( |
178 | IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts, |
179 | APInt &PoisonElts2, APInt &PoisonElts3, |
180 | std::function<void(Instruction *, unsigned, APInt, APInt &)> |
181 | SimplifyAndSetOp) { |
182 | // Handle target specific intrinsics |
183 | if (II.getCalledFunction()->isTargetIntrinsic()) { |
184 | return TTIForTargetIntrinsicsOnly.simplifyDemandedVectorEltsIntrinsic( |
185 | IC&: *this, II, DemandedElts, UndefElts&: PoisonElts, UndefElts2&: PoisonElts2, UndefElts3&: PoisonElts3, |
186 | SimplifyAndSetOp); |
187 | } |
188 | return std::nullopt; |
189 | } |
190 | |
191 | bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const { |
192 | // Approved exception for TTI use: This queries a legality property of the |
  // target, not a profitability heuristic. Ideally this should be part of
194 | // DataLayout instead. |
195 | return TTIForTargetIntrinsicsOnly.isValidAddrSpaceCast(FromAS, ToAS); |
196 | } |
197 | |
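/// Emit an explicit computation of the byte offset applied by \p GEP. If
/// \p RewriteGEP is set and the GEP is a non-trivial instruction (not all
/// constant indices and not already an i8 GEP), the GEP itself is also
/// rewritten into "getelementptr i8" form using the emitted offset so the
/// offset arithmetic is not duplicated.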
198 | Value *InstCombinerImpl::EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP) { |
199 | if (!RewriteGEP) |
200 | return llvm::emitGEPOffset(Builder: &Builder, DL, GEP); |
201 | |
202 | IRBuilderBase::InsertPointGuard Guard(Builder); |
203 | auto *Inst = dyn_cast<Instruction>(Val: GEP); |
204 | if (Inst) |
205 | Builder.SetInsertPoint(Inst); |
206 | |
207 | Value *Offset = EmitGEPOffset(GEP); |
208 | // Rewrite non-trivial GEPs to avoid duplicating the offset arithmetic. |
209 | if (Inst && !GEP->hasAllConstantIndices() && |
210 | !GEP->getSourceElementType()->isIntegerTy(Bitwidth: 8)) { |
211 | replaceInstUsesWith( |
212 | I&: *Inst, V: Builder.CreateGEP(Ty: Builder.getInt8Ty(), Ptr: GEP->getPointerOperand(), |
213 | IdxList: Offset, Name: "" , NW: GEP->getNoWrapFlags())); |
214 | eraseInstFromFunction(I&: *Inst); |
215 | } |
216 | return Offset; |
217 | } |
218 | |
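/// Emit the combined offset of a list of GEPs by summing the offset of each
/// GEP (iterating the list in reverse), applying the no-wrap flags \p NW to
/// the adds. A scalar offset is splatted when the index type \p IdxTy is a
/// vector type. Returns a zero of \p IdxTy if \p GEPs is empty.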
219 | Value *InstCombinerImpl::EmitGEPOffsets(ArrayRef<GEPOperator *> GEPs, |
220 | GEPNoWrapFlags NW, Type *IdxTy, |
221 | bool RewriteGEPs) { |
222 | Value *Sum = nullptr; |
223 | for (GEPOperator *GEP : reverse(C&: GEPs)) { |
224 | Value *Offset = EmitGEPOffset(GEP, RewriteGEP: RewriteGEPs); |
225 | if (Offset->getType() != IdxTy) |
226 | Offset = Builder.CreateVectorSplat( |
227 | EC: cast<VectorType>(Val: IdxTy)->getElementCount(), V: Offset); |
228 | if (Sum) |
229 | Sum = Builder.CreateAdd(LHS: Sum, RHS: Offset, Name: "" , HasNUW: NW.hasNoUnsignedWrap(), |
230 | HasNSW: NW.isInBounds()); |
231 | else |
232 | Sum = Offset; |
233 | } |
234 | if (!Sum) |
235 | return Constant::getNullValue(Ty: IdxTy); |
236 | return Sum; |
237 | } |
238 | |
239 | /// Legal integers and common types are considered desirable. This is used to |
/// avoid creating instructions with types that may not be supported well by
/// the backend.
242 | /// NOTE: This treats i8, i16 and i32 specially because they are common |
243 | /// types in frontend languages. |
244 | bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const { |
245 | switch (BitWidth) { |
246 | case 8: |
247 | case 16: |
248 | case 32: |
249 | return true; |
250 | default: |
251 | return DL.isLegalInteger(Width: BitWidth); |
252 | } |
253 | } |
254 | |
255 | /// Return true if it is desirable to convert an integer computation from a |
256 | /// given bit width to a new bit width. |
257 | /// We don't want to convert from a legal or desirable type (like i8) to an |
258 | /// illegal type or from a smaller to a larger illegal type. A width of '1' |
259 | /// is always treated as a desirable type because i1 is a fundamental type in |
260 | /// IR, and there are many specialized optimizations for i1 types. |
261 | /// Common/desirable widths are equally treated as legal to convert to, in |
262 | /// order to open up more combining opportunities. |
263 | bool InstCombinerImpl::shouldChangeType(unsigned FromWidth, |
264 | unsigned ToWidth) const { |
265 | bool FromLegal = FromWidth == 1 || DL.isLegalInteger(Width: FromWidth); |
266 | bool ToLegal = ToWidth == 1 || DL.isLegalInteger(Width: ToWidth); |
267 | |
268 | // Convert to desirable widths even if they are not legal types. |
269 | // Only shrink types, to prevent infinite loops. |
270 | if (ToWidth < FromWidth && isDesirableIntType(BitWidth: ToWidth)) |
271 | return true; |
272 | |
  // If the source type is a legal or desirable integer type and the result
  // would be an illegal type, don't do the transformation.
275 | if ((FromLegal || isDesirableIntType(BitWidth: FromWidth)) && !ToLegal) |
276 | return false; |
277 | |
278 | // Otherwise, if both are illegal, do not increase the size of the result. We |
279 | // do allow things like i160 -> i64, but not i64 -> i160. |
280 | if (!FromLegal && !ToLegal && ToWidth > FromWidth) |
281 | return false; |
282 | |
283 | return true; |
284 | } |
285 | |
286 | /// Return true if it is desirable to convert a computation from 'From' to 'To'. |
287 | /// We don't want to convert from a legal to an illegal type or from a smaller |
288 | /// to a larger illegal type. i1 is always treated as a legal type because it is |
289 | /// a fundamental type in IR, and there are many specialized optimizations for |
290 | /// i1 types. |
291 | bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const { |
292 | // TODO: This could be extended to allow vectors. Datalayout changes might be |
293 | // needed to properly support that. |
294 | if (!From->isIntegerTy() || !To->isIntegerTy()) |
295 | return false; |
296 | |
297 | unsigned FromWidth = From->getPrimitiveSizeInBits(); |
298 | unsigned ToWidth = To->getPrimitiveSizeInBits(); |
299 | return shouldChangeType(FromWidth, ToWidth); |
300 | } |
301 | |
// Return true if No Signed Wrap should be maintained for I.
303 | // The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C", |
304 | // where both B and C should be ConstantInts, results in a constant that does |
305 | // not overflow. This function only handles the Add/Sub/Mul opcodes. For |
306 | // all other opcodes, the function conservatively returns false. |
307 | static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { |
308 | auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I); |
309 | if (!OBO || !OBO->hasNoSignedWrap()) |
310 | return false; |
311 | |
312 | const APInt *BVal, *CVal; |
313 | if (!match(V: B, P: m_APInt(Res&: BVal)) || !match(V: C, P: m_APInt(Res&: CVal))) |
314 | return false; |
315 | |
316 | // We reason about Add/Sub/Mul Only. |
317 | bool Overflow = false; |
318 | switch (I.getOpcode()) { |
319 | case Instruction::Add: |
320 | (void)BVal->sadd_ov(RHS: *CVal, Overflow); |
321 | break; |
322 | case Instruction::Sub: |
323 | (void)BVal->ssub_ov(RHS: *CVal, Overflow); |
324 | break; |
325 | case Instruction::Mul: |
326 | (void)BVal->smul_ov(RHS: *CVal, Overflow); |
327 | break; |
328 | default: |
329 | // Conservatively return false for other opcodes. |
330 | return false; |
331 | } |
332 | return !Overflow; |
333 | } |
334 | |
335 | static bool hasNoUnsignedWrap(BinaryOperator &I) { |
336 | auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I); |
337 | return OBO && OBO->hasNoUnsignedWrap(); |
338 | } |
339 | |
340 | static bool hasNoSignedWrap(BinaryOperator &I) { |
341 | auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I); |
342 | return OBO && OBO->hasNoSignedWrap(); |
343 | } |
344 | |
/// Conservatively clears subclassOptionalData after a reassociation or
/// commutation. Fast-math flags are the exception: when applicable they are
/// saved and restored across the clear.
348 | static void ClearSubclassDataAfterReassociation(BinaryOperator &I) { |
349 | FPMathOperator *FPMO = dyn_cast<FPMathOperator>(Val: &I); |
350 | if (!FPMO) { |
351 | I.clearSubclassOptionalData(); |
352 | return; |
353 | } |
354 | |
355 | FastMathFlags FMF = I.getFastMathFlags(); |
356 | I.clearSubclassOptionalData(); |
357 | I.setFastMathFlags(FMF); |
358 | } |
359 | |
360 | /// Combine constant operands of associative operations either before or after a |
361 | /// cast to eliminate one of the associative operations: |
362 | /// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2))) |
363 | /// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2)) |
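/// For example (currently only zext casts and bitwise-logic binops are
/// handled):
///   (or (zext (or i8 %x, 3) to i32), 8) --> (or (zext i8 %x to i32), 11)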
364 | static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1, |
365 | InstCombinerImpl &IC) { |
366 | auto *Cast = dyn_cast<CastInst>(Val: BinOp1->getOperand(i_nocapture: 0)); |
367 | if (!Cast || !Cast->hasOneUse()) |
368 | return false; |
369 | |
370 | // TODO: Enhance logic for other casts and remove this check. |
371 | auto CastOpcode = Cast->getOpcode(); |
372 | if (CastOpcode != Instruction::ZExt) |
373 | return false; |
374 | |
375 | // TODO: Enhance logic for other BinOps and remove this check. |
376 | if (!BinOp1->isBitwiseLogicOp()) |
377 | return false; |
378 | |
379 | auto AssocOpcode = BinOp1->getOpcode(); |
380 | auto *BinOp2 = dyn_cast<BinaryOperator>(Val: Cast->getOperand(i_nocapture: 0)); |
381 | if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode) |
382 | return false; |
383 | |
384 | Constant *C1, *C2; |
385 | if (!match(V: BinOp1->getOperand(i_nocapture: 1), P: m_Constant(C&: C1)) || |
386 | !match(V: BinOp2->getOperand(i_nocapture: 1), P: m_Constant(C&: C2))) |
387 | return false; |
388 | |
389 | // TODO: This assumes a zext cast. |
390 | // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2 |
391 | // to the destination type might lose bits. |
392 | |
393 | // Fold the constants together in the destination type: |
394 | // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC) |
395 | const DataLayout &DL = IC.getDataLayout(); |
396 | Type *DestTy = C1->getType(); |
397 | Constant *CastC2 = ConstantFoldCastOperand(Opcode: CastOpcode, C: C2, DestTy, DL); |
398 | if (!CastC2) |
399 | return false; |
400 | Constant *FoldedC = ConstantFoldBinaryOpOperands(Opcode: AssocOpcode, LHS: C1, RHS: CastC2, DL); |
401 | if (!FoldedC) |
402 | return false; |
403 | |
404 | IC.replaceOperand(I&: *Cast, OpNum: 0, V: BinOp2->getOperand(i_nocapture: 0)); |
405 | IC.replaceOperand(I&: *BinOp1, OpNum: 1, V: FoldedC); |
406 | BinOp1->dropPoisonGeneratingFlags(); |
407 | Cast->dropPoisonGeneratingFlags(); |
408 | return true; |
409 | } |
410 | |
411 | // Simplifies IntToPtr/PtrToInt RoundTrip Cast. |
412 | // inttoptr ( ptrtoint (x) ) --> x |
413 | Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) { |
414 | auto *IntToPtr = dyn_cast<IntToPtrInst>(Val); |
415 | if (IntToPtr && DL.getTypeSizeInBits(Ty: IntToPtr->getDestTy()) == |
416 | DL.getTypeSizeInBits(Ty: IntToPtr->getSrcTy())) { |
417 | auto *PtrToInt = dyn_cast<PtrToIntInst>(Val: IntToPtr->getOperand(i_nocapture: 0)); |
418 | Type *CastTy = IntToPtr->getDestTy(); |
419 | if (PtrToInt && |
420 | CastTy->getPointerAddressSpace() == |
421 | PtrToInt->getSrcTy()->getPointerAddressSpace() && |
422 | DL.getTypeSizeInBits(Ty: PtrToInt->getSrcTy()) == |
423 | DL.getTypeSizeInBits(Ty: PtrToInt->getDestTy())) |
424 | return PtrToInt->getOperand(i_nocapture: 0); |
425 | } |
426 | return nullptr; |
427 | } |
428 | |
429 | /// This performs a few simplifications for operators that are associative or |
430 | /// commutative: |
431 | /// |
432 | /// Commutative operators: |
433 | /// |
434 | /// 1. Order operands such that they are listed from right (least complex) to |
435 | /// left (most complex). This puts constants before unary operators before |
436 | /// binary operators. |
437 | /// |
438 | /// Associative operators: |
439 | /// |
440 | /// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. |
441 | /// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. |
442 | /// |
443 | /// Associative and commutative operators: |
444 | /// |
445 | /// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. |
446 | /// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. |
447 | /// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" |
448 | /// if C1 and C2 are constants. |
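/// For example, transform 2 turns "(add (add %x, 1), 2)" into "add %x, 3"
/// because the inner "1 add 2" folds to a constant, and transform 6 turns
/// "(add (add %a, 1), (add %b, 2))" into "(add (add %a, %b), 3)" when the
/// inner adds have no other uses.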
449 | bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) { |
450 | Instruction::BinaryOps Opcode = I.getOpcode(); |
451 | bool Changed = false; |
452 | |
453 | do { |
454 | // Order operands such that they are listed from right (least complex) to |
455 | // left (most complex). This puts constants before unary operators before |
456 | // binary operators. |
457 | if (I.isCommutative() && getComplexity(V: I.getOperand(i_nocapture: 0)) < |
458 | getComplexity(V: I.getOperand(i_nocapture: 1))) |
459 | Changed = !I.swapOperands(); |
460 | |
461 | if (I.isCommutative()) { |
462 | if (auto Pair = matchSymmetricPair(LHS: I.getOperand(i_nocapture: 0), RHS: I.getOperand(i_nocapture: 1))) { |
463 | replaceOperand(I, OpNum: 0, V: Pair->first); |
464 | replaceOperand(I, OpNum: 1, V: Pair->second); |
465 | Changed = true; |
466 | } |
467 | } |
468 | |
469 | BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: I.getOperand(i_nocapture: 0)); |
470 | BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: I.getOperand(i_nocapture: 1)); |
471 | |
472 | if (I.isAssociative()) { |
473 | // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. |
474 | if (Op0 && Op0->getOpcode() == Opcode) { |
475 | Value *A = Op0->getOperand(i_nocapture: 0); |
476 | Value *B = Op0->getOperand(i_nocapture: 1); |
477 | Value *C = I.getOperand(i_nocapture: 1); |
478 | |
479 | // Does "B op C" simplify? |
480 | if (Value *V = simplifyBinOp(Opcode, LHS: B, RHS: C, Q: SQ.getWithInstruction(I: &I))) { |
481 | // It simplifies to V. Form "A op V". |
482 | replaceOperand(I, OpNum: 0, V: A); |
483 | replaceOperand(I, OpNum: 1, V); |
484 | bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(I&: *Op0); |
485 | bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(I&: *Op0); |
486 | |
487 | // Conservatively clear all optional flags since they may not be |
488 | // preserved by the reassociation. Reset nsw/nuw based on the above |
489 | // analysis. |
490 | ClearSubclassDataAfterReassociation(I); |
491 | |
492 | // Note: this is only valid because SimplifyBinOp doesn't look at |
493 | // the operands to Op0. |
494 | if (IsNUW) |
495 | I.setHasNoUnsignedWrap(true); |
496 | |
497 | if (IsNSW) |
498 | I.setHasNoSignedWrap(true); |
499 | |
500 | Changed = true; |
501 | ++NumReassoc; |
502 | continue; |
503 | } |
504 | } |
505 | |
506 | // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. |
507 | if (Op1 && Op1->getOpcode() == Opcode) { |
508 | Value *A = I.getOperand(i_nocapture: 0); |
509 | Value *B = Op1->getOperand(i_nocapture: 0); |
510 | Value *C = Op1->getOperand(i_nocapture: 1); |
511 | |
512 | // Does "A op B" simplify? |
513 | if (Value *V = simplifyBinOp(Opcode, LHS: A, RHS: B, Q: SQ.getWithInstruction(I: &I))) { |
514 | // It simplifies to V. Form "V op C". |
515 | replaceOperand(I, OpNum: 0, V); |
516 | replaceOperand(I, OpNum: 1, V: C); |
517 | // Conservatively clear the optional flags, since they may not be |
518 | // preserved by the reassociation. |
519 | ClearSubclassDataAfterReassociation(I); |
520 | Changed = true; |
521 | ++NumReassoc; |
522 | continue; |
523 | } |
524 | } |
525 | } |
526 | |
527 | if (I.isAssociative() && I.isCommutative()) { |
528 | if (simplifyAssocCastAssoc(BinOp1: &I, IC&: *this)) { |
529 | Changed = true; |
530 | ++NumReassoc; |
531 | continue; |
532 | } |
533 | |
534 | // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. |
535 | if (Op0 && Op0->getOpcode() == Opcode) { |
536 | Value *A = Op0->getOperand(i_nocapture: 0); |
537 | Value *B = Op0->getOperand(i_nocapture: 1); |
538 | Value *C = I.getOperand(i_nocapture: 1); |
539 | |
540 | // Does "C op A" simplify? |
541 | if (Value *V = simplifyBinOp(Opcode, LHS: C, RHS: A, Q: SQ.getWithInstruction(I: &I))) { |
542 | // It simplifies to V. Form "V op B". |
543 | replaceOperand(I, OpNum: 0, V); |
544 | replaceOperand(I, OpNum: 1, V: B); |
545 | // Conservatively clear the optional flags, since they may not be |
546 | // preserved by the reassociation. |
547 | ClearSubclassDataAfterReassociation(I); |
548 | Changed = true; |
549 | ++NumReassoc; |
550 | continue; |
551 | } |
552 | } |
553 | |
554 | // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. |
555 | if (Op1 && Op1->getOpcode() == Opcode) { |
556 | Value *A = I.getOperand(i_nocapture: 0); |
557 | Value *B = Op1->getOperand(i_nocapture: 0); |
558 | Value *C = Op1->getOperand(i_nocapture: 1); |
559 | |
560 | // Does "C op A" simplify? |
561 | if (Value *V = simplifyBinOp(Opcode, LHS: C, RHS: A, Q: SQ.getWithInstruction(I: &I))) { |
562 | // It simplifies to V. Form "B op V". |
563 | replaceOperand(I, OpNum: 0, V: B); |
564 | replaceOperand(I, OpNum: 1, V); |
565 | // Conservatively clear the optional flags, since they may not be |
566 | // preserved by the reassociation. |
567 | ClearSubclassDataAfterReassociation(I); |
568 | Changed = true; |
569 | ++NumReassoc; |
570 | continue; |
571 | } |
572 | } |
573 | |
574 | // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" |
575 | // if C1 and C2 are constants. |
576 | Value *A, *B; |
577 | Constant *C1, *C2, *CRes; |
578 | if (Op0 && Op1 && |
579 | Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode && |
580 | match(V: Op0, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: A), R: m_Constant(C&: C1)))) && |
581 | match(V: Op1, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: B), R: m_Constant(C&: C2)))) && |
582 | (CRes = ConstantFoldBinaryOpOperands(Opcode, LHS: C1, RHS: C2, DL))) { |
583 | bool IsNUW = hasNoUnsignedWrap(I) && |
584 | hasNoUnsignedWrap(I&: *Op0) && |
585 | hasNoUnsignedWrap(I&: *Op1); |
586 | BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ? |
587 | BinaryOperator::CreateNUW(Opc: Opcode, V1: A, V2: B) : |
588 | BinaryOperator::Create(Op: Opcode, S1: A, S2: B); |
589 | |
590 | if (isa<FPMathOperator>(Val: NewBO)) { |
591 | FastMathFlags Flags = I.getFastMathFlags() & |
592 | Op0->getFastMathFlags() & |
593 | Op1->getFastMathFlags(); |
594 | NewBO->setFastMathFlags(Flags); |
595 | } |
596 | InsertNewInstWith(New: NewBO, Old: I.getIterator()); |
597 | NewBO->takeName(V: Op1); |
598 | replaceOperand(I, OpNum: 0, V: NewBO); |
599 | replaceOperand(I, OpNum: 1, V: CRes); |
600 | // Conservatively clear the optional flags, since they may not be |
601 | // preserved by the reassociation. |
602 | ClearSubclassDataAfterReassociation(I); |
603 | if (IsNUW) |
604 | I.setHasNoUnsignedWrap(true); |
605 | |
606 | Changed = true; |
607 | continue; |
608 | } |
609 | } |
610 | |
611 | // No further simplifications. |
612 | return Changed; |
613 | } while (true); |
614 | } |
615 | |
616 | /// Return whether "X LOp (Y ROp Z)" is always equal to |
617 | /// "(X LOp Y) ROp (X LOp Z)". |
618 | static bool leftDistributesOverRight(Instruction::BinaryOps LOp, |
619 | Instruction::BinaryOps ROp) { |
620 | // X & (Y | Z) <--> (X & Y) | (X & Z) |
621 | // X & (Y ^ Z) <--> (X & Y) ^ (X & Z) |
622 | if (LOp == Instruction::And) |
623 | return ROp == Instruction::Or || ROp == Instruction::Xor; |
624 | |
625 | // X | (Y & Z) <--> (X | Y) & (X | Z) |
626 | if (LOp == Instruction::Or) |
627 | return ROp == Instruction::And; |
628 | |
629 | // X * (Y + Z) <--> (X * Y) + (X * Z) |
630 | // X * (Y - Z) <--> (X * Y) - (X * Z) |
631 | if (LOp == Instruction::Mul) |
632 | return ROp == Instruction::Add || ROp == Instruction::Sub; |
633 | |
634 | return false; |
635 | } |
636 | |
637 | /// Return whether "(X LOp Y) ROp Z" is always equal to |
638 | /// "(X ROp Z) LOp (Y ROp Z)". |
639 | static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, |
640 | Instruction::BinaryOps ROp) { |
641 | if (Instruction::isCommutative(Opcode: ROp)) |
642 | return leftDistributesOverRight(LOp: ROp, ROp: LOp); |
643 | |
644 | // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts. |
645 | return Instruction::isBitwiseLogicOp(Opcode: LOp) && Instruction::isShift(Opcode: ROp); |
646 | |
647 | // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z", |
648 | // but this requires knowing that the addition does not overflow and other |
649 | // such subtleties. |
650 | } |
651 | |
652 | /// This function returns identity value for given opcode, which can be used to |
653 | /// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1). |
654 | static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) { |
655 | if (isa<Constant>(Val: V)) |
656 | return nullptr; |
657 | |
658 | return ConstantExpr::getBinOpIdentity(Opcode, Ty: V->getType()); |
659 | } |
660 | |
661 | /// This function predicates factorization using distributive laws. By default, |
662 | /// it just returns the 'Op' inputs. But for special-cases like |
663 | /// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add |
664 | /// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to |
665 | /// allow more factorization opportunities. |
666 | static Instruction::BinaryOps |
667 | getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op, |
668 | Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) { |
669 | assert(Op && "Expected a binary operator" ); |
670 | LHS = Op->getOperand(i_nocapture: 0); |
671 | RHS = Op->getOperand(i_nocapture: 1); |
672 | if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) { |
673 | Constant *C; |
674 | if (match(V: Op, P: m_Shl(L: m_Value(), R: m_ImmConstant(C)))) { |
675 | // X << C --> X * (1 << C) |
676 | RHS = ConstantFoldBinaryInstruction( |
677 | Opcode: Instruction::Shl, V1: ConstantInt::get(Ty: Op->getType(), V: 1), V2: C); |
678 | assert(RHS && "Constant folding of immediate constants failed" ); |
679 | return Instruction::Mul; |
680 | } |
681 | // TODO: We can add other conversions e.g. shr => div etc. |
682 | } |
683 | if (Instruction::isBitwiseLogicOp(Opcode: TopOpcode)) { |
684 | if (OtherOp && OtherOp->getOpcode() == Instruction::AShr && |
685 | match(V: Op, P: m_LShr(L: m_NonNegative(), R: m_Value()))) { |
686 | // lshr nneg C, X --> ashr nneg C, X |
687 | return Instruction::AShr; |
688 | } |
689 | } |
690 | return Op->getOpcode(); |
691 | } |
692 | |
693 | /// This tries to simplify binary operations by factorizing out common terms |
694 | /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). |
695 | static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ, |
696 | InstCombiner::BuilderTy &Builder, |
697 | Instruction::BinaryOps InnerOpcode, Value *A, |
698 | Value *B, Value *C, Value *D) { |
699 | assert(A && B && C && D && "All values must be provided" ); |
700 | |
701 | Value *V = nullptr; |
702 | Value *RetVal = nullptr; |
703 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
704 | Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); |
705 | |
706 | // Does "X op' Y" always equal "Y op' X"? |
707 | bool InnerCommutative = Instruction::isCommutative(Opcode: InnerOpcode); |
708 | |
709 | // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"? |
710 | if (leftDistributesOverRight(LOp: InnerOpcode, ROp: TopLevelOpcode)) { |
711 | // Does the instruction have the form "(A op' B) op (A op' D)" or, in the |
712 | // commutative case, "(A op' B) op (C op' A)"? |
713 | if (A == C || (InnerCommutative && A == D)) { |
714 | if (A != C) |
715 | std::swap(a&: C, b&: D); |
716 | // Consider forming "A op' (B op D)". |
717 | // If "B op D" simplifies then it can be formed with no cost. |
718 | V = simplifyBinOp(Opcode: TopLevelOpcode, LHS: B, RHS: D, Q: SQ.getWithInstruction(I: &I)); |
719 | |
720 | // If "B op D" doesn't simplify then only go on if one of the existing |
721 | // operations "A op' B" and "C op' D" will be zapped as no longer used. |
722 | if (!V && (LHS->hasOneUse() || RHS->hasOneUse())) |
723 | V = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: B, RHS: D, Name: RHS->getName()); |
724 | if (V) |
725 | RetVal = Builder.CreateBinOp(Opc: InnerOpcode, LHS: A, RHS: V); |
726 | } |
727 | } |
728 | |
729 | // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"? |
730 | if (!RetVal && rightDistributesOverLeft(LOp: TopLevelOpcode, ROp: InnerOpcode)) { |
731 | // Does the instruction have the form "(A op' B) op (C op' B)" or, in the |
732 | // commutative case, "(A op' B) op (B op' D)"? |
733 | if (B == D || (InnerCommutative && B == C)) { |
734 | if (B != D) |
735 | std::swap(a&: C, b&: D); |
736 | // Consider forming "(A op C) op' B". |
737 | // If "A op C" simplifies then it can be formed with no cost. |
738 | V = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQ.getWithInstruction(I: &I)); |
739 | |
740 | // If "A op C" doesn't simplify then only go on if one of the existing |
741 | // operations "A op' B" and "C op' D" will be zapped as no longer used. |
742 | if (!V && (LHS->hasOneUse() || RHS->hasOneUse())) |
743 | V = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C, Name: LHS->getName()); |
744 | if (V) |
745 | RetVal = Builder.CreateBinOp(Opc: InnerOpcode, LHS: V, RHS: B); |
746 | } |
747 | } |
748 | |
749 | if (!RetVal) |
750 | return nullptr; |
751 | |
752 | ++NumFactor; |
753 | RetVal->takeName(V: &I); |
754 | |
755 | // Try to add no-overflow flags to the final value. |
756 | if (isa<BinaryOperator>(Val: RetVal)) { |
757 | bool HasNSW = false; |
758 | bool HasNUW = false; |
759 | if (isa<OverflowingBinaryOperator>(Val: &I)) { |
760 | HasNSW = I.hasNoSignedWrap(); |
761 | HasNUW = I.hasNoUnsignedWrap(); |
762 | } |
763 | if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(Val: LHS)) { |
764 | HasNSW &= LOBO->hasNoSignedWrap(); |
765 | HasNUW &= LOBO->hasNoUnsignedWrap(); |
766 | } |
767 | |
768 | if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(Val: RHS)) { |
769 | HasNSW &= ROBO->hasNoSignedWrap(); |
770 | HasNUW &= ROBO->hasNoUnsignedWrap(); |
771 | } |
772 | |
773 | if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) { |
774 | // We can propagate 'nsw' if we know that |
775 | // %Y = mul nsw i16 %X, C |
776 | // %Z = add nsw i16 %Y, %X |
777 | // => |
778 | // %Z = mul nsw i16 %X, C+1 |
779 | // |
780 | // iff C+1 isn't INT_MIN |
781 | const APInt *CInt; |
782 | if (match(V, P: m_APInt(Res&: CInt)) && !CInt->isMinSignedValue()) |
783 | cast<Instruction>(Val: RetVal)->setHasNoSignedWrap(HasNSW); |
784 | |
785 | // nuw can be propagated with any constant or nuw value. |
786 | cast<Instruction>(Val: RetVal)->setHasNoUnsignedWrap(HasNUW); |
787 | } |
788 | } |
789 | return RetVal; |
790 | } |
791 | |
792 | // If `I` has one Const operand and the other matches `(ctpop (not x))`, |
793 | // replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`. |
// This is only useful if the new subtract can fold so we only handle the
// following cases:
//    1) (add/sub/disjoint_or C, (ctpop (not x)))
//        -> (add/sub/disjoint_or C', (ctpop x))
//    2) (cmp pred C, (ctpop (not x)))
//        -> (cmp pred C', (ctpop x))
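// For example, with i8 %x:
//    (add i8 (ctpop (xor %x, -1)), 10) -> (sub i8 18, (ctpop %x))
// because (ctpop (not %x)) == 8 - (ctpop %x).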
800 | Instruction *InstCombinerImpl::tryFoldInstWithCtpopWithNot(Instruction *I) { |
801 | unsigned Opc = I->getOpcode(); |
802 | unsigned ConstIdx = 1; |
803 | switch (Opc) { |
804 | default: |
805 | return nullptr; |
806 | // (ctpop (not x)) <-> (sub nuw nsw BitWidth(x) - (ctpop x)) |
807 | // We can fold the BitWidth(x) with add/sub/icmp as long the other operand |
808 | // is constant. |
809 | case Instruction::Sub: |
810 | ConstIdx = 0; |
811 | break; |
812 | case Instruction::ICmp: |
    // Signed predicates aren't correct in some edge cases like for i2 types;
    // also, since (ctpop x) is known to be in [0, BitWidth(x)], almost all
    // signed comparisons against it are simplified to unsigned.
816 | if (cast<ICmpInst>(Val: I)->isSigned()) |
817 | return nullptr; |
818 | break; |
819 | case Instruction::Or: |
820 | if (!match(V: I, P: m_DisjointOr(L: m_Value(), R: m_Value()))) |
821 | return nullptr; |
822 | [[fallthrough]]; |
823 | case Instruction::Add: |
824 | break; |
825 | } |
826 | |
827 | Value *Op; |
828 | // Find ctpop. |
829 | if (!match(V: I->getOperand(i: 1 - ConstIdx), |
830 | P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ctpop>(Op0: m_Value(V&: Op))))) |
831 | return nullptr; |
832 | |
833 | Constant *C; |
834 | // Check other operand is ImmConstant. |
835 | if (!match(V: I->getOperand(i: ConstIdx), P: m_ImmConstant(C))) |
836 | return nullptr; |
837 | |
838 | Type *Ty = Op->getType(); |
839 | Constant *BitWidthC = ConstantInt::get(Ty, V: Ty->getScalarSizeInBits()); |
840 | // Need extra check for icmp. Note if this check is true, it generally means |
841 | // the icmp will simplify to true/false. |
842 | if (Opc == Instruction::ICmp && !cast<ICmpInst>(Val: I)->isEquality()) { |
843 | Constant *Cmp = |
844 | ConstantFoldCompareInstOperands(Predicate: ICmpInst::ICMP_UGT, LHS: C, RHS: BitWidthC, DL); |
845 | if (!Cmp || !Cmp->isZeroValue()) |
846 | return nullptr; |
847 | } |
848 | |
849 | // Check we can invert `(not x)` for free. |
850 | bool Consumes = false; |
851 | if (!isFreeToInvert(V: Op, WillInvertAllUses: Op->hasOneUse(), DoesConsume&: Consumes) || !Consumes) |
852 | return nullptr; |
853 | Value *NotOp = getFreelyInverted(V: Op, WillInvertAllUses: Op->hasOneUse(), Builder: &Builder); |
854 | assert(NotOp != nullptr && |
855 | "Desync between isFreeToInvert and getFreelyInverted" ); |
856 | |
857 | Value *CtpopOfNotOp = Builder.CreateIntrinsic(RetTy: Ty, ID: Intrinsic::ctpop, Args: NotOp); |
858 | |
859 | Value *R = nullptr; |
860 | |
861 | // Do the transformation here to avoid potentially introducing an infinite |
862 | // loop. |
863 | switch (Opc) { |
864 | case Instruction::Sub: |
865 | R = Builder.CreateAdd(LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: C, C2: BitWidthC)); |
866 | break; |
867 | case Instruction::Or: |
868 | case Instruction::Add: |
869 | R = Builder.CreateSub(LHS: ConstantExpr::getAdd(C1: C, C2: BitWidthC), RHS: CtpopOfNotOp); |
870 | break; |
871 | case Instruction::ICmp: |
872 | R = Builder.CreateICmp(P: cast<ICmpInst>(Val: I)->getSwappedPredicate(), |
873 | LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: BitWidthC, C2: C)); |
874 | break; |
875 | default: |
876 | llvm_unreachable("Unhandled Opcode" ); |
877 | } |
878 | assert(R != nullptr); |
879 | return replaceInstUsesWith(I&: *I, V: R); |
880 | } |
881 | |
882 | // (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C)) |
883 | // IFF |
884 | // 1) the logic_shifts match |
//  2) either BinOp1 is `and`, or the binop/shift pair is fully
//      distributable and either BinOp2 is `and` or
//      (logic_shift (inv_logic_shift C1, C), C) == C1
888 | // |
889 | // -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C) |
890 | // |
891 | // (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt)) |
892 | // IFF |
893 | // 1) the logic_shifts match |
894 | // 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`). |
895 | // |
896 | // -> (BinOp (logic_shift (BinOp X, Y)), Mask) |
897 | // |
898 | // (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt)) |
899 | // IFF |
900 | // 1) Binop1 is bitwise logical operator `and`, `or` or `xor` |
901 | // 2) Binop2 is `not` |
902 | // |
903 | // -> (arithmetic_shift Binop1((not X), Y), Amt) |
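//
// For example, for the second pattern with BinOp1 == BinOp2 == `xor`:
//    (xor (xor (lshr X, 4), Mask), (lshr Y, 4))
//      -> (xor (lshr (xor X, Y), 4), Mask)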
904 | |
905 | Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) { |
906 | const DataLayout &DL = I.getDataLayout(); |
907 | auto IsValidBinOpc = [](unsigned Opc) { |
908 | switch (Opc) { |
909 | default: |
910 | return false; |
911 | case Instruction::And: |
912 | case Instruction::Or: |
913 | case Instruction::Xor: |
914 | case Instruction::Add: |
915 | // Skip Sub as we only match constant masks which will canonicalize to use |
916 | // add. |
917 | return true; |
918 | } |
919 | }; |
920 | |
921 | // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra |
922 | // constraints. |
923 | auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2, |
924 | unsigned ShOpc) { |
925 | assert(ShOpc != Instruction::AShr); |
926 | return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) || |
927 | ShOpc == Instruction::Shl; |
928 | }; |
929 | |
930 | auto GetInvShift = [](unsigned ShOpc) { |
931 | assert(ShOpc != Instruction::AShr); |
932 | return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr; |
933 | }; |
934 | |
935 | auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2, |
936 | unsigned ShOpc, Constant *CMask, |
937 | Constant *CShift) { |
938 | // If the BinOp1 is `and` we don't need to check the mask. |
939 | if (BinOpc1 == Instruction::And) |
940 | return true; |
941 | |
    // For all other possible transforms we need a completely distributable
    // binop/shift pair (anything but `add` + `lshr`).
944 | if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc)) |
945 | return false; |
946 | |
947 | // If BinOp2 is `and`, any mask works (this only really helps for non-splat |
948 | // vecs, otherwise the mask will be simplified and the following check will |
949 | // handle it). |
950 | if (BinOpc2 == Instruction::And) |
951 | return true; |
952 | |
953 | // Otherwise, need mask that meets the below requirement. |
954 | // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask |
955 | Constant *MaskInvShift = |
956 | ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL); |
957 | return ConstantFoldBinaryOpOperands(Opcode: ShOpc, LHS: MaskInvShift, RHS: CShift, DL) == |
958 | CMask; |
959 | }; |
960 | |
961 | auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * { |
962 | Constant *CMask, *CShift; |
963 | Value *X, *Y, *ShiftedX, *Mask, *Shift; |
964 | if (!match(V: I.getOperand(i_nocapture: ShOpnum), |
965 | P: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: Y), R: m_Value(V&: Shift))))) |
966 | return nullptr; |
967 | if (!match(V: I.getOperand(i_nocapture: 1 - ShOpnum), |
968 | P: m_c_BinOp(L: m_CombineAnd( |
969 | L: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: X), R: m_Specific(V: Shift))), |
970 | R: m_Value(V&: ShiftedX)), |
971 | R: m_Value(V&: Mask)))) |
972 | return nullptr; |
973 | // Make sure we are matching instruction shifts and not ConstantExpr |
974 | auto *IY = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: ShOpnum)); |
975 | auto *IX = dyn_cast<Instruction>(Val: ShiftedX); |
976 | if (!IY || !IX) |
977 | return nullptr; |
978 | |
979 | // LHS and RHS need same shift opcode |
980 | unsigned ShOpc = IY->getOpcode(); |
981 | if (ShOpc != IX->getOpcode()) |
982 | return nullptr; |
983 | |
984 | // Make sure binop is real instruction and not ConstantExpr |
985 | auto *BO2 = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: 1 - ShOpnum)); |
986 | if (!BO2) |
987 | return nullptr; |
988 | |
989 | unsigned BinOpc = BO2->getOpcode(); |
990 | // Make sure we have valid binops. |
991 | if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc)) |
992 | return nullptr; |
993 | |
994 | if (ShOpc == Instruction::AShr) { |
995 | if (Instruction::isBitwiseLogicOp(Opcode: I.getOpcode()) && |
996 | BinOpc == Instruction::Xor && match(V: Mask, P: m_AllOnes())) { |
997 | Value *NotX = Builder.CreateNot(V: X); |
998 | Value *NewBinOp = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NotX); |
999 | return BinaryOperator::Create( |
1000 | Op: static_cast<Instruction::BinaryOps>(ShOpc), S1: NewBinOp, S2: Shift); |
1001 | } |
1002 | |
1003 | return nullptr; |
1004 | } |
1005 | |
    // If BinOp1 == BinOp2 and it's bitwise, or it's shl with add, then just
    // distribute to drop the shift regardless of the constants.
1008 | if (BinOpc == I.getOpcode() && |
1009 | IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) { |
1010 | Value *NewBinOp2 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: X, RHS: Y); |
1011 | Value *NewBinOp1 = Builder.CreateBinOp( |
1012 | Opc: static_cast<Instruction::BinaryOps>(ShOpc), LHS: NewBinOp2, RHS: Shift); |
1013 | return BinaryOperator::Create(Op: I.getOpcode(), S1: NewBinOp1, S2: Mask); |
1014 | } |
1015 | |
1016 | // Otherwise we can only distribute by constant shifting the mask, so |
1017 | // ensure we have constants. |
1018 | if (!match(V: Shift, P: m_ImmConstant(C&: CShift))) |
1019 | return nullptr; |
1020 | if (!match(V: Mask, P: m_ImmConstant(C&: CMask))) |
1021 | return nullptr; |
1022 | |
1023 | // Check if we can distribute the binops. |
1024 | if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift)) |
1025 | return nullptr; |
1026 | |
1027 | Constant *NewCMask = |
1028 | ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL); |
1029 | Value *NewBinOp2 = Builder.CreateBinOp( |
1030 | Opc: static_cast<Instruction::BinaryOps>(BinOpc), LHS: X, RHS: NewCMask); |
1031 | Value *NewBinOp1 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NewBinOp2); |
1032 | return BinaryOperator::Create(Op: static_cast<Instruction::BinaryOps>(ShOpc), |
1033 | S1: NewBinOp1, S2: CShift); |
1034 | }; |
1035 | |
1036 | if (Instruction *R = MatchBinOp(0)) |
1037 | return R; |
1038 | return MatchBinOp(1); |
1039 | } |
1040 | |
1041 | // (Binop (zext C), (select C, T, F)) |
1042 | // -> (select C, (binop 1, T), (binop 0, F)) |
1043 | // |
1044 | // (Binop (sext C), (select C, T, F)) |
1045 | // -> (select C, (binop -1, T), (binop 0, F)) |
1046 | // |
1047 | // Attempt to simplify binary operations into a select with folded args, when |
1048 | // one operand of the binop is a select instruction and the other operand is a |
1049 | // zext/sext extension, whose value is the select condition. |
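// For example:
//   (add (zext i1 %c to i32), (select i1 %c, i32 %t, i32 %f))
//     -> (select i1 %c, (add 1, %t), (add 0, %f))
// where the new arms then simplify further (here the false arm becomes %f).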
1050 | Instruction * |
1051 | InstCombinerImpl::foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I) { |
1052 | // TODO: this simplification may be extended to any speculatable instruction, |
1053 | // not just binops, and would possibly be handled better in FoldOpIntoSelect. |
1054 | Instruction::BinaryOps Opc = I.getOpcode(); |
1055 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
1056 | Value *A, *CondVal, *TrueVal, *FalseVal; |
1057 | Value *CastOp; |
1058 | |
1059 | auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) { |
1060 | return match(V: CastOp, P: m_ZExtOrSExt(Op: m_Value(V&: A))) && |
1061 | A->getType()->getScalarSizeInBits() == 1 && |
1062 | match(V: SelectOp, P: m_Select(C: m_Value(V&: CondVal), L: m_Value(V&: TrueVal), |
1063 | R: m_Value(V&: FalseVal))); |
1064 | }; |
1065 | |
  // Make sure one side of the binop is a select instruction, and the other is a
  // zero/sign extension operating on an i1.
1068 | if (MatchSelectAndCast(LHS, RHS)) |
1069 | CastOp = LHS; |
1070 | else if (MatchSelectAndCast(RHS, LHS)) |
1071 | CastOp = RHS; |
1072 | else |
1073 | return nullptr; |
1074 | |
1075 | auto NewFoldedConst = [&](bool IsTrueArm, Value *V) { |
1076 | bool IsCastOpRHS = (CastOp == RHS); |
1077 | bool IsZExt = isa<ZExtInst>(Val: CastOp); |
1078 | Constant *C; |
1079 | |
1080 | if (IsTrueArm) { |
1081 | C = Constant::getNullValue(Ty: V->getType()); |
1082 | } else if (IsZExt) { |
1083 | unsigned BitWidth = V->getType()->getScalarSizeInBits(); |
1084 | C = Constant::getIntegerValue(Ty: V->getType(), V: APInt(BitWidth, 1)); |
1085 | } else { |
1086 | C = Constant::getAllOnesValue(Ty: V->getType()); |
1087 | } |
1088 | |
1089 | return IsCastOpRHS ? Builder.CreateBinOp(Opc, LHS: V, RHS: C) |
1090 | : Builder.CreateBinOp(Opc, LHS: C, RHS: V); |
1091 | }; |
1092 | |
  // If the value used in the zext/sext is the select condition, or the
  // negation of the select condition, the binop can be simplified.
1095 | if (CondVal == A) { |
1096 | Value *NewTrueVal = NewFoldedConst(false, TrueVal); |
1097 | return SelectInst::Create(C: CondVal, S1: NewTrueVal, |
1098 | S2: NewFoldedConst(true, FalseVal)); |
1099 | } |
1100 | |
1101 | if (match(V: A, P: m_Not(V: m_Specific(V: CondVal)))) { |
1102 | Value *NewTrueVal = NewFoldedConst(true, TrueVal); |
1103 | return SelectInst::Create(C: CondVal, S1: NewTrueVal, |
1104 | S2: NewFoldedConst(false, FalseVal)); |
1105 | } |
1106 | |
1107 | return nullptr; |
1108 | } |
1109 | |
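/// Try to factor out a common term from a binary operation, treating an
/// operand that is not itself a suitable binop as "X op' Identity" when an
/// identity value exists; e.g. "(A*B) + A" is viewed as "(A*B) + (A*1)" and
/// factored to "A*(B+1)".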
1110 | Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) { |
1111 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
1112 | BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS); |
1113 | BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS); |
1114 | Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); |
1115 | Value *A, *B, *C, *D; |
1116 | Instruction::BinaryOps LHSOpcode, RHSOpcode; |
1117 | |
1118 | if (Op0) |
1119 | LHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op0, LHS&: A, RHS&: B, OtherOp: Op1); |
1120 | if (Op1) |
1121 | RHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op1, LHS&: C, RHS&: D, OtherOp: Op0); |
1122 | |
1123 | // The instruction has the form "(A op' B) op (C op' D)". Try to factorize |
1124 | // a common term. |
1125 | if (Op0 && Op1 && LHSOpcode == RHSOpcode) |
1126 | if (Value *V = tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C, D)) |
1127 | return V; |
1128 | |
1129 | // The instruction has the form "(A op' B) op (C)". Try to factorize common |
1130 | // term. |
1131 | if (Op0) |
1132 | if (Value *Ident = getIdentityValue(Opcode: LHSOpcode, V: RHS)) |
1133 | if (Value *V = |
1134 | tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C: RHS, D: Ident)) |
1135 | return V; |
1136 | |
1137 | // The instruction has the form "(B) op (C op' D)". Try to factorize common |
1138 | // term. |
1139 | if (Op1) |
1140 | if (Value *Ident = getIdentityValue(Opcode: RHSOpcode, V: LHS)) |
1141 | if (Value *V = |
1142 | tryFactorization(I, SQ, Builder, InnerOpcode: RHSOpcode, A: LHS, B: Ident, C, D)) |
1143 | return V; |
1144 | |
1145 | return nullptr; |
1146 | } |
1147 | |
1148 | /// This tries to simplify binary operations which some other binary operation |
1149 | /// distributes over either by factorizing out common terms |
1150 | /// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in |
1151 | /// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win). |
1152 | /// Returns the simplified value, or null if it didn't simplify. |
1153 | Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) { |
1154 | Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1); |
1155 | BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS); |
1156 | BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS); |
1157 | Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); |
1158 | |
1159 | // Factorization. |
1160 | if (Value *R = tryFactorizationFolds(I)) |
1161 | return R; |
1162 | |
1163 | // Expansion. |
1164 | if (Op0 && rightDistributesOverLeft(LOp: Op0->getOpcode(), ROp: TopLevelOpcode)) { |
1165 | // The instruction has the form "(A op' B) op C". See if expanding it out |
1166 | // to "(A op C) op' (B op C)" results in simplifications. |
1167 | Value *A = Op0->getOperand(i_nocapture: 0), *B = Op0->getOperand(i_nocapture: 1), *C = RHS; |
1168 | Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' |
1169 | |
1170 | // Disable the use of undef because it's not safe to distribute undef. |
1171 | auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef(); |
1172 | Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive); |
1173 | Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: B, RHS: C, Q: SQDistributive); |
1174 | |
1175 | // Do "A op C" and "B op C" both simplify? |
1176 | if (L && R) { |
1177 | // They do! Return "L op' R". |
1178 | ++NumExpand; |
1179 | C = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R); |
1180 | C->takeName(V: &I); |
1181 | return C; |
1182 | } |
1183 | |
1184 | // Does "A op C" simplify to the identity value for the inner opcode? |
1185 | if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) { |
      // It does! Return "B op C".
1187 | ++NumExpand; |
1188 | C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: B, RHS: C); |
1189 | C->takeName(V: &I); |
1190 | return C; |
1191 | } |
1192 | |
1193 | // Does "B op C" simplify to the identity value for the inner opcode? |
1194 | if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) { |
      // It does! Return "A op C".
1196 | ++NumExpand; |
1197 | C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C); |
1198 | C->takeName(V: &I); |
1199 | return C; |
1200 | } |
1201 | } |
1202 | |
1203 | if (Op1 && leftDistributesOverRight(LOp: TopLevelOpcode, ROp: Op1->getOpcode())) { |
1204 | // The instruction has the form "A op (B op' C)". See if expanding it out |
1205 | // to "(A op B) op' (A op C)" results in simplifications. |
1206 | Value *A = LHS, *B = Op1->getOperand(i_nocapture: 0), *C = Op1->getOperand(i_nocapture: 1); |
1207 | Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op' |
1208 | |
1209 | // Disable the use of undef because it's not safe to distribute undef. |
1210 | auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef(); |
1211 | Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: B, Q: SQDistributive); |
1212 | Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive); |
1213 | |
1214 | // Do "A op B" and "A op C" both simplify? |
1215 | if (L && R) { |
1216 | // They do! Return "L op' R". |
1217 | ++NumExpand; |
1218 | A = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R); |
1219 | A->takeName(V: &I); |
1220 | return A; |
1221 | } |
1222 | |
1223 | // Does "A op B" simplify to the identity value for the inner opcode? |
1224 | if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) { |
      // It does! Return "A op C".
1226 | ++NumExpand; |
1227 | A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C); |
1228 | A->takeName(V: &I); |
1229 | return A; |
1230 | } |
1231 | |
1232 | // Does "A op C" simplify to the identity value for the inner opcode? |
1233 | if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) { |
      // It does! Return "A op B".
1235 | ++NumExpand; |
1236 | A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: B); |
1237 | A->takeName(V: &I); |
1238 | return A; |
1239 | } |
1240 | } |
1241 | |
1242 | return SimplifySelectsFeedingBinaryOp(I, LHS, RHS); |
1243 | } |
1244 | |
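// Match a pair of phi nodes in the same block whose incoming values per
// predecessor are the same two values, possibly swapped, e.g.
//    %l = phi [ %a, %bb0 ], [ %b, %bb1 ]
//    %r = phi [ %b, %bb0 ], [ %a, %bb1 ]
// and return that value pair (here {%a, %b}).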
1245 | static std::optional<std::pair<Value *, Value *>> |
1246 | matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS) { |
1247 | if (LHS->getParent() != RHS->getParent()) |
1248 | return std::nullopt; |
1249 | |
1250 | if (LHS->getNumIncomingValues() < 2) |
1251 | return std::nullopt; |
1252 | |
1253 | if (!equal(LRange: LHS->blocks(), RRange: RHS->blocks())) |
1254 | return std::nullopt; |
1255 | |
1256 | Value *L0 = LHS->getIncomingValue(i: 0); |
1257 | Value *R0 = RHS->getIncomingValue(i: 0); |
1258 | |
1259 | for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) { |
1260 | Value *L1 = LHS->getIncomingValue(i: I); |
1261 | Value *R1 = RHS->getIncomingValue(i: I); |
1262 | |
1263 | if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1)) |
1264 | continue; |
1265 | |
1266 | return std::nullopt; |
1267 | } |
1268 | |
1269 | return std::optional(std::pair(L0, R0)); |
1270 | } |
1271 | |
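// Match a pair of operands that a commutative operation can replace with a
// simpler symmetric pair: two phis with swapped incoming values, two selects
// on the same condition with swapped arms, or a min/max pair over the same
// operands. Returns the pair of underlying values to use instead.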
1272 | std::optional<std::pair<Value *, Value *>> |
1273 | InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) { |
1274 | Instruction *LHSInst = dyn_cast<Instruction>(Val: LHS); |
1275 | Instruction *RHSInst = dyn_cast<Instruction>(Val: RHS); |
1276 | if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode()) |
1277 | return std::nullopt; |
1278 | switch (LHSInst->getOpcode()) { |
1279 | case Instruction::PHI: |
1280 | return matchSymmetricPhiNodesPair(LHS: cast<PHINode>(Val: LHS), RHS: cast<PHINode>(Val: RHS)); |
1281 | case Instruction::Select: { |
1282 | Value *Cond = LHSInst->getOperand(i: 0); |
1283 | Value *TrueVal = LHSInst->getOperand(i: 1); |
1284 | Value *FalseVal = LHSInst->getOperand(i: 2); |
1285 | if (Cond == RHSInst->getOperand(i: 0) && TrueVal == RHSInst->getOperand(i: 2) && |
1286 | FalseVal == RHSInst->getOperand(i: 1)) |
1287 | return std::pair(TrueVal, FalseVal); |
1288 | return std::nullopt; |
1289 | } |
1290 | case Instruction::Call: { |
1291 | // Match min(a, b) and max(a, b) |
1292 | MinMaxIntrinsic *LHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: LHSInst); |
1293 | MinMaxIntrinsic *RHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: RHSInst); |
1294 | if (LHSMinMax && RHSMinMax && |
1295 | LHSMinMax->getPredicate() == |
1296 | ICmpInst::getSwappedPredicate(pred: RHSMinMax->getPredicate()) && |
1297 | ((LHSMinMax->getLHS() == RHSMinMax->getLHS() && |
1298 | LHSMinMax->getRHS() == RHSMinMax->getRHS()) || |
1299 | (LHSMinMax->getLHS() == RHSMinMax->getRHS() && |
1300 | LHSMinMax->getRHS() == RHSMinMax->getLHS()))) |
1301 | return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS()); |
1302 | return std::nullopt; |
1303 | } |
1304 | default: |
1305 | return std::nullopt; |
1306 | } |
1307 | } |
1308 | |
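// Try to push a binop into the arms of a one-use select operand when the
// resulting arms simplify. A minimal illustrative sketch (names hypothetical,
// assuming the select has one use and both arms simplify):
//   %s = select i1 %c, i32 0, i32 -1
//   %r = and i32 %s, %x
//   -->
//   %r = select i1 %c, i32 0, i32 %x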
1309 | Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I, |
1310 | Value *LHS, |
1311 | Value *RHS) { |
1312 | Value *A, *B, *C, *D, *E, *F; |
1313 | bool LHSIsSelect = match(V: LHS, P: m_Select(C: m_Value(V&: A), L: m_Value(V&: B), R: m_Value(V&: C))); |
1314 | bool RHSIsSelect = match(V: RHS, P: m_Select(C: m_Value(V&: D), L: m_Value(V&: E), R: m_Value(V&: F))); |
1315 | if (!LHSIsSelect && !RHSIsSelect) |
1316 | return nullptr; |
1317 | |
1318 | FastMathFlags FMF; |
1319 | BuilderTy::FastMathFlagGuard Guard(Builder); |
1320 | if (isa<FPMathOperator>(Val: &I)) { |
1321 | FMF = I.getFastMathFlags(); |
1322 | Builder.setFastMathFlags(FMF); |
1323 | } |
1324 | |
1325 | Instruction::BinaryOps Opcode = I.getOpcode(); |
1326 | SimplifyQuery Q = SQ.getWithInstruction(I: &I); |
1327 | |
1328 | Value *Cond, *True = nullptr, *False = nullptr; |
1329 | |
1330 | // Special-case for add/negate combination. Replace the zero in the negation |
1331 | // with the trailing add operand: |
1332 | // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N) |
1333 | // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False |
1334 | auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * { |
1335 | // We need an 'add' and exactly 1 arm of the select to have been simplified. |
1336 | if (Opcode != Instruction::Add || (!True && !False) || (True && False)) |
1337 | return nullptr; |
1338 | |
1339 | Value *N; |
1340 | if (True && match(V: FVal, P: m_Neg(V: m_Value(V&: N)))) { |
1341 | Value *Sub = Builder.CreateSub(LHS: Z, RHS: N); |
1342 | return Builder.CreateSelect(C: Cond, True, False: Sub, Name: I.getName()); |
1343 | } |
1344 | if (False && match(V: TVal, P: m_Neg(V: m_Value(V&: N)))) { |
1345 | Value *Sub = Builder.CreateSub(LHS: Z, RHS: N); |
1346 | return Builder.CreateSelect(C: Cond, True: Sub, False, Name: I.getName()); |
1347 | } |
1348 | return nullptr; |
1349 | }; |
1350 | |
1351 | if (LHSIsSelect && RHSIsSelect && A == D) { |
1352 | // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F) |
1353 | Cond = A; |
1354 | True = simplifyBinOp(Opcode, LHS: B, RHS: E, FMF, Q); |
1355 | False = simplifyBinOp(Opcode, LHS: C, RHS: F, FMF, Q); |
1356 | |
1357 | if (LHS->hasOneUse() && RHS->hasOneUse()) { |
1358 | if (False && !True) |
1359 | True = Builder.CreateBinOp(Opc: Opcode, LHS: B, RHS: E); |
1360 | else if (True && !False) |
1361 | False = Builder.CreateBinOp(Opc: Opcode, LHS: C, RHS: F); |
1362 | } |
1363 | } else if (LHSIsSelect && LHS->hasOneUse()) { |
1364 | // (A ? B : C) op Y -> A ? (B op Y) : (C op Y) |
1365 | Cond = A; |
1366 | True = simplifyBinOp(Opcode, LHS: B, RHS, FMF, Q); |
1367 | False = simplifyBinOp(Opcode, LHS: C, RHS, FMF, Q); |
1368 | if (Value *NewSel = foldAddNegate(B, C, RHS)) |
1369 | return NewSel; |
1370 | } else if (RHSIsSelect && RHS->hasOneUse()) { |
1371 | // X op (D ? E : F) -> D ? (X op E) : (X op F) |
1372 | Cond = D; |
1373 | True = simplifyBinOp(Opcode, LHS, RHS: E, FMF, Q); |
1374 | False = simplifyBinOp(Opcode, LHS, RHS: F, FMF, Q); |
1375 | if (Value *NewSel = foldAddNegate(E, F, LHS)) |
1376 | return NewSel; |
1377 | } |
1378 | |
1379 | if (!True || !False) |
1380 | return nullptr; |
1381 | |
1382 | Value *SI = Builder.CreateSelect(C: Cond, True, False); |
1383 | SI->takeName(V: &I); |
1384 | return SI; |
1385 | } |
1386 | |
1387 | /// Freely adapt every user of V as-if V was changed to !V. |
1388 | /// WARNING: only if canFreelyInvertAllUsersOf() said this can be done. |
1389 | void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) { |
assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
1391 | for (User *U : make_early_inc_range(Range: I->users())) { |
1392 | if (U == IgnoredUser) |
1393 | continue; // Don't consider this user. |
1394 | switch (cast<Instruction>(Val: U)->getOpcode()) { |
1395 | case Instruction::Select: { |
1396 | auto *SI = cast<SelectInst>(Val: U); |
1397 | SI->swapValues(); |
1398 | SI->swapProfMetadata(); |
1399 | break; |
1400 | } |
1401 | case Instruction::Br: { |
1402 | BranchInst *BI = cast<BranchInst>(Val: U); |
1403 | BI->swapSuccessors(); // swaps prof metadata too |
1404 | if (BPI) |
1405 | BPI->swapSuccEdgesProbabilities(Src: BI->getParent()); |
1406 | break; |
1407 | } |
1408 | case Instruction::Xor: |
1409 | replaceInstUsesWith(I&: cast<Instruction>(Val&: *U), V: I); |
1410 | // Add to worklist for DCE. |
1411 | addToWorklist(I: cast<Instruction>(Val: U)); |
1412 | break; |
1413 | default: |
1414 | llvm_unreachable("Got unexpected user - out of sync with " |
1415 | "canFreelyInvertAllUsersOf() ?" ); |
1416 | } |
1417 | } |
1418 | |
1419 | // Update pre-existing debug value uses. |
1420 | SmallVector<DbgValueInst *, 4> DbgValues; |
1421 | SmallVector<DbgVariableRecord *, 4> DbgVariableRecords; |
1422 | llvm::findDbgValues(DbgValues, V: I, DbgVariableRecords: &DbgVariableRecords); |
1423 | |
1424 | auto InvertDbgValueUse = [&](auto *DbgVal) { |
1425 | SmallVector<uint64_t, 1> Ops = {dwarf::DW_OP_not}; |
1426 | for (unsigned Idx = 0, End = DbgVal->getNumVariableLocationOps(); |
1427 | Idx != End; ++Idx) |
1428 | if (DbgVal->getVariableLocationOp(Idx) == I) |
1429 | DbgVal->setExpression( |
1430 | DIExpression::appendOpsToArg(Expr: DbgVal->getExpression(), Ops, ArgNo: Idx)); |
1431 | }; |
1432 | |
1433 | for (DbgValueInst *DVI : DbgValues) |
1434 | InvertDbgValueUse(DVI); |
1435 | |
1436 | for (DbgVariableRecord *DVR : DbgVariableRecords) |
1437 | InvertDbgValueUse(DVR); |
1438 | } |
1439 | |
1440 | /// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a |
1441 | /// constant zero (which is the 'negate' form). |
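/// For example, for 'sub i32 0, %x' this returns %x, and for the constant
/// i32 5 it returns the folded constant i32 -5 (illustrative).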
1442 | Value *InstCombinerImpl::dyn_castNegVal(Value *V) const { |
1443 | Value *NegV; |
1444 | if (match(V, P: m_Neg(V: m_Value(V&: NegV)))) |
1445 | return NegV; |
1446 | |
1447 | // Constants can be considered to be negated values if they can be folded. |
1448 | if (ConstantInt *C = dyn_cast<ConstantInt>(Val: V)) |
1449 | return ConstantExpr::getNeg(C); |
1450 | |
1451 | if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(Val: V)) |
1452 | if (C->getType()->getElementType()->isIntegerTy()) |
1453 | return ConstantExpr::getNeg(C); |
1454 | |
1455 | if (ConstantVector *CV = dyn_cast<ConstantVector>(Val: V)) { |
1456 | for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { |
1457 | Constant *Elt = CV->getAggregateElement(Elt: i); |
1458 | if (!Elt) |
1459 | return nullptr; |
1460 | |
1461 | if (isa<UndefValue>(Val: Elt)) |
1462 | continue; |
1463 | |
1464 | if (!isa<ConstantInt>(Val: Elt)) |
1465 | return nullptr; |
1466 | } |
1467 | return ConstantExpr::getNeg(C: CV); |
1468 | } |
1469 | |
1470 | // Negate integer vector splats. |
1471 | if (auto *CV = dyn_cast<Constant>(Val: V)) |
1472 | if (CV->getType()->isVectorTy() && |
1473 | CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue()) |
1474 | return ConstantExpr::getNeg(C: CV); |
1475 | |
1476 | return nullptr; |
1477 | } |
1478 | |
1479 | // Try to fold: |
1480 | // 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y)) |
1481 | // -> ({s|u}itofp (int_binop x, y)) |
1482 | // 2) (fp_binop ({s|u}itofp x), FpC) |
1483 | // -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC))) |
1484 | // |
1485 | // Assuming the sign of the cast for x/y is `OpsFromSigned`. |
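//
// A minimal illustrative sketch (assuming the narrow add can be proven not to
// overflow):
//   fadd (sitofp i8 %x to float), (sitofp i8 %y to float)
//     --> sitofp (add nsw i8 %x, %y) to float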
1486 | Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign( |
1487 | BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps, |
1488 | Constant *Op1FpC, SmallVectorImpl<WithCache<const Value *>> &OpsKnown) { |
1489 | |
1490 | Type *FPTy = BO.getType(); |
1491 | Type *IntTy = IntOps[0]->getType(); |
1492 | |
1493 | unsigned IntSz = IntTy->getScalarSizeInBits(); |
// This is the maximum number of in-use bits in the integer for which the
// int -> fp cast is exact.
1496 | unsigned MaxRepresentableBits = |
1497 | APFloat::semanticsPrecision(FPTy->getScalarType()->getFltSemantics()); |
1498 | |
// Preserve the known number of leading bits. This can allow us to trivially
// pass the nsw/nuw checks later on.
1501 | unsigned NumUsedLeadingBits[2] = {IntSz, IntSz}; |
1502 | |
1503 | // NB: This only comes up if OpsFromSigned is true, so there is no need to |
// cache it between calls to `foldFBinOpOfIntCastsFromSign`.
1505 | auto IsNonZero = [&](unsigned OpNo) -> bool { |
1506 | if (OpsKnown[OpNo].hasKnownBits() && |
1507 | OpsKnown[OpNo].getKnownBits(Q: SQ).isNonZero()) |
1508 | return true; |
1509 | return isKnownNonZero(V: IntOps[OpNo], Q: SQ); |
1510 | }; |
1511 | |
1512 | auto IsNonNeg = [&](unsigned OpNo) -> bool { |
1513 | // NB: This matches the impl in ValueTracking, we just try to use cached |
1514 | // knownbits here. If we ever start supporting WithCache for |
1515 | // `isKnownNonNegative`, change this to an explicit call. |
1516 | return OpsKnown[OpNo].getKnownBits(Q: SQ).isNonNegative(); |
1517 | }; |
1518 | |
1519 | // Check if we know for certain that ({s|u}itofp op) is exact. |
1520 | auto IsValidPromotion = [&](unsigned OpNo) -> bool { |
1521 | // Can we treat this operand as the desired sign? |
1522 | if (OpsFromSigned != isa<SIToFPInst>(Val: BO.getOperand(i_nocapture: OpNo)) && |
1523 | !IsNonNeg(OpNo)) |
1524 | return false; |
1525 | |
// If fp precision >= bitwidth(op) then it's exact.
1527 | // NB: This is slightly conservative for `sitofp`. For signed conversion, we |
1528 | // can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be |
1529 | // handled specially. We can't, however, increase the bound arbitrarily for |
1530 | // `sitofp` as for larger sizes, it won't sign extend. |
1531 | if (MaxRepresentableBits < IntSz) { |
// Otherwise, if it's a signed cast, check that fp precision >= bitwidth(op) -
// numSignBits(op).
1534 | // TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change |
1535 | // `IntOps[OpNo]` arguments to `KnownOps[OpNo]`. |
1536 | if (OpsFromSigned) |
1537 | NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(Op: IntOps[OpNo]); |
// Finally, for an unsigned cast, check that fp precision >= bitwidth(op) -
// numLeadingZeros(op).
1540 | else { |
1541 | NumUsedLeadingBits[OpNo] = |
1542 | IntSz - OpsKnown[OpNo].getKnownBits(Q: SQ).countMinLeadingZeros(); |
1543 | } |
1544 | } |
1545 | // NB: We could also check if op is known to be a power of 2 or zero (which |
// will always be representable). It's unlikely, however, that if we are
// unable to bound op in any way, we will be able to pass the overflow checks
// later on.
1549 | |
1550 | if (MaxRepresentableBits < NumUsedLeadingBits[OpNo]) |
1551 | return false; |
1552 | // Signed + Mul also requires that op is non-zero to avoid -0 cases. |
1553 | return !OpsFromSigned || BO.getOpcode() != Instruction::FMul || |
1554 | IsNonZero(OpNo); |
1555 | }; |
1556 | |
1557 | // If we have a constant rhs, see if we can losslessly convert it to an int. |
1558 | if (Op1FpC != nullptr) { |
// Signed + Mul requires a non-zero constant to avoid -0 cases.
1560 | if (OpsFromSigned && BO.getOpcode() == Instruction::FMul && |
1561 | !match(V: Op1FpC, P: m_NonZeroFP())) |
1562 | return nullptr; |
1563 | |
1564 | Constant *Op1IntC = ConstantFoldCastOperand( |
1565 | Opcode: OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, C: Op1FpC, |
1566 | DestTy: IntTy, DL); |
1567 | if (Op1IntC == nullptr) |
1568 | return nullptr; |
1569 | if (ConstantFoldCastOperand(Opcode: OpsFromSigned ? Instruction::SIToFP |
1570 | : Instruction::UIToFP, |
1571 | C: Op1IntC, DestTy: FPTy, DL) != Op1FpC) |
1572 | return nullptr; |
1573 | |
1574 | // First try to keep sign of cast the same. |
1575 | IntOps[1] = Op1IntC; |
1576 | } |
1577 | |
1578 | // Ensure lhs/rhs integer types match. |
1579 | if (IntTy != IntOps[1]->getType()) |
1580 | return nullptr; |
1581 | |
1582 | if (Op1FpC == nullptr) { |
1583 | if (!IsValidPromotion(1)) |
1584 | return nullptr; |
1585 | } |
1586 | if (!IsValidPromotion(0)) |
1587 | return nullptr; |
1588 | |
// Finally, we check that the integer version of the binop will not overflow.
1590 | BinaryOperator::BinaryOps IntOpc; |
1591 | // Because of the precision check, we can often rule out overflows. |
1592 | bool NeedsOverflowCheck = true; |
1593 | // Try to conservatively rule out overflow based on the already done precision |
1594 | // checks. |
1595 | unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1; |
1596 | unsigned OverflowMaxCurBits = |
1597 | std::max(a: NumUsedLeadingBits[0], b: NumUsedLeadingBits[1]); |
1598 | bool OutputSigned = OpsFromSigned; |
1599 | switch (BO.getOpcode()) { |
1600 | case Instruction::FAdd: |
1601 | IntOpc = Instruction::Add; |
1602 | OverflowMaxOutputBits += OverflowMaxCurBits; |
1603 | break; |
1604 | case Instruction::FSub: |
1605 | IntOpc = Instruction::Sub; |
1606 | OverflowMaxOutputBits += OverflowMaxCurBits; |
1607 | break; |
1608 | case Instruction::FMul: |
1609 | IntOpc = Instruction::Mul; |
1610 | OverflowMaxOutputBits += OverflowMaxCurBits * 2; |
1611 | break; |
1612 | default: |
1613 | llvm_unreachable("Unsupported binop" ); |
1614 | } |
1615 | // The precision check may have already ruled out overflow. |
1616 | if (OverflowMaxOutputBits < IntSz) { |
1617 | NeedsOverflowCheck = false; |
// Although an unsigned sub may wrap, its result is still bounded to an
// in-range signed value; this is what allows us to avoid the overflow check
// for sub.
1620 | if (IntOpc == Instruction::Sub) |
1621 | OutputSigned = true; |
1622 | } |
1623 | |
1624 | // Precision check did not rule out overflow, so need to check. |
1625 | // TODO: If we add support for `WithCache` in `willNotOverflow`, change |
1626 | // `IntOps[...]` arguments to `KnownOps[...]`. |
1627 | if (NeedsOverflowCheck && |
1628 | !willNotOverflow(Opcode: IntOpc, LHS: IntOps[0], RHS: IntOps[1], CxtI: BO, IsSigned: OutputSigned)) |
1629 | return nullptr; |
1630 | |
1631 | Value *IntBinOp = Builder.CreateBinOp(Opc: IntOpc, LHS: IntOps[0], RHS: IntOps[1]); |
1632 | if (auto *IntBO = dyn_cast<BinaryOperator>(Val: IntBinOp)) { |
1633 | IntBO->setHasNoSignedWrap(OutputSigned); |
1634 | IntBO->setHasNoUnsignedWrap(!OutputSigned); |
1635 | } |
1636 | if (OutputSigned) |
1637 | return new SIToFPInst(IntBinOp, FPTy); |
1638 | return new UIToFPInst(IntBinOp, FPTy); |
1639 | } |
1640 | |
1641 | // Try to fold: |
1642 | // 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y)) |
1643 | // -> ({s|u}itofp (int_binop x, y)) |
1644 | // 2) (fp_binop ({s|u}itofp x), FpC) |
1645 | // -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC))) |
1646 | Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) { |
1647 | std::array<Value *, 2> IntOps = {nullptr, nullptr}; |
1648 | Constant *Op1FpC = nullptr; |
1649 | // Check for: |
1650 | // 1) (binop ({s|u}itofp x), ({s|u}itofp y)) |
1651 | // 2) (binop ({s|u}itofp x), FpC) |
1652 | if (!match(V: BO.getOperand(i_nocapture: 0), P: m_SIToFP(Op: m_Value(V&: IntOps[0]))) && |
1653 | !match(V: BO.getOperand(i_nocapture: 0), P: m_UIToFP(Op: m_Value(V&: IntOps[0])))) |
1654 | return nullptr; |
1655 | |
1656 | if (!match(V: BO.getOperand(i_nocapture: 1), P: m_Constant(C&: Op1FpC)) && |
1657 | !match(V: BO.getOperand(i_nocapture: 1), P: m_SIToFP(Op: m_Value(V&: IntOps[1]))) && |
1658 | !match(V: BO.getOperand(i_nocapture: 1), P: m_UIToFP(Op: m_Value(V&: IntOps[1])))) |
1659 | return nullptr; |
1660 | |
1661 | // Cache KnownBits a bit to potentially save some analysis. |
1662 | SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]}; |
1663 | |
1664 | // Try treating x/y as coming from both `uitofp` and `sitofp`. There are |
1665 | // different constraints depending on the sign of the cast. |
1666 | // NB: `(uitofp nneg X)` == `(sitofp nneg X)`. |
1667 | if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false, |
1668 | IntOps, Op1FpC, OpsKnown)) |
1669 | return R; |
1670 | return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps, |
1671 | Op1FpC, OpsKnown); |
1672 | } |
1673 | |
1674 | /// A binop with a constant operand and a sign-extended boolean operand may be |
1675 | /// converted into a select of constants by applying the binary operation to |
1676 | /// the constant with the two possible values of the extended boolean (0 or -1). |
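/// Illustrative example:
///   mul (sext i1 %b to i32), 7 --> select i1 %b, i32 -7, i32 0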
1677 | Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) { |
1678 | // TODO: Handle non-commutative binop (constant is operand 0). |
1679 | // TODO: Handle zext. |
1680 | // TODO: Peek through 'not' of cast. |
1681 | Value *BO0 = BO.getOperand(i_nocapture: 0); |
1682 | Value *BO1 = BO.getOperand(i_nocapture: 1); |
1683 | Value *X; |
1684 | Constant *C; |
1685 | if (!match(V: BO0, P: m_SExt(Op: m_Value(V&: X))) || !match(V: BO1, P: m_ImmConstant(C)) || |
1686 | !X->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
1687 | return nullptr; |
1688 | |
1689 | // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C) |
1690 | Constant *Ones = ConstantInt::getAllOnesValue(Ty: BO.getType()); |
1691 | Constant *Zero = ConstantInt::getNullValue(Ty: BO.getType()); |
1692 | Value *TVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Ones, RHS: C); |
1693 | Value *FVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Zero, RHS: C); |
1694 | return SelectInst::Create(C: X, S1: TVal, S2: FVal); |
1695 | } |
1696 | |
1697 | static Value *simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI, |
1698 | bool IsTrueArm) { |
1699 | SmallVector<Value *> Ops; |
1700 | for (Value *Op : I.operands()) { |
1701 | Value *V = nullptr; |
1702 | if (Op == SI) { |
1703 | V = IsTrueArm ? SI->getTrueValue() : SI->getFalseValue(); |
1704 | } else if (match(V: SI->getCondition(), |
1705 | P: m_SpecificICmp(MatchPred: IsTrueArm ? ICmpInst::ICMP_EQ |
1706 | : ICmpInst::ICMP_NE, |
1707 | L: m_Specific(V: Op), R: m_Value(V))) && |
1708 | isGuaranteedNotToBeUndefOrPoison(V)) { |
1709 | // Pass |
1710 | } else { |
1711 | V = Op; |
1712 | } |
1713 | Ops.push_back(Elt: V); |
1714 | } |
1715 | |
1716 | return simplifyInstructionWithOperands(I: &I, NewOps: Ops, Q: I.getDataLayout()); |
1717 | } |
1718 | |
1719 | static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, |
1720 | Value *NewOp, InstCombiner &IC) { |
1721 | Instruction *Clone = I.clone(); |
1722 | Clone->replaceUsesOfWith(From: SI, To: NewOp); |
1723 | Clone->dropUBImplyingAttrsAndMetadata(); |
1724 | IC.InsertNewInstBefore(New: Clone, Old: I.getIterator()); |
1725 | return Clone; |
1726 | } |
1727 | |
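// Fold op(select(C, T, F), X) into select(C, op(T, X), op(F, X)) when at least
// one arm simplifies. A minimal illustrative sketch (names hypothetical,
// assuming the select has one use):
//   %s = select i1 %c, i32 0, i32 %x
//   %r = add i32 %s, %y
//   -->
//   %x.plus.y = add i32 %x, %y   ; clone created for the arm that did not fold
//   %r = select i1 %c, i32 %y, i32 %x.plus.y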
1728 | Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, |
1729 | bool FoldWithMultiUse) { |
// Don't modify shared select instructions unless FoldWithMultiUse is set.
1731 | if (!SI->hasOneUse() && !FoldWithMultiUse) |
1732 | return nullptr; |
1733 | |
1734 | Value *TV = SI->getTrueValue(); |
1735 | Value *FV = SI->getFalseValue(); |
1736 | |
1737 | // Bool selects with constant operands can be folded to logical ops. |
1738 | if (SI->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
1739 | return nullptr; |
1740 | |
1741 | // Avoid breaking min/max reduction pattern, |
1742 | // which is necessary for vectorization later. |
1743 | if (isa<MinMaxIntrinsic>(Val: &Op)) |
1744 | for (Value *IntrinOp : Op.operands()) |
1745 | if (auto *PN = dyn_cast<PHINode>(Val: IntrinOp)) |
1746 | for (Value *PhiOp : PN->operands()) |
1747 | if (PhiOp == &Op) |
1748 | return nullptr; |
1749 | |
1750 | // Test if a FCmpInst instruction is used exclusively by a select as |
1751 | // part of a minimum or maximum operation. If so, refrain from doing |
1752 | // any other folding. This helps out other analyses which understand |
1753 | // non-obfuscated minimum and maximum idioms. And in this case, at |
1754 | // least one of the comparison operands has at least one user besides |
1755 | // the compare (the select), which would often largely negate the |
1756 | // benefit of folding anyway. |
1757 | if (auto *CI = dyn_cast<FCmpInst>(Val: SI->getCondition())) { |
1758 | if (CI->hasOneUse()) { |
1759 | Value *Op0 = CI->getOperand(i_nocapture: 0), *Op1 = CI->getOperand(i_nocapture: 1); |
1760 | if (((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1)) && |
1761 | !CI->isCommutative()) |
1762 | return nullptr; |
1763 | } |
1764 | } |
1765 | |
1766 | // Make sure that one of the select arms folds successfully. |
1767 | Value *NewTV = simplifyOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm=*/true); |
1768 | Value *NewFV = |
1769 | simplifyOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm=*/false); |
1770 | if (!NewTV && !NewFV) |
1771 | return nullptr; |
1772 | |
1773 | // Create an instruction for the arm that did not fold. |
1774 | if (!NewTV) |
1775 | NewTV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: TV, IC&: *this); |
1776 | if (!NewFV) |
1777 | NewFV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: FV, IC&: *this); |
1778 | return SelectInst::Create(C: SI->getCondition(), S1: NewTV, S2: NewFV, NameStr: "" , InsertBefore: nullptr, MDFrom: SI); |
1779 | } |
1780 | |
1781 | static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN, |
1782 | Value *InValue, BasicBlock *InBB, |
1783 | const DataLayout &DL, |
1784 | const SimplifyQuery SQ) { |
1785 | // NB: It is a precondition of this transform that the operands be |
1786 | // phi translatable! |
1787 | SmallVector<Value *> Ops; |
1788 | for (Value *Op : I.operands()) { |
1789 | if (Op == PN) |
1790 | Ops.push_back(Elt: InValue); |
1791 | else |
1792 | Ops.push_back(Elt: Op->DoPHITranslation(CurBB: PN->getParent(), PredBB: InBB)); |
1793 | } |
1794 | |
1795 | // Don't consider the simplification successful if we get back a constant |
1796 | // expression. That's just an instruction in hiding. |
1797 | // Also reject the case where we simplify back to the phi node. We wouldn't |
1798 | // be able to remove it in that case. |
1799 | Value *NewVal = simplifyInstructionWithOperands( |
1800 | I: &I, NewOps: Ops, Q: SQ.getWithInstruction(I: InBB->getTerminator())); |
1801 | if (NewVal && NewVal != PN && !match(V: NewVal, P: m_ConstantExpr())) |
1802 | return NewVal; |
1803 | |
1804 | // Check if incoming PHI value can be replaced with constant |
1805 | // based on implied condition. |
1806 | BranchInst *TerminatorBI = dyn_cast<BranchInst>(Val: InBB->getTerminator()); |
1807 | const ICmpInst *ICmp = dyn_cast<ICmpInst>(Val: &I); |
1808 | if (TerminatorBI && TerminatorBI->isConditional() && |
1809 | TerminatorBI->getSuccessor(i: 0) != TerminatorBI->getSuccessor(i: 1) && ICmp) { |
1810 | bool LHSIsTrue = TerminatorBI->getSuccessor(i: 0) == PN->getParent(); |
1811 | std::optional<bool> ImpliedCond = isImpliedCondition( |
1812 | LHS: TerminatorBI->getCondition(), RHSPred: ICmp->getCmpPredicate(), RHSOp0: Ops[0], RHSOp1: Ops[1], |
1813 | DL, LHSIsTrue); |
1814 | if (ImpliedCond) |
1815 | return ConstantInt::getBool(Ty: I.getType(), V: ImpliedCond.value()); |
1816 | } |
1817 | |
1818 | return nullptr; |
1819 | } |
1820 | |
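// Fold op(phi(...), C) by applying the op to each incoming value. A minimal
// illustrative sketch (names hypothetical; assumes the phi has one use, %bb1
// branches unconditionally to the phi block, and the edge is not a backedge):
//   %p = phi i32 [ 0, %bb0 ], [ %x, %bb1 ]
//   %r = add i32 %p, 5
//   -->
//   (in %bb1)  %x.plus.5 = add i32 %x, 5
//   %r = phi i32 [ 5, %bb0 ], [ %x.plus.5, %bb1 ]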
1821 | Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN, |
1822 | bool AllowMultipleUses) { |
1823 | unsigned NumPHIValues = PN->getNumIncomingValues(); |
1824 | if (NumPHIValues == 0) |
1825 | return nullptr; |
1826 | |
1827 | // We normally only transform phis with a single use. However, if a PHI has |
1828 | // multiple uses and they are all the same operation, we can fold *all* of the |
1829 | // uses into the PHI. |
1830 | bool OneUse = PN->hasOneUse(); |
1831 | bool IdenticalUsers = false; |
1832 | if (!AllowMultipleUses && !OneUse) { |
1833 | // Walk the use list for the instruction, comparing them to I. |
1834 | for (User *U : PN->users()) { |
1835 | Instruction *UI = cast<Instruction>(Val: U); |
1836 | if (UI != &I && !I.isIdenticalTo(I: UI)) |
1837 | return nullptr; |
1838 | } |
1839 | // Otherwise, we can replace *all* users with the new PHI we form. |
1840 | IdenticalUsers = true; |
1841 | } |
1842 | |
1843 | // Check that all operands are phi-translatable. |
1844 | for (Value *Op : I.operands()) { |
1845 | if (Op == PN) |
1846 | continue; |
1847 | |
1848 | // Non-instructions never require phi-translation. |
1849 | auto *I = dyn_cast<Instruction>(Val: Op); |
1850 | if (!I) |
1851 | continue; |
1852 | |
1853 | // Phi-translate can handle phi nodes in the same block. |
1854 | if (isa<PHINode>(Val: I)) |
1855 | if (I->getParent() == PN->getParent()) |
1856 | continue; |
1857 | |
1858 | // Operand dominates the block, no phi-translation necessary. |
1859 | if (DT.dominates(Def: I, BB: PN->getParent())) |
1860 | continue; |
1861 | |
1862 | // Not phi-translatable, bail out. |
1863 | return nullptr; |
1864 | } |
1865 | |
1866 | // Check to see whether the instruction can be folded into each phi operand. |
1867 | // If there is one operand that does not fold, remember the BB it is in. |
1868 | SmallVector<Value *> NewPhiValues; |
1869 | SmallVector<unsigned int> OpsToMoveUseToIncomingBB; |
1870 | bool SeenNonSimplifiedInVal = false; |
1871 | for (unsigned i = 0; i != NumPHIValues; ++i) { |
1872 | Value *InVal = PN->getIncomingValue(i); |
1873 | BasicBlock *InBB = PN->getIncomingBlock(i); |
1874 | |
1875 | if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InValue: InVal, InBB, DL, SQ)) { |
1876 | NewPhiValues.push_back(Elt: NewVal); |
1877 | continue; |
1878 | } |
1879 | |
1880 | // Handle some cases that can't be fully simplified, but where we know that |
1881 | // the two instructions will fold into one. |
1882 | auto WillFold = [&]() { |
1883 | if (!InVal->hasUseList() || !InVal->hasOneUser()) |
1884 | return false; |
1885 | |
1886 | // icmp of ucmp/scmp with constant will fold to icmp. |
1887 | const APInt *Ignored; |
1888 | if (isa<CmpIntrinsic>(Val: InVal) && |
1889 | match(V: &I, P: m_ICmp(L: m_Specific(V: PN), R: m_APInt(Res&: Ignored)))) |
1890 | return true; |
1891 | |
1892 | // icmp eq zext(bool), 0 will fold to !bool. |
1893 | if (isa<ZExtInst>(Val: InVal) && |
1894 | cast<ZExtInst>(Val: InVal)->getSrcTy()->isIntOrIntVectorTy(BitWidth: 1) && |
1895 | match(V: &I, |
1896 | P: m_SpecificICmp(MatchPred: ICmpInst::ICMP_EQ, L: m_Specific(V: PN), R: m_Zero()))) |
1897 | return true; |
1898 | |
1899 | return false; |
1900 | }; |
1901 | |
1902 | if (WillFold()) { |
1903 | OpsToMoveUseToIncomingBB.push_back(Elt: i); |
1904 | NewPhiValues.push_back(Elt: nullptr); |
1905 | continue; |
1906 | } |
1907 | |
1908 | if (!OneUse && !IdenticalUsers) |
1909 | return nullptr; |
1910 | |
1911 | if (SeenNonSimplifiedInVal) |
1912 | return nullptr; // More than one non-simplified value. |
1913 | SeenNonSimplifiedInVal = true; |
1914 | |
1915 | // If there is exactly one non-simplified value, we can insert a copy of the |
1916 | // operation in that block. However, if this is a critical edge, we would |
1917 | // be inserting the computation on some other paths (e.g. inside a loop). |
1918 | // Only do this if the pred block is unconditionally branching into the phi |
1919 | // block. Also, make sure that the pred block is not dead code. |
1920 | BranchInst *BI = dyn_cast<BranchInst>(Val: InBB->getTerminator()); |
1921 | if (!BI || !BI->isUnconditional() || !DT.isReachableFromEntry(A: InBB)) |
1922 | return nullptr; |
1923 | |
1924 | NewPhiValues.push_back(Elt: nullptr); |
1925 | OpsToMoveUseToIncomingBB.push_back(Elt: i); |
1926 | |
1927 | // If the InVal is an invoke at the end of the pred block, then we can't |
1928 | // insert a computation after it without breaking the edge. |
1929 | if (isa<InvokeInst>(Val: InVal)) |
1930 | if (cast<Instruction>(Val: InVal)->getParent() == InBB) |
1931 | return nullptr; |
1932 | |
1933 | // Do not push the operation across a loop backedge. This could result in |
1934 | // an infinite combine loop, and is generally non-profitable (especially |
1935 | // if the operation was originally outside the loop). |
1936 | if (isBackEdge(From: InBB, To: PN->getParent())) |
1937 | return nullptr; |
1938 | } |
1939 | |
1940 | // Clone the instruction that uses the phi node and move it into the incoming |
1941 | // BB because we know that the next iteration of InstCombine will simplify it. |
1942 | SmallDenseMap<BasicBlock *, Instruction *> Clones; |
1943 | for (auto OpIndex : OpsToMoveUseToIncomingBB) { |
1944 | Value *Op = PN->getIncomingValue(i: OpIndex); |
1945 | BasicBlock *OpBB = PN->getIncomingBlock(i: OpIndex); |
1946 | |
1947 | Instruction *Clone = Clones.lookup(Val: OpBB); |
1948 | if (!Clone) { |
1949 | Clone = I.clone(); |
1950 | for (Use &U : Clone->operands()) { |
1951 | if (U == PN) |
1952 | U = Op; |
1953 | else |
1954 | U = U->DoPHITranslation(CurBB: PN->getParent(), PredBB: OpBB); |
1955 | } |
1956 | Clone = InsertNewInstBefore(New: Clone, Old: OpBB->getTerminator()->getIterator()); |
1957 | Clones.insert(KV: {OpBB, Clone}); |
1958 | } |
1959 | |
1960 | NewPhiValues[OpIndex] = Clone; |
1961 | } |
1962 | |
1963 | // Okay, we can do the transformation: create the new PHI node. |
1964 | PHINode *NewPN = PHINode::Create(Ty: I.getType(), NumReservedValues: PN->getNumIncomingValues()); |
1965 | InsertNewInstBefore(New: NewPN, Old: PN->getIterator()); |
1966 | NewPN->takeName(V: PN); |
1967 | NewPN->setDebugLoc(PN->getDebugLoc()); |
1968 | |
1969 | for (unsigned i = 0; i != NumPHIValues; ++i) |
1970 | NewPN->addIncoming(V: NewPhiValues[i], BB: PN->getIncomingBlock(i)); |
1971 | |
1972 | if (IdenticalUsers) { |
1973 | for (User *U : make_early_inc_range(Range: PN->users())) { |
1974 | Instruction *User = cast<Instruction>(Val: U); |
1975 | if (User == &I) |
1976 | continue; |
1977 | replaceInstUsesWith(I&: *User, V: NewPN); |
1978 | eraseInstFromFunction(I&: *User); |
1979 | } |
1980 | OneUse = true; |
1981 | } |
1982 | |
1983 | if (OneUse) { |
1984 | replaceAllDbgUsesWith(From&: const_cast<PHINode &>(*PN), |
1985 | To&: const_cast<PHINode &>(*NewPN), |
1986 | DomPoint&: const_cast<PHINode &>(*PN), DT); |
1987 | } |
1988 | return replaceInstUsesWith(I, V: NewPN); |
1989 | } |
1990 | |
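// Combine two interleaved recurrences that feed an associative binop into a
// single recurrence. A minimal illustrative sketch (names hypothetical; the
// phis and the inner binops must have no uses other than the ones shown):
//   %p0 = phi i32 [ 1, %entry ], [ %a0, %loop ]
//   %p1 = phi i32 [ 2, %entry ], [ %a1, %loop ]
//   %a0 = add i32 %p0, 3
//   %a1 = add i32 %p1, 4
//   %r  = add i32 %a0, %a1
//   -->
//   %reduced.phi = phi i32 [ 3, %entry ], [ %r, %loop ]
//   %r = add i32 %reduced.phi, 7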
1991 | Instruction *InstCombinerImpl::foldBinopWithRecurrence(BinaryOperator &BO) { |
1992 | if (!BO.isAssociative()) |
1993 | return nullptr; |
1994 | |
1995 | // Find the interleaved binary ops. |
1996 | auto Opc = BO.getOpcode(); |
1997 | auto *BO0 = dyn_cast<BinaryOperator>(Val: BO.getOperand(i_nocapture: 0)); |
1998 | auto *BO1 = dyn_cast<BinaryOperator>(Val: BO.getOperand(i_nocapture: 1)); |
1999 | if (!BO0 || !BO1 || !BO0->hasNUses(N: 2) || !BO1->hasNUses(N: 2) || |
2000 | BO0->getOpcode() != Opc || BO1->getOpcode() != Opc || |
2001 | !BO0->isAssociative() || !BO1->isAssociative() || |
2002 | BO0->getParent() != BO1->getParent()) |
2003 | return nullptr; |
2004 | |
2005 | assert(BO.isCommutative() && BO0->isCommutative() && BO1->isCommutative() && |
2006 | "Expected commutative instructions!" ); |
2007 | |
2008 | // Find the matching phis, forming the recurrences. |
2009 | PHINode *PN0, *PN1; |
2010 | Value *Start0, *Step0, *Start1, *Step1; |
2011 | if (!matchSimpleRecurrence(I: BO0, P&: PN0, Start&: Start0, Step&: Step0) || !PN0->hasOneUse() || |
2012 | !matchSimpleRecurrence(I: BO1, P&: PN1, Start&: Start1, Step&: Step1) || !PN1->hasOneUse() || |
2013 | PN0->getParent() != PN1->getParent()) |
2014 | return nullptr; |
2015 | |
2016 | assert(PN0->getNumIncomingValues() == 2 && PN1->getNumIncomingValues() == 2 && |
2017 | "Expected PHIs with two incoming values!" ); |
2018 | |
2019 | // Convert the start and step values to constants. |
2020 | auto *Init0 = dyn_cast<Constant>(Val: Start0); |
2021 | auto *Init1 = dyn_cast<Constant>(Val: Start1); |
2022 | auto *C0 = dyn_cast<Constant>(Val: Step0); |
2023 | auto *C1 = dyn_cast<Constant>(Val: Step1); |
2024 | if (!Init0 || !Init1 || !C0 || !C1) |
2025 | return nullptr; |
2026 | |
2027 | // Fold the recurrence constants. |
2028 | auto *Init = ConstantFoldBinaryInstruction(Opcode: Opc, V1: Init0, V2: Init1); |
2029 | auto *C = ConstantFoldBinaryInstruction(Opcode: Opc, V1: C0, V2: C1); |
2030 | if (!Init || !C) |
2031 | return nullptr; |
2032 | |
2033 | // Create the reduced PHI. |
2034 | auto *NewPN = PHINode::Create(Ty: PN0->getType(), NumReservedValues: PN0->getNumIncomingValues(), |
2035 | NameStr: "reduced.phi" ); |
2036 | |
2037 | // Create the new binary op. |
2038 | auto *NewBO = BinaryOperator::Create(Op: Opc, S1: NewPN, S2: C); |
2039 | if (Opc == Instruction::FAdd || Opc == Instruction::FMul) { |
2040 | // Intersect FMF flags for FADD and FMUL. |
2041 | FastMathFlags Intersect = BO0->getFastMathFlags() & |
2042 | BO1->getFastMathFlags() & BO.getFastMathFlags(); |
2043 | NewBO->setFastMathFlags(Intersect); |
2044 | } else { |
2045 | OverflowTracking Flags; |
2046 | Flags.AllKnownNonNegative = false; |
2047 | Flags.AllKnownNonZero = false; |
2048 | Flags.mergeFlags(I&: *BO0); |
2049 | Flags.mergeFlags(I&: *BO1); |
2050 | Flags.mergeFlags(I&: BO); |
2051 | Flags.applyFlags(I&: *NewBO); |
2052 | } |
2053 | NewBO->takeName(V: &BO); |
2054 | |
2055 | for (unsigned I = 0, E = PN0->getNumIncomingValues(); I != E; ++I) { |
2056 | auto *V = PN0->getIncomingValue(i: I); |
2057 | auto *BB = PN0->getIncomingBlock(i: I); |
2058 | if (V == Init0) { |
2059 | assert(((PN1->getIncomingValue(0) == Init1 && |
2060 | PN1->getIncomingBlock(0) == BB) || |
2061 | (PN1->getIncomingValue(1) == Init1 && |
2062 | PN1->getIncomingBlock(1) == BB)) && |
2063 | "Invalid incoming block!" ); |
2064 | NewPN->addIncoming(V: Init, BB); |
2065 | } else if (V == BO0) { |
2066 | assert(((PN1->getIncomingValue(0) == BO1 && |
2067 | PN1->getIncomingBlock(0) == BB) || |
2068 | (PN1->getIncomingValue(1) == BO1 && |
2069 | PN1->getIncomingBlock(1) == BB)) && |
2070 | "Invalid incoming block!" ); |
2071 | NewPN->addIncoming(V: NewBO, BB); |
2072 | } else |
2073 | llvm_unreachable("Unexpected incoming value!" ); |
2074 | } |
2075 | |
2076 | LLVM_DEBUG(dbgs() << " Combined " << *PN0 << "\n " << *BO0 |
2077 | << "\n with " << *PN1 << "\n " << *BO1 |
2078 | << '\n'); |
2079 | |
2080 | // Insert the new recurrence and remove the old (dead) ones. |
2081 | InsertNewInstWith(New: NewPN, Old: PN0->getIterator()); |
2082 | InsertNewInstWith(New: NewBO, Old: BO0->getIterator()); |
2083 | |
2084 | eraseInstFromFunction( |
2085 | I&: *replaceInstUsesWith(I&: *BO0, V: PoisonValue::get(T: BO0->getType()))); |
2086 | eraseInstFromFunction( |
2087 | I&: *replaceInstUsesWith(I&: *BO1, V: PoisonValue::get(T: BO1->getType()))); |
2088 | eraseInstFromFunction(I&: *PN0); |
2089 | eraseInstFromFunction(I&: *PN1); |
2090 | |
2091 | return replaceInstUsesWith(I&: BO, V: NewBO); |
2092 | } |
2093 | |
2094 | Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) { |
2095 | // Attempt to fold binary operators whose operands are simple recurrences. |
2096 | if (auto *NewBO = foldBinopWithRecurrence(BO)) |
2097 | return NewBO; |
2098 | |
2099 | // TODO: This should be similar to the incoming values check in foldOpIntoPhi: |
2100 | // we are guarding against replicating the binop in >1 predecessor. |
2101 | // This could miss matching a phi with 2 constant incoming values. |
2102 | auto *Phi0 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 0)); |
2103 | auto *Phi1 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 1)); |
2104 | if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() || |
2105 | Phi0->getNumOperands() != Phi1->getNumOperands()) |
2106 | return nullptr; |
2107 | |
2108 | // TODO: Remove the restriction for binop being in the same block as the phis. |
2109 | if (BO.getParent() != Phi0->getParent() || |
2110 | BO.getParent() != Phi1->getParent()) |
2111 | return nullptr; |
2112 | |
// Fold if, for every predecessor block, one of the two incoming values (from
// phi0 or phi1) is the identity constant of the binary operator; the binop
// then reduces to a phi of the other incoming values.
2116 | // For example: |
2117 | // %phi0 = phi i32 [0, %bb0], [%i, %bb1] |
2118 | // %phi1 = phi i32 [%j, %bb0], [0, %bb1] |
2119 | // %add = add i32 %phi0, %phi1 |
2120 | // ==> |
2121 | // %add = phi i32 [%j, %bb0], [%i, %bb1] |
2122 | Constant *C = ConstantExpr::getBinOpIdentity(Opcode: BO.getOpcode(), Ty: BO.getType(), |
2123 | /*AllowRHSConstant*/ false); |
2124 | if (C) { |
2125 | SmallVector<Value *, 4> NewIncomingValues; |
2126 | auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) { |
2127 | auto &Phi0Use = std::get<0>(t&: T); |
2128 | auto &Phi1Use = std::get<1>(t&: T); |
2129 | if (Phi0->getIncomingBlock(U: Phi0Use) != Phi1->getIncomingBlock(U: Phi1Use)) |
2130 | return false; |
2131 | Value *Phi0UseV = Phi0Use.get(); |
2132 | Value *Phi1UseV = Phi1Use.get(); |
2133 | if (Phi0UseV == C) |
2134 | NewIncomingValues.push_back(Elt: Phi1UseV); |
2135 | else if (Phi1UseV == C) |
2136 | NewIncomingValues.push_back(Elt: Phi0UseV); |
2137 | else |
2138 | return false; |
2139 | return true; |
2140 | }; |
2141 | |
2142 | if (all_of(Range: zip(t: Phi0->operands(), u: Phi1->operands()), |
2143 | P: CanFoldIncomingValuePair)) { |
2144 | PHINode *NewPhi = |
2145 | PHINode::Create(Ty: Phi0->getType(), NumReservedValues: Phi0->getNumOperands()); |
2146 | assert(NewIncomingValues.size() == Phi0->getNumOperands() && |
2147 | "The number of collected incoming values should equal the number " |
2148 | "of the original PHINode operands!" ); |
2149 | for (unsigned I = 0; I < Phi0->getNumOperands(); I++) |
2150 | NewPhi->addIncoming(V: NewIncomingValues[I], BB: Phi0->getIncomingBlock(i: I)); |
2151 | return NewPhi; |
2152 | } |
2153 | } |
2154 | |
2155 | if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2) |
2156 | return nullptr; |
2157 | |
2158 | // Match a pair of incoming constants for one of the predecessor blocks. |
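// An illustrative sketch (block names hypothetical; %other.bb must branch
// unconditionally to this block):
//   %phi0 = phi i32 [ 7, %const.bb ], [ %x, %other.bb ]
//   %phi1 = phi i32 [ 3, %const.bb ], [ %y, %other.bb ]
//   %r = add i32 %phi0, %phi1
//   -->
//   (in %other.bb)  %xy = add i32 %x, %y
//   %r = phi i32 [ 10, %const.bb ], [ %xy, %other.bb ]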
2159 | BasicBlock *ConstBB, *OtherBB; |
2160 | Constant *C0, *C1; |
2161 | if (match(V: Phi0->getIncomingValue(i: 0), P: m_ImmConstant(C&: C0))) { |
2162 | ConstBB = Phi0->getIncomingBlock(i: 0); |
2163 | OtherBB = Phi0->getIncomingBlock(i: 1); |
2164 | } else if (match(V: Phi0->getIncomingValue(i: 1), P: m_ImmConstant(C&: C0))) { |
2165 | ConstBB = Phi0->getIncomingBlock(i: 1); |
2166 | OtherBB = Phi0->getIncomingBlock(i: 0); |
2167 | } else { |
2168 | return nullptr; |
2169 | } |
2170 | if (!match(V: Phi1->getIncomingValueForBlock(BB: ConstBB), P: m_ImmConstant(C&: C1))) |
2171 | return nullptr; |
2172 | |
2173 | // The block that we are hoisting to must reach here unconditionally. |
2174 | // Otherwise, we could be speculatively executing an expensive or |
// non-speculatable op.
2176 | auto *PredBlockBranch = dyn_cast<BranchInst>(Val: OtherBB->getTerminator()); |
2177 | if (!PredBlockBranch || PredBlockBranch->isConditional() || |
2178 | !DT.isReachableFromEntry(A: OtherBB)) |
2179 | return nullptr; |
2180 | |
2181 | // TODO: This check could be tightened to only apply to binops (div/rem) that |
2182 | // are not safe to speculatively execute. But that could allow hoisting |
2183 | // potentially expensive instructions (fdiv for example). |
2184 | for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter) |
2185 | if (!isGuaranteedToTransferExecutionToSuccessor(I: &*BBIter)) |
2186 | return nullptr; |
2187 | |
2188 | // Fold constants for the predecessor block with constant incoming values. |
2189 | Constant *NewC = ConstantFoldBinaryOpOperands(Opcode: BO.getOpcode(), LHS: C0, RHS: C1, DL); |
2190 | if (!NewC) |
2191 | return nullptr; |
2192 | |
2193 | // Make a new binop in the predecessor block with the non-constant incoming |
2194 | // values. |
2195 | Builder.SetInsertPoint(PredBlockBranch); |
2196 | Value *NewBO = Builder.CreateBinOp(Opc: BO.getOpcode(), |
2197 | LHS: Phi0->getIncomingValueForBlock(BB: OtherBB), |
2198 | RHS: Phi1->getIncomingValueForBlock(BB: OtherBB)); |
2199 | if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(Val: NewBO)) |
2200 | NotFoldedNewBO->copyIRFlags(V: &BO); |
2201 | |
2202 | // Replace the binop with a phi of the new values. The old phis are dead. |
2203 | PHINode *NewPhi = PHINode::Create(Ty: BO.getType(), NumReservedValues: 2); |
2204 | NewPhi->addIncoming(V: NewBO, BB: OtherBB); |
2205 | NewPhi->addIncoming(V: NewC, BB: ConstBB); |
2206 | return NewPhi; |
2207 | } |
2208 | |
2209 | Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) { |
2210 | if (!isa<Constant>(Val: I.getOperand(i_nocapture: 1))) |
2211 | return nullptr; |
2212 | |
2213 | if (auto *Sel = dyn_cast<SelectInst>(Val: I.getOperand(i_nocapture: 0))) { |
2214 | if (Instruction *NewSel = FoldOpIntoSelect(Op&: I, SI: Sel)) |
2215 | return NewSel; |
2216 | } else if (auto *PN = dyn_cast<PHINode>(Val: I.getOperand(i_nocapture: 0))) { |
2217 | if (Instruction *NewPhi = foldOpIntoPhi(I, PN)) |
2218 | return NewPhi; |
2219 | } |
2220 | return nullptr; |
2221 | } |
2222 | |
2223 | static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) { |
2224 | // If this GEP has only 0 indices, it is the same pointer as |
2225 | // Src. If Src is not a trivial GEP too, don't combine |
2226 | // the indices. |
2227 | if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() && |
2228 | !Src.hasOneUse()) |
2229 | return false; |
2230 | return true; |
2231 | } |
2232 | |
2233 | /// Find a constant NewC that has property: |
2234 | /// shuffle(NewC, ShMask) = C |
2235 | /// Returns nullptr if such a constant does not exist e.g. ShMask=<0,0> C=<1,2> |
2236 | /// |
2237 | /// A 1-to-1 mapping is not required. Example: |
2238 | /// ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <poison,5,6,poison> |
2239 | Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C, |
2240 | VectorType *NewCTy) { |
2241 | if (isa<ScalableVectorType>(Val: NewCTy)) { |
2242 | Constant *Splat = C->getSplatValue(); |
2243 | if (!Splat) |
2244 | return nullptr; |
2245 | return ConstantVector::getSplat(EC: NewCTy->getElementCount(), Elt: Splat); |
2246 | } |
2247 | |
2248 | if (cast<FixedVectorType>(Val: NewCTy)->getNumElements() > |
2249 | cast<FixedVectorType>(Val: C->getType())->getNumElements()) |
2250 | return nullptr; |
2251 | |
2252 | unsigned NewCNumElts = cast<FixedVectorType>(Val: NewCTy)->getNumElements(); |
2253 | PoisonValue *PoisonScalar = PoisonValue::get(T: C->getType()->getScalarType()); |
2254 | SmallVector<Constant *, 16> NewVecC(NewCNumElts, PoisonScalar); |
2255 | unsigned NumElts = cast<FixedVectorType>(Val: C->getType())->getNumElements(); |
2256 | for (unsigned I = 0; I < NumElts; ++I) { |
2257 | Constant *CElt = C->getAggregateElement(Elt: I); |
2258 | if (ShMask[I] >= 0) { |
2259 | assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle" ); |
2260 | Constant *NewCElt = NewVecC[ShMask[I]]; |
2261 | // Bail out if: |
2262 | // 1. The constant vector contains a constant expression. |
2263 | // 2. The shuffle needs an element of the constant vector that can't |
2264 | // be mapped to a new constant vector. |
2265 | // 3. This is a widening shuffle that copies elements of V1 into the |
2266 | // extended elements (extending with poison is allowed). |
2267 | if (!CElt || (!isa<PoisonValue>(Val: NewCElt) && NewCElt != CElt) || |
2268 | I >= NewCNumElts) |
2269 | return nullptr; |
2270 | NewVecC[ShMask[I]] = CElt; |
2271 | } |
2272 | } |
2273 | return ConstantVector::get(V: NewVecC); |
2274 | } |
2275 | |
2276 | Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) { |
2277 | if (!isa<VectorType>(Val: Inst.getType())) |
2278 | return nullptr; |
2279 | |
2280 | BinaryOperator::BinaryOps Opcode = Inst.getOpcode(); |
2281 | Value *LHS = Inst.getOperand(i_nocapture: 0), *RHS = Inst.getOperand(i_nocapture: 1); |
2282 | assert(cast<VectorType>(LHS->getType())->getElementCount() == |
2283 | cast<VectorType>(Inst.getType())->getElementCount()); |
2284 | assert(cast<VectorType>(RHS->getType())->getElementCount() == |
2285 | cast<VectorType>(Inst.getType())->getElementCount()); |
2286 | |
2287 | // If both operands of the binop are vector concatenations, then perform the |
2288 | // narrow binop on each pair of the source operands followed by concatenation |
2289 | // of the results. |
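// For example, with two <2 x i32> halves concatenated into <4 x i32> (one-use
// shuffles assumed):
//   LHS = shuffle L0, L1, <0, 1, 2, 3>
//   RHS = shuffle R0, R1, <0, 1, 2, 3>
//   LHS op RHS --> shuffle (L0 op R0), (L1 op R1), <0, 1, 2, 3>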
2290 | Value *L0, *L1, *R0, *R1; |
2291 | ArrayRef<int> Mask; |
2292 | if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: L0), v2: m_Value(V&: L1), mask: m_Mask(Mask))) && |
2293 | match(V: RHS, P: m_Shuffle(v1: m_Value(V&: R0), v2: m_Value(V&: R1), mask: m_SpecificMask(Mask))) && |
2294 | LHS->hasOneUse() && RHS->hasOneUse() && |
2295 | cast<ShuffleVectorInst>(Val: LHS)->isConcat() && |
2296 | cast<ShuffleVectorInst>(Val: RHS)->isConcat()) { |
2297 | // This transform does not have the speculative execution constraint as |
2298 | // below because the shuffle is a concatenation. The new binops are |
2299 | // operating on exactly the same elements as the existing binop. |
2300 | // TODO: We could ease the mask requirement to allow different undef lanes, |
2301 | // but that requires an analysis of the binop-with-undef output value. |
2302 | Value *NewBO0 = Builder.CreateBinOp(Opc: Opcode, LHS: L0, RHS: R0); |
2303 | if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO0)) |
2304 | BO->copyIRFlags(V: &Inst); |
2305 | Value *NewBO1 = Builder.CreateBinOp(Opc: Opcode, LHS: L1, RHS: R1); |
2306 | if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO1)) |
2307 | BO->copyIRFlags(V: &Inst); |
2308 | return new ShuffleVectorInst(NewBO0, NewBO1, Mask); |
2309 | } |
2310 | |
2311 | auto createBinOpReverse = [&](Value *X, Value *Y) { |
2312 | Value *V = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y, Name: Inst.getName()); |
2313 | if (auto *BO = dyn_cast<BinaryOperator>(Val: V)) |
2314 | BO->copyIRFlags(V: &Inst); |
2315 | Module *M = Inst.getModule(); |
2316 | Function *F = Intrinsic::getOrInsertDeclaration( |
2317 | M, id: Intrinsic::vector_reverse, Tys: V->getType()); |
2318 | return CallInst::Create(Func: F, Args: V); |
2319 | }; |
2320 | |
2321 | // NOTE: Reverse shuffles don't require the speculative execution protection |
2322 | // below because they don't affect which lanes take part in the computation. |
2323 | |
2324 | Value *V1, *V2; |
2325 | if (match(V: LHS, P: m_VecReverse(Op0: m_Value(V&: V1)))) { |
2326 | // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2)) |
2327 | if (match(V: RHS, P: m_VecReverse(Op0: m_Value(V&: V2))) && |
2328 | (LHS->hasOneUse() || RHS->hasOneUse() || |
2329 | (LHS == RHS && LHS->hasNUses(N: 2)))) |
2330 | return createBinOpReverse(V1, V2); |
2331 | |
2332 | // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat)) |
2333 | if (LHS->hasOneUse() && isSplatValue(V: RHS)) |
2334 | return createBinOpReverse(V1, RHS); |
2335 | } |
2336 | // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2)) |
2337 | else if (isSplatValue(V: LHS) && match(V: RHS, P: m_OneUse(SubPattern: m_VecReverse(Op0: m_Value(V&: V2))))) |
2338 | return createBinOpReverse(LHS, V2); |
2339 | |
2340 | auto createBinOpVPReverse = [&](Value *X, Value *Y, Value *EVL) { |
2341 | Value *V = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y, Name: Inst.getName()); |
2342 | if (auto *BO = dyn_cast<BinaryOperator>(Val: V)) |
2343 | BO->copyIRFlags(V: &Inst); |
2344 | |
2345 | ElementCount EC = cast<VectorType>(Val: V->getType())->getElementCount(); |
2346 | Value *AllTrueMask = Builder.CreateVectorSplat(EC, V: Builder.getTrue()); |
2347 | Module *M = Inst.getModule(); |
2348 | Function *F = Intrinsic::getOrInsertDeclaration( |
2349 | M, id: Intrinsic::experimental_vp_reverse, Tys: V->getType()); |
2350 | return CallInst::Create(Func: F, Args: {V, AllTrueMask, EVL}); |
2351 | }; |
2352 | |
2353 | Value *EVL; |
2354 | if (match(V: LHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>( |
2355 | Op0: m_Value(V&: V1), Op1: m_AllOnes(), Op2: m_Value(V&: EVL)))) { |
2356 | // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2)) |
2357 | if (match(V: RHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>( |
2358 | Op0: m_Value(V&: V2), Op1: m_AllOnes(), Op2: m_Specific(V: EVL))) && |
2359 | (LHS->hasOneUse() || RHS->hasOneUse() || |
2360 | (LHS == RHS && LHS->hasNUses(N: 2)))) |
2361 | return createBinOpVPReverse(V1, V2, EVL); |
2362 | |
2363 | // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat)) |
2364 | if (LHS->hasOneUse() && isSplatValue(V: RHS)) |
2365 | return createBinOpVPReverse(V1, RHS, EVL); |
2366 | } |
2367 | // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2)) |
2368 | else if (isSplatValue(V: LHS) && |
2369 | match(V: RHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>( |
2370 | Op0: m_Value(V&: V2), Op1: m_AllOnes(), Op2: m_Value(V&: EVL)))) |
2371 | return createBinOpVPReverse(LHS, V2, EVL); |
2372 | |
2373 | // It may not be safe to reorder shuffles and things like div, urem, etc. |
2374 | // because we may trap when executing those ops on unknown vector elements. |
2375 | // See PR20059. |
2376 | if (!isSafeToSpeculativelyExecuteWithVariableReplaced(I: &Inst)) |
2377 | return nullptr; |
2378 | |
2379 | auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) { |
2380 | Value *XY = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y); |
2381 | if (auto *BO = dyn_cast<BinaryOperator>(Val: XY)) |
2382 | BO->copyIRFlags(V: &Inst); |
2383 | return new ShuffleVectorInst(XY, M); |
2384 | }; |
2385 | |
2386 | // If both arguments of the binary operation are shuffles that use the same |
2387 | // mask and shuffle within a single vector, move the shuffle after the binop. |
2388 | if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(), mask: m_Mask(Mask))) && |
2389 | match(V: RHS, P: m_Shuffle(v1: m_Value(V&: V2), v2: m_Poison(), mask: m_SpecificMask(Mask))) && |
2390 | V1->getType() == V2->getType() && |
2391 | (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) { |
2392 | // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask) |
2393 | return createBinOpShuffle(V1, V2, Mask); |
2394 | } |
2395 | |
2396 | // If both arguments of a commutative binop are select-shuffles that use the |
2397 | // same mask with commuted operands, the shuffles are unnecessary. |
2398 | if (Inst.isCommutative() && |
2399 | match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Value(V&: V2), mask: m_Mask(Mask))) && |
2400 | match(V: RHS, |
2401 | P: m_Shuffle(v1: m_Specific(V: V2), v2: m_Specific(V: V1), mask: m_SpecificMask(Mask)))) { |
2402 | auto *LShuf = cast<ShuffleVectorInst>(Val: LHS); |
2403 | auto *RShuf = cast<ShuffleVectorInst>(Val: RHS); |
2404 | // TODO: Allow shuffles that contain undefs in the mask? |
2405 | // That is legal, but it reduces undef knowledge. |
2406 | // TODO: Allow arbitrary shuffles by shuffling after binop? |
2407 | // That might be legal, but we have to deal with poison. |
2408 | if (LShuf->isSelect() && |
2409 | !is_contained(Range: LShuf->getShuffleMask(), Element: PoisonMaskElem) && |
2410 | RShuf->isSelect() && |
2411 | !is_contained(Range: RShuf->getShuffleMask(), Element: PoisonMaskElem)) { |
2412 | // Example: |
2413 | // LHS = shuffle V1, V2, <0, 5, 6, 3> |
2414 | // RHS = shuffle V2, V1, <0, 5, 6, 3> |
2415 | // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2 |
2416 | Instruction *NewBO = BinaryOperator::Create(Op: Opcode, S1: V1, S2: V2); |
2417 | NewBO->copyIRFlags(V: &Inst); |
2418 | return NewBO; |
2419 | } |
2420 | } |
2421 | |
2422 | // If one argument is a shuffle within one vector and the other is a constant, |
2423 | // try moving the shuffle after the binary operation. This canonicalization |
2424 | // intends to move shuffles closer to other shuffles and binops closer to |
2425 | // other binops, so they can be folded. It may also enable demanded elements |
2426 | // transforms. |
2427 | Constant *C; |
2428 | if (match(V: &Inst, P: m_c_BinOp(L: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(), |
2429 | mask: m_Mask(Mask))), |
2430 | R: m_ImmConstant(C)))) { |
2431 | assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() && |
2432 | "Shuffle should not change scalar type" ); |
2433 | |
2434 | bool ConstOp1 = isa<Constant>(Val: RHS); |
2435 | if (Constant *NewC = |
2436 | unshuffleConstant(ShMask: Mask, C, NewCTy: cast<VectorType>(Val: V1->getType()))) { |
2437 | // For fixed vectors, lanes of NewC not used by the shuffle will be poison |
2438 | // which will cause UB for div/rem. Mask them with a safe constant. |
2439 | if (isa<FixedVectorType>(Val: V1->getType()) && Inst.isIntDivRem()) |
2440 | NewC = getSafeVectorConstantForBinop(Opcode, In: NewC, IsRHSConstant: ConstOp1); |
2441 | |
2442 | // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask) |
2443 | // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask) |
2444 | Value *NewLHS = ConstOp1 ? V1 : NewC; |
2445 | Value *NewRHS = ConstOp1 ? NewC : V1; |
2446 | return createBinOpShuffle(NewLHS, NewRHS, Mask); |
2447 | } |
2448 | } |
2449 | |
2450 | // Try to reassociate to sink a splat shuffle after a binary operation. |
2451 | if (Inst.isAssociative() && Inst.isCommutative()) { |
2452 | // Canonicalize shuffle operand as LHS. |
2453 | if (isa<ShuffleVectorInst>(Val: RHS)) |
2454 | std::swap(a&: LHS, b&: RHS); |
2455 | |
2456 | Value *X; |
2457 | ArrayRef<int> MaskC; |
2458 | int SplatIndex; |
2459 | Value *Y, *OtherOp; |
2460 | if (!match(V: LHS, |
2461 | P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_Mask(MaskC)))) || |
2462 | !match(Mask: MaskC, P: m_SplatOrPoisonMask(SplatIndex)) || |
2463 | X->getType() != Inst.getType() || |
2464 | !match(V: RHS, P: m_OneUse(SubPattern: m_BinOp(Opcode, L: m_Value(V&: Y), R: m_Value(V&: OtherOp))))) |
2465 | return nullptr; |
2466 | |
2467 | // FIXME: This may not be safe if the analysis allows undef elements. By |
2468 | // moving 'Y' before the splat shuffle, we are implicitly assuming |
2469 | // that it is not undef/poison at the splat index. |
2470 | if (isSplatValue(V: OtherOp, Index: SplatIndex)) { |
2471 | std::swap(a&: Y, b&: OtherOp); |
2472 | } else if (!isSplatValue(V: Y, Index: SplatIndex)) { |
2473 | return nullptr; |
2474 | } |
2475 | |
2476 | // X and Y are splatted values, so perform the binary operation on those |
2477 | // values followed by a splat followed by the 2nd binary operation: |
2478 | // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp |
2479 | Value *NewBO = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y); |
2480 | SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex); |
2481 | Value *NewSplat = Builder.CreateShuffleVector(V: NewBO, Mask: NewMask); |
2482 | Instruction *R = BinaryOperator::Create(Op: Opcode, S1: NewSplat, S2: OtherOp); |
2483 | |
2484 | // Intersect FMF on both new binops. Other (poison-generating) flags are |
2485 | // dropped to be safe. |
2486 | if (isa<FPMathOperator>(Val: R)) { |
2487 | R->copyFastMathFlags(I: &Inst); |
2488 | R->andIRFlags(V: RHS); |
2489 | } |
2490 | if (auto *NewInstBO = dyn_cast<BinaryOperator>(Val: NewBO)) |
2491 | NewInstBO->copyIRFlags(V: R); |
2492 | return R; |
2493 | } |
2494 | |
2495 | return nullptr; |
2496 | } |
2497 | |
/// Try to narrow the width of a binop if at least 1 operand is an extend of
/// a value. This requires a potentially expensive known bits check to make
2500 | /// sure the narrow op does not overflow. |
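///
/// A minimal illustrative sketch (assuming known bits prove the narrow add
/// cannot overflow unsigned):
///   add (zext i8 %x to i32), (zext i8 %y to i32)
///     --> zext (add nuw i8 %x, %y) to i32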
2501 | Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) { |
2502 | // We need at least one extended operand. |
2503 | Value *Op0 = BO.getOperand(i_nocapture: 0), *Op1 = BO.getOperand(i_nocapture: 1); |
2504 | |
2505 | // If this is a sub, we swap the operands since we always want an extension |
2506 | // on the RHS. The LHS can be an extension or a constant. |
2507 | if (BO.getOpcode() == Instruction::Sub) |
2508 | std::swap(a&: Op0, b&: Op1); |
2509 | |
2510 | Value *X; |
2511 | bool IsSext = match(V: Op0, P: m_SExt(Op: m_Value(V&: X))); |
2512 | if (!IsSext && !match(V: Op0, P: m_ZExt(Op: m_Value(V&: X)))) |
2513 | return nullptr; |
2514 | |
2515 | // If both operands are the same extension from the same source type and we |
2516 | // can eliminate at least one (hasOneUse), this might work. |
2517 | CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt; |
2518 | Value *Y; |
2519 | if (!(match(V: Op1, P: m_ZExtOrSExt(Op: m_Value(V&: Y))) && X->getType() == Y->getType() && |
2520 | cast<Operator>(Val: Op1)->getOpcode() == CastOpc && |
2521 | (Op0->hasOneUse() || Op1->hasOneUse()))) { |
2522 | // If that did not match, see if we have a suitable constant operand. |
2523 | // Truncating and extending must produce the same constant. |
2524 | Constant *WideC; |
2525 | if (!Op0->hasOneUse() || !match(V: Op1, P: m_Constant(C&: WideC))) |
2526 | return nullptr; |
2527 | Constant *NarrowC = getLosslessTrunc(C: WideC, TruncTy: X->getType(), ExtOp: CastOpc); |
2528 | if (!NarrowC) |
2529 | return nullptr; |
2530 | Y = NarrowC; |
2531 | } |
2532 | |
2533 | // Swap back now that we found our operands. |
2534 | if (BO.getOpcode() == Instruction::Sub) |
2535 | std::swap(a&: X, b&: Y); |
2536 | |
2537 | // Both operands have narrow versions. Last step: the math must not overflow |
2538 | // in the narrow width. |
2539 | if (!willNotOverflow(Opcode: BO.getOpcode(), LHS: X, RHS: Y, CxtI: BO, IsSigned: IsSext)) |
2540 | return nullptr; |
2541 | |
2542 | // bo (ext X), (ext Y) --> ext (bo X, Y) |
2543 | // bo (ext X), C --> ext (bo X, C') |
2544 | Value *NarrowBO = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: X, RHS: Y, Name: "narrow" ); |
2545 | if (auto *NewBinOp = dyn_cast<BinaryOperator>(Val: NarrowBO)) { |
2546 | if (IsSext) |
2547 | NewBinOp->setHasNoSignedWrap(); |
2548 | else |
2549 | NewBinOp->setHasNoUnsignedWrap(); |
2550 | } |
2551 | return CastInst::Create(CastOpc, S: NarrowBO, Ty: BO.getType()); |
2552 | } |
2553 | |
2554 | /// Determine nowrap flags for (gep (gep p, x), y) to (gep p, (x + y)) |
2555 | /// transform. |
2556 | static GEPNoWrapFlags getMergedGEPNoWrapFlags(GEPOperator &GEP1, |
2557 | GEPOperator &GEP2) { |
2558 | return GEP1.getNoWrapFlags().intersectForOffsetAdd(Other: GEP2.getNoWrapFlags()); |
2559 | } |
2560 | |
2561 | /// Thread a GEP operation with constant indices through the constant true/false |
2562 | /// arms of a select. |
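///
/// For example (illustrative; @A and @B stand for arbitrary constant pointers):
///   %p = select i1 %c, ptr @A, ptr @B
///   %g = getelementptr inbounds i8, ptr %p, i64 4
/// -->
///   %g = select i1 %c, ptr getelementptr inbounds (i8, ptr @A, i64 4),
///                      ptr getelementptr inbounds (i8, ptr @B, i64 4)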
2563 | static Instruction *foldSelectGEP(GetElementPtrInst &GEP, |
2564 | InstCombiner::BuilderTy &Builder) { |
2565 | if (!GEP.hasAllConstantIndices()) |
2566 | return nullptr; |
2567 | |
2568 | Instruction *Sel; |
2569 | Value *Cond; |
2570 | Constant *TrueC, *FalseC; |
2571 | if (!match(V: GEP.getPointerOperand(), P: m_Instruction(I&: Sel)) || |
2572 | !match(V: Sel, |
2573 | P: m_Select(C: m_Value(V&: Cond), L: m_Constant(C&: TrueC), R: m_Constant(C&: FalseC)))) |
2574 | return nullptr; |
2575 | |
2576 | // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC' |
2577 | // Propagate 'inbounds' and metadata from existing instructions. |
2578 | // Note: using IRBuilder to create the constants for efficiency. |
2579 | SmallVector<Value *, 4> IndexC(GEP.indices()); |
2580 | GEPNoWrapFlags NW = GEP.getNoWrapFlags(); |
2581 | Type *Ty = GEP.getSourceElementType(); |
2582 | Value *NewTrueC = Builder.CreateGEP(Ty, Ptr: TrueC, IdxList: IndexC, Name: "" , NW); |
2583 | Value *NewFalseC = Builder.CreateGEP(Ty, Ptr: FalseC, IdxList: IndexC, Name: "" , NW); |
2584 | return SelectInst::Create(C: Cond, S1: NewTrueC, S2: NewFalseC, NameStr: "" , InsertBefore: nullptr, MDFrom: Sel); |
2585 | } |
2586 | |
2587 | // Canonicalization: |
2588 | // gep T, (gep i8, base, C1), (Index + C2) into |
2589 | // gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index |
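//
// For example (illustrative), with T = i32, C1 = 8, C2 = 2, and assuming the
// intermediate values have no other uses:
//   %p = getelementptr i8, ptr %base, i64 8
//   %i = add i64 %idx, 2
//   %g = getelementptr i32, ptr %p, i64 %i
// -->
//   %q = getelementptr i8, ptr %base, i64 16   ; 8 + 2 * 4
//   %g = getelementptr i32, ptr %q, i64 %idx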
2590 | static Instruction *canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP, |
2591 | GEPOperator *Src, |
2592 | InstCombinerImpl &IC) { |
2593 | if (GEP.getNumIndices() != 1) |
2594 | return nullptr; |
2595 | auto &DL = IC.getDataLayout(); |
2596 | Value *Base; |
2597 | const APInt *C1; |
2598 | if (!match(V: Src, P: m_PtrAdd(PointerOp: m_Value(V&: Base), OffsetOp: m_APInt(Res&: C1)))) |
2599 | return nullptr; |
2600 | Value *VarIndex; |
2601 | const APInt *C2; |
2602 | Type *PtrTy = Src->getType()->getScalarType(); |
2603 | unsigned IndexSizeInBits = DL.getIndexTypeSizeInBits(Ty: PtrTy); |
2604 | if (!match(V: GEP.getOperand(i_nocapture: 1), P: m_AddLike(L: m_Value(V&: VarIndex), R: m_APInt(Res&: C2)))) |
2605 | return nullptr; |
2606 | if (C1->getBitWidth() != IndexSizeInBits || |
2607 | C2->getBitWidth() != IndexSizeInBits) |
2608 | return nullptr; |
2609 | Type *BaseType = GEP.getSourceElementType(); |
2610 | if (isa<ScalableVectorType>(Val: BaseType)) |
2611 | return nullptr; |
2612 | APInt TypeSize(IndexSizeInBits, DL.getTypeAllocSize(Ty: BaseType)); |
2613 | APInt NewOffset = TypeSize * *C2 + *C1; |
2614 | if (NewOffset.isZero() || |
2615 | (Src->hasOneUse() && GEP.getOperand(i_nocapture: 1)->hasOneUse())) { |
2616 | Value *GEPConst = |
2617 | IC.Builder.CreatePtrAdd(Ptr: Base, Offset: IC.Builder.getInt(AI: NewOffset)); |
2618 | return GetElementPtrInst::Create(PointeeType: BaseType, Ptr: GEPConst, IdxList: VarIndex); |
2619 | } |
2620 | |
2621 | return nullptr; |
2622 | } |
2623 | |
2624 | Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, |
2625 | GEPOperator *Src) { |
2626 | // Combine Indices - If the source pointer to this getelementptr instruction |
2627 | // is a getelementptr instruction with matching element type, combine the |
2628 | // indices of the two getelementptr instructions into a single instruction. |
2629 | if (!shouldMergeGEPs(GEP&: *cast<GEPOperator>(Val: &GEP), Src&: *Src)) |
2630 | return nullptr; |
2631 | |
2632 | if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, IC&: *this)) |
2633 | return I; |
2634 | |
2635 | // For constant GEPs, use a more general offset-based folding approach. |
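// For example (illustrative):
//   %s = getelementptr [4 x i32], ptr %p, i64 %i, i64 1
//   %g = getelementptr i8, ptr %s, i64 8
// -->
//   %g = getelementptr [4 x i32], ptr %p, i64 %i, i64 3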
2636 | Type *PtrTy = Src->getType()->getScalarType(); |
2637 | if (GEP.hasAllConstantIndices() && |
2638 | (Src->hasOneUse() || Src->hasAllConstantIndices())) { |
2639 | // Split Src into a variable part and a constant suffix. |
2640 | gep_type_iterator GTI = gep_type_begin(GEP: *Src); |
2641 | Type *BaseType = GTI.getIndexedType(); |
2642 | bool IsFirstType = true; |
2643 | unsigned NumVarIndices = 0; |
2644 | for (auto Pair : enumerate(First: Src->indices())) { |
2645 | if (!isa<ConstantInt>(Val: Pair.value())) { |
2646 | BaseType = GTI.getIndexedType(); |
2647 | IsFirstType = false; |
2648 | NumVarIndices = Pair.index() + 1; |
2649 | } |
2650 | ++GTI; |
2651 | } |
2652 | |
2653 | // Determine the offset for the constant suffix of Src. |
2654 | APInt Offset(DL.getIndexTypeSizeInBits(Ty: PtrTy), 0); |
2655 | if (NumVarIndices != Src->getNumIndices()) { |
2656 | // FIXME: getIndexedOffsetInType() does not handle scalable vectors. |
2657 | if (BaseType->isScalableTy()) |
2658 | return nullptr; |
2659 | |
2660 | SmallVector<Value *> ConstantIndices; |
2661 | if (!IsFirstType) |
2662 | ConstantIndices.push_back( |
2663 | Elt: Constant::getNullValue(Ty: Type::getInt32Ty(C&: GEP.getContext()))); |
2664 | append_range(C&: ConstantIndices, R: drop_begin(RangeOrContainer: Src->indices(), N: NumVarIndices)); |
2665 | Offset += DL.getIndexedOffsetInType(ElemTy: BaseType, Indices: ConstantIndices); |
2666 | } |
2667 | |
2668 | // Add the offset for GEP (which is fully constant). |
2669 | if (!GEP.accumulateConstantOffset(DL, Offset)) |
2670 | return nullptr; |
2671 | |
2672 | // Convert the total offset back into indices. |
2673 | SmallVector<APInt> ConstIndices = |
2674 | DL.getGEPIndicesForOffset(ElemTy&: BaseType, Offset); |
2675 | if (!Offset.isZero() || (!IsFirstType && !ConstIndices[0].isZero())) |
2676 | return nullptr; |
2677 | |
2678 | GEPNoWrapFlags NW = getMergedGEPNoWrapFlags(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP)); |
2679 | SmallVector<Value *> Indices( |
2680 | drop_end(RangeOrContainer: Src->indices(), N: Src->getNumIndices() - NumVarIndices)); |
2681 | for (const APInt &Idx : drop_begin(RangeOrContainer&: ConstIndices, N: !IsFirstType)) { |
2682 | Indices.push_back(Elt: ConstantInt::get(Context&: GEP.getContext(), V: Idx)); |
2683 | // Even if the total offset is inbounds, we may end up representing it |
2684 | // by first performing a larger negative offset, and then a smaller |
2685 | // positive one. The large negative offset might go out of bounds. Only |
2686 | // preserve inbounds if all signs are the same. |
2687 | if (Idx.isNonNegative() != ConstIndices[0].isNonNegative()) |
2688 | NW = NW.withoutNoUnsignedSignedWrap(); |
2689 | if (!Idx.isNonNegative()) |
2690 | NW = NW.withoutNoUnsignedWrap(); |
2691 | } |
2692 | |
2693 | return replaceInstUsesWith( |
2694 | I&: GEP, V: Builder.CreateGEP(Ty: Src->getSourceElementType(), Ptr: Src->getOperand(i_nocapture: 0), |
2695 | IdxList: Indices, Name: "" , NW)); |
2696 | } |
2697 | |
2698 | if (Src->getResultElementType() != GEP.getSourceElementType()) |
2699 | return nullptr; |
2700 | |
2701 | SmallVector<Value*, 8> Indices; |
2702 | |
2703 | // Find out whether the last index in the source GEP is a sequential idx. |
2704 | bool EndsWithSequential = false; |
2705 | for (gep_type_iterator I = gep_type_begin(GEP: *Src), E = gep_type_end(GEP: *Src); |
2706 | I != E; ++I) |
2707 | EndsWithSequential = I.isSequential(); |
2708 | |
2709 | // Can we combine the offsets of the two pointer arithmetic operations? |
2710 | if (EndsWithSequential) { |
2711 | // Replace: gep (gep %P, long B), long A, ... |
2712 | // With: T = long A+B; gep %P, T, ... |
2713 | Value *SO1 = Src->getOperand(i_nocapture: Src->getNumOperands()-1); |
2714 | Value *GO1 = GEP.getOperand(i_nocapture: 1); |
2715 | |
2716 | // If they aren't the same type, then the input hasn't been processed |
2717 | // by the loop above yet (which canonicalizes sequential index types to |
2718 | // intptr_t). Just avoid transforming this until the input has been |
2719 | // normalized. |
2720 | if (SO1->getType() != GO1->getType()) |
2721 | return nullptr; |
2722 | |
2723 | Value *Sum = |
2724 | simplifyAddInst(LHS: GO1, RHS: SO1, IsNSW: false, IsNUW: false, Q: SQ.getWithInstruction(I: &GEP)); |
2725 | // Only do the combine when the add simplifies away, so we are sure the |
2726 | // cost after the merge is never more than the cost before the merge. |
2727 | if (Sum == nullptr) |
2728 | return nullptr; |
2729 | |
2730 | Indices.append(in_start: Src->op_begin()+1, in_end: Src->op_end()-1); |
2731 | Indices.push_back(Elt: Sum); |
2732 | Indices.append(in_start: GEP.op_begin()+2, in_end: GEP.op_end()); |
2733 | } else if (isa<Constant>(Val: *GEP.idx_begin()) && |
2734 | cast<Constant>(Val&: *GEP.idx_begin())->isNullValue() && |
2735 | Src->getNumOperands() != 1) { |
2736 | // Otherwise we can do the fold if the first index of the GEP is zero. |
2737 | Indices.append(in_start: Src->op_begin()+1, in_end: Src->op_end()); |
2738 | Indices.append(in_start: GEP.idx_begin()+1, in_end: GEP.idx_end()); |
2739 | } |
2740 | |
2741 | if (!Indices.empty()) |
2742 | return replaceInstUsesWith( |
2743 | I&: GEP, V: Builder.CreateGEP( |
2744 | Ty: Src->getSourceElementType(), Ptr: Src->getOperand(i_nocapture: 0), IdxList: Indices, Name: "" , |
2745 | NW: getMergedGEPNoWrapFlags(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP)))); |
2746 | |
2747 | return nullptr; |
2748 | } |
2749 | |
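/// Return a value equivalent to ~V that can be materialized "for free", or
/// nullptr if none is known. When \p Builder is null, no instructions are
/// created and a non-null sentinel is returned instead; \p DoesConsume is set
/// when the returned value reuses an existing 'not' of V.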
2750 | Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses, |
2751 | BuilderTy *Builder, |
2752 | bool &DoesConsume, unsigned Depth) { |
2753 | static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1)); |
2754 | // ~(~(X)) -> X. |
2755 | Value *A, *B; |
2756 | if (match(V, P: m_Not(V: m_Value(V&: A)))) { |
2757 | DoesConsume = true; |
2758 | return A; |
2759 | } |
2760 | |
2761 | Constant *C; |
2762 | // Constants can be considered to be not'ed values. |
2763 | if (match(V, P: m_ImmConstant(C))) |
2764 | return ConstantExpr::getNot(C); |
2765 | |
2766 | if (Depth++ >= MaxAnalysisRecursionDepth) |
2767 | return nullptr; |
2768 | |
2769 | // The rest of the cases require that we invert all uses so don't bother |
2770 | // doing the analysis if we know we can't use the result. |
2771 | if (!WillInvertAllUses) |
2772 | return nullptr; |
2773 | |
2774 | // Compares can be inverted if all of their uses are being modified to use |
2775 | // the ~V. |
2776 | if (auto *I = dyn_cast<CmpInst>(Val: V)) { |
2777 | if (Builder != nullptr) |
2778 | return Builder->CreateCmp(Pred: I->getInversePredicate(), LHS: I->getOperand(i_nocapture: 0), |
2779 | RHS: I->getOperand(i_nocapture: 1)); |
2780 | return NonNull; |
2781 | } |
2782 | |
2783 | // If `V` is of the form `A + B` then `-1 - V` can be folded into |
2784 | // `(-1 - B) - A` if we are willing to invert all of the uses. |
2785 | if (match(V, P: m_Add(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
2786 | if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder, |
2787 | DoesConsume, Depth)) |
2788 | return Builder ? Builder->CreateSub(LHS: BV, RHS: A) : NonNull; |
2789 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2790 | DoesConsume, Depth)) |
2791 | return Builder ? Builder->CreateSub(LHS: AV, RHS: B) : NonNull; |
2792 | return nullptr; |
2793 | } |
2794 | |
2795 | // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded |
2796 | // into `A ^ B` if we are willing to invert all of the uses. |
2797 | if (match(V, P: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
2798 | if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder, |
2799 | DoesConsume, Depth)) |
2800 | return Builder ? Builder->CreateXor(LHS: A, RHS: BV) : NonNull; |
2801 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2802 | DoesConsume, Depth)) |
2803 | return Builder ? Builder->CreateXor(LHS: AV, RHS: B) : NonNull; |
2804 | return nullptr; |
2805 | } |
2806 | |
2807 | // If `V` is of the form `B - A` then `-1 - V` can be folded into |
2808 | // `A + (-1 - B)` if we are willing to invert all of the uses. |
2809 | if (match(V, P: m_Sub(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
2810 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2811 | DoesConsume, Depth)) |
2812 | return Builder ? Builder->CreateAdd(LHS: AV, RHS: B) : NonNull; |
2813 | return nullptr; |
2814 | } |
2815 | |
2816 | // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded |
2817 | // into `A s>> B` if we are willing to invert all of the uses. |
2818 | if (match(V, P: m_AShr(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
2819 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2820 | DoesConsume, Depth)) |
2821 | return Builder ? Builder->CreateAShr(LHS: AV, RHS: B) : NonNull; |
2822 | return nullptr; |
2823 | } |
2824 | |
2825 | Value *Cond; |
2826 | // LogicOps are special in that we canonicalize them at the cost of an |
2827 | // instruction. |
2828 | bool IsSelect = match(V, P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: A), R: m_Value(V&: B))) && |
2829 | !shouldAvoidAbsorbingNotIntoSelect(SI: *cast<SelectInst>(Val: V)); |
2830 | // Selects/min/max with invertible operands are freely invertible |
2831 | if (IsSelect || match(V, P: m_MaxOrMin(L: m_Value(V&: A), R: m_Value(V&: B)))) { |
2832 | bool LocalDoesConsume = DoesConsume; |
2833 | if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder*/ nullptr, |
2834 | DoesConsume&: LocalDoesConsume, Depth)) |
2835 | return nullptr; |
2836 | if (Value *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2837 | DoesConsume&: LocalDoesConsume, Depth)) { |
2838 | DoesConsume = LocalDoesConsume; |
2839 | if (Builder != nullptr) { |
2840 | Value *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder, |
2841 | DoesConsume, Depth); |
2842 | assert(NotB != nullptr && |
2843 | "Unable to build inverted value for known freely invertible op"); |
2844 | if (auto *II = dyn_cast<IntrinsicInst>(Val: V)) |
2845 | return Builder->CreateBinaryIntrinsic( |
2846 | ID: getInverseMinMaxIntrinsic(MinMaxID: II->getIntrinsicID()), LHS: NotA, RHS: NotB); |
2847 | return Builder->CreateSelect(C: Cond, True: NotA, False: NotB); |
2848 | } |
2849 | return NonNull; |
2850 | } |
2851 | } |
2852 | |
2853 | if (PHINode *PN = dyn_cast<PHINode>(Val: V)) { |
2854 | bool LocalDoesConsume = DoesConsume; |
2855 | SmallVector<std::pair<Value *, BasicBlock *>, 8> IncomingValues; |
2856 | for (Use &U : PN->operands()) { |
2857 | BasicBlock *IncomingBlock = PN->getIncomingBlock(U); |
2858 | Value *NewIncomingVal = getFreelyInvertedImpl( |
2859 | V: U.get(), /*WillInvertAllUses=*/false, |
2860 | /*Builder=*/nullptr, DoesConsume&: LocalDoesConsume, Depth: MaxAnalysisRecursionDepth - 1); |
2861 | if (NewIncomingVal == nullptr) |
2862 | return nullptr; |
2863 | // Make sure that we can safely erase the original PHI node. |
2864 | if (NewIncomingVal == V) |
2865 | return nullptr; |
2866 | if (Builder != nullptr) |
2867 | IncomingValues.emplace_back(Args&: NewIncomingVal, Args&: IncomingBlock); |
2868 | } |
2869 | |
2870 | DoesConsume = LocalDoesConsume; |
2871 | if (Builder != nullptr) { |
2872 | IRBuilderBase::InsertPointGuard Guard(*Builder); |
2873 | Builder->SetInsertPoint(PN); |
2874 | PHINode *NewPN = |
2875 | Builder->CreatePHI(Ty: PN->getType(), NumReservedValues: PN->getNumIncomingValues()); |
2876 | for (auto [Val, Pred] : IncomingValues) |
2877 | NewPN->addIncoming(V: Val, BB: Pred); |
2878 | return NewPN; |
2879 | } |
2880 | return NonNull; |
2881 | } |
2882 | |
2883 | if (match(V, P: m_SExtLike(Op: m_Value(V&: A)))) { |
2884 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2885 | DoesConsume, Depth)) |
2886 | return Builder ? Builder->CreateSExt(V: AV, DestTy: V->getType()) : NonNull; |
2887 | return nullptr; |
2888 | } |
2889 | |
2890 | if (match(V, P: m_Trunc(Op: m_Value(V&: A)))) { |
2891 | if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2892 | DoesConsume, Depth)) |
2893 | return Builder ? Builder->CreateTrunc(V: AV, DestTy: V->getType()) : NonNull; |
2894 | return nullptr; |
2895 | } |
2896 | |
2897 | // De Morgan's Laws: |
2898 | // (~(A | B)) -> (~A & ~B) |
2899 | // (~(A & B)) -> (~A | ~B) |
2900 | auto TryInvertAndOrUsingDeMorgan = [&](Instruction::BinaryOps Opcode, |
2901 | bool IsLogical, Value *A, |
2902 | Value *B) -> Value * { |
2903 | bool LocalDoesConsume = DoesConsume; |
2904 | if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder=*/nullptr, |
2905 | DoesConsume&: LocalDoesConsume, Depth)) |
2906 | return nullptr; |
2907 | if (auto *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder, |
2908 | DoesConsume&: LocalDoesConsume, Depth)) { |
2909 | auto *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder, |
2910 | DoesConsume&: LocalDoesConsume, Depth); |
2911 | DoesConsume = LocalDoesConsume; |
2912 | if (IsLogical) |
2913 | return Builder ? Builder->CreateLogicalOp(Opc: Opcode, Cond1: NotA, Cond2: NotB) : NonNull; |
2914 | return Builder ? Builder->CreateBinOp(Opc: Opcode, LHS: NotA, RHS: NotB) : NonNull; |
2915 | } |
2916 | |
2917 | return nullptr; |
2918 | }; |
2919 | |
2920 | if (match(V, P: m_Or(L: m_Value(V&: A), R: m_Value(V&: B)))) |
2921 | return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/false, A, |
2922 | B); |
2923 | |
2924 | if (match(V, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B)))) |
2925 | return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/false, A, |
2926 | B); |
2927 | |
2928 | if (match(V, P: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B)))) |
2929 | return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/true, A, |
2930 | B); |
2931 | |
2932 | if (match(V, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B)))) |
2933 | return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/true, A, |
2934 | B); |
2935 | |
2936 | return nullptr; |
2937 | } |
2938 | |
2939 | /// Return true if we should canonicalize the gep to an i8 ptradd. |
2940 | static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) { |
2941 | Value *PtrOp = GEP.getOperand(i_nocapture: 0); |
2942 | Type *GEPEltType = GEP.getSourceElementType(); |
2943 | if (GEPEltType->isIntegerTy(Bitwidth: 8)) |
2944 | return false; |
2945 | |
2946 | // Canonicalize scalable GEPs to an explicit offset using the llvm.vscale |
2947 | // intrinsic. This has better support in BasicAA. |
2948 | if (GEPEltType->isScalableTy()) |
2949 | return true; |
2950 | |
2951 | // gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two multiplies |
2952 | // together. |
2953 | if (GEP.getNumIndices() == 1 && |
2954 | match(V: GEP.getOperand(i_nocapture: 1), |
2955 | P: m_OneUse(SubPattern: m_CombineOr(L: m_Mul(L: m_Value(), R: m_ConstantInt()), |
2956 | R: m_Shl(L: m_Value(), R: m_ConstantInt()))))) |
2957 | return true; |
2958 | |
2959 | // gep (gep %p, C1), %x, C2 is expanded so the two constants can |
2960 | // possibly be merged together. |
2961 | auto PtrOpGep = dyn_cast<GEPOperator>(Val: PtrOp); |
2962 | return PtrOpGep && PtrOpGep->hasAllConstantIndices() && |
2963 | any_of(Range: GEP.indices(), P: [](Value *V) { |
2964 | const APInt *C; |
2965 | return match(V, P: m_APInt(Res&: C)) && !C->isZero(); |
2966 | }); |
2967 | } |
2968 | |
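// If all incoming values of the PHI are GEPs that differ in at most one
// operand, clone one of them into the block containing GEP (creating a new
// PHI for the differing operand if necessary) so it can be merged with GEP.
// For example (illustrative):
//   %g1  = getelementptr i32, ptr %p, i64 %a    ; in %pred1
//   %g2  = getelementptr i32, ptr %p, i64 %b    ; in %pred2
//   %phi = phi ptr [ %g1, %pred1 ], [ %g2, %pred2 ]
// -->
//   %idx = phi i64 [ %a, %pred1 ], [ %b, %pred2 ]
//   %gep = getelementptr i32, ptr %p, i64 %idx  ; in the block of GEP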
2969 | static Instruction *foldGEPOfPhi(GetElementPtrInst &GEP, PHINode *PN, |
2970 | IRBuilderBase &Builder) { |
2971 | auto *Op1 = dyn_cast<GetElementPtrInst>(Val: PN->getOperand(i_nocapture: 0)); |
2972 | if (!Op1) |
2973 | return nullptr; |
2974 | |
2975 | // Don't fold a GEP into itself through a PHI node. This can only happen |
2976 | // through the back-edge of a loop. Folding a GEP into itself means that |
2977 | // the value of the previous iteration needs to be stored in the meantime, |
2978 | // thus requiring an additional register variable to be live, but not |
2979 | // actually achieving anything (the GEP still needs to be executed once per |
2980 | // loop iteration). |
2981 | if (Op1 == &GEP) |
2982 | return nullptr; |
2983 | GEPNoWrapFlags NW = Op1->getNoWrapFlags(); |
2984 | |
2985 | int DI = -1; |
2986 | |
2987 | for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) { |
2988 | auto *Op2 = dyn_cast<GetElementPtrInst>(Val&: *I); |
2989 | if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() || |
2990 | Op1->getSourceElementType() != Op2->getSourceElementType()) |
2991 | return nullptr; |
2992 | |
2993 | // As for Op1 above, don't try to fold a GEP into itself. |
2994 | if (Op2 == &GEP) |
2995 | return nullptr; |
2996 | |
2997 | // Keep track of the type as we walk the GEP. |
2998 | Type *CurTy = nullptr; |
2999 | |
3000 | for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) { |
3001 | if (Op1->getOperand(i_nocapture: J)->getType() != Op2->getOperand(i_nocapture: J)->getType()) |
3002 | return nullptr; |
3003 | |
3004 | if (Op1->getOperand(i_nocapture: J) != Op2->getOperand(i_nocapture: J)) { |
3005 | if (DI == -1) { |
3006 | // We have not seen any differences in the GEPs feeding the |
3007 | // PHI yet, so we record this one if it is allowed to be a |
3008 | // variable. |
3009 | |
3010 | // The first two arguments can vary for any GEP; the rest have to be |
3011 | // static for struct slots. |
3012 | if (J > 1) { |
3013 | assert(CurTy && "No current type?" ); |
3014 | if (CurTy->isStructTy()) |
3015 | return nullptr; |
3016 | } |
3017 | |
3018 | DI = J; |
3019 | } else { |
3020 | // The GEPs differ in more than one input. While this could be |
3021 | // extended to support GEPs that vary by more than one variable, it |
3022 | // doesn't make sense: it greatly increases the complexity and |
3023 | // would result in an R+R+R addressing mode, which no backend |
3024 | // directly supports and which would need to be broken into several |
3025 | // simpler instructions anyway. |
3026 | return nullptr; |
3027 | } |
3028 | } |
3029 | |
3030 | // Sink down a layer of the type for the next iteration. |
3031 | if (J > 0) { |
3032 | if (J == 1) { |
3033 | CurTy = Op1->getSourceElementType(); |
3034 | } else { |
3035 | CurTy = |
3036 | GetElementPtrInst::getTypeAtIndex(Ty: CurTy, Idx: Op1->getOperand(i_nocapture: J)); |
3037 | } |
3038 | } |
3039 | } |
3040 | |
3041 | NW &= Op2->getNoWrapFlags(); |
3042 | } |
3043 | |
3044 | // If not all GEPs are identical we'll have to create a new PHI node. |
3045 | // Check that the old PHI node has only one use so that it will get |
3046 | // removed. |
3047 | if (DI != -1 && !PN->hasOneUse()) |
3048 | return nullptr; |
3049 | |
3050 | auto *NewGEP = cast<GetElementPtrInst>(Val: Op1->clone()); |
3051 | NewGEP->setNoWrapFlags(NW); |
3052 | |
3053 | if (DI == -1) { |
3054 | // All the GEPs feeding the PHI are identical. Clone one down into our |
3055 | // BB so that it can be merged with the current GEP. |
3056 | } else { |
3057 | // All the GEPs feeding the PHI differ at a single offset. Clone a GEP |
3058 | // into the current block so it can be merged, and create a new PHI to |
3059 | // set that index. |
3060 | PHINode *NewPN; |
3061 | { |
3062 | IRBuilderBase::InsertPointGuard Guard(Builder); |
3063 | Builder.SetInsertPoint(PN); |
3064 | NewPN = Builder.CreatePHI(Ty: Op1->getOperand(i_nocapture: DI)->getType(), |
3065 | NumReservedValues: PN->getNumOperands()); |
3066 | } |
3067 | |
3068 | for (auto &I : PN->operands()) |
3069 | NewPN->addIncoming(V: cast<GEPOperator>(Val&: I)->getOperand(i_nocapture: DI), |
3070 | BB: PN->getIncomingBlock(U: I)); |
3071 | |
3072 | NewGEP->setOperand(i_nocapture: DI, Val_nocapture: NewPN); |
3073 | } |
3074 | |
3075 | NewGEP->insertBefore(BB&: *GEP.getParent(), InsertPos: GEP.getParent()->getFirstInsertionPt()); |
3076 | return NewGEP; |
3077 | } |
3078 | |
3079 | Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { |
3080 | Value *PtrOp = GEP.getOperand(i_nocapture: 0); |
3081 | SmallVector<Value *, 8> Indices(GEP.indices()); |
3082 | Type *GEPType = GEP.getType(); |
3083 | Type *GEPEltType = GEP.getSourceElementType(); |
3084 | if (Value *V = |
3085 | simplifyGEPInst(SrcTy: GEPEltType, Ptr: PtrOp, Indices, NW: GEP.getNoWrapFlags(), |
3086 | Q: SQ.getWithInstruction(I: &GEP))) |
3087 | return replaceInstUsesWith(I&: GEP, V); |
3088 | |
3089 | // For vector geps, use the generic demanded vector support. |
3090 | // Skip if the GEP return type is scalable, since the number of elements is |
3091 | // unknown at compile time. |
3092 | if (auto *GEPFVTy = dyn_cast<FixedVectorType>(Val: GEPType)) { |
3093 | auto VWidth = GEPFVTy->getNumElements(); |
3094 | APInt PoisonElts(VWidth, 0); |
3095 | APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth)); |
3096 | if (Value *V = SimplifyDemandedVectorElts(V: &GEP, DemandedElts: AllOnesEltMask, |
3097 | PoisonElts)) { |
3098 | if (V != &GEP) |
3099 | return replaceInstUsesWith(I&: GEP, V); |
3100 | return &GEP; |
3101 | } |
3102 | } |
3103 | |
3104 | // Eliminate unneeded casts for indices, and replace indices which displace |
3105 | // by multiples of a zero size type with zero. |
3106 | bool MadeChange = false; |
3107 | |
3108 | // Index width may not be the same width as pointer width. |
3109 | // Data layout chooses the right type based on supported integer types. |
3110 | Type *NewScalarIndexTy = |
3111 | DL.getIndexType(PtrTy: GEP.getPointerOperandType()->getScalarType()); |
3112 | |
3113 | gep_type_iterator GTI = gep_type_begin(GEP); |
3114 | for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E; |
3115 | ++I, ++GTI) { |
3116 | // Skip indices into struct types. |
3117 | if (GTI.isStruct()) |
3118 | continue; |
3119 | |
3120 | Type *IndexTy = (*I)->getType(); |
3121 | Type *NewIndexType = |
3122 | IndexTy->isVectorTy() |
3123 | ? VectorType::get(ElementType: NewScalarIndexTy, |
3124 | EC: cast<VectorType>(Val: IndexTy)->getElementCount()) |
3125 | : NewScalarIndexTy; |
3126 | |
3127 | // If the element type has zero size then any index over it is equivalent |
3128 | // to an index of zero, so replace it with zero if it is not zero already. |
3129 | Type *EltTy = GTI.getIndexedType(); |
3130 | if (EltTy->isSized() && DL.getTypeAllocSize(Ty: EltTy).isZero()) |
3131 | if (!isa<Constant>(Val: *I) || !match(V: I->get(), P: m_Zero())) { |
3132 | *I = Constant::getNullValue(Ty: NewIndexType); |
3133 | MadeChange = true; |
3134 | } |
3135 | |
3136 | if (IndexTy != NewIndexType) { |
3137 | // If we are using a wider index than needed for this platform, shrink |
3138 | // it to what we need. If narrower, sign-extend it to what we need. |
3139 | // This explicit cast can make subsequent optimizations more obvious. |
3140 | *I = Builder.CreateIntCast(V: *I, DestTy: NewIndexType, isSigned: true); |
3141 | MadeChange = true; |
3142 | } |
3143 | } |
3144 | if (MadeChange) |
3145 | return &GEP; |
3146 | |
3147 | // Canonicalize constant GEPs to i8 type. |
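// For example (illustrative):
//   getelementptr inbounds i32, ptr %p, i64 3
// -->
//   getelementptr inbounds i8, ptr %p, i64 12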
3148 | if (!GEPEltType->isIntegerTy(Bitwidth: 8) && GEP.hasAllConstantIndices()) { |
3149 | APInt Offset(DL.getIndexTypeSizeInBits(Ty: GEPType), 0); |
3150 | if (GEP.accumulateConstantOffset(DL, Offset)) |
3151 | return replaceInstUsesWith( |
3152 | I&: GEP, V: Builder.CreatePtrAdd(Ptr: PtrOp, Offset: Builder.getInt(AI: Offset), Name: "" , |
3153 | NW: GEP.getNoWrapFlags())); |
3154 | } |
3155 | |
3156 | if (shouldCanonicalizeGEPToPtrAdd(GEP)) { |
3157 | Value *Offset = EmitGEPOffset(GEP: cast<GEPOperator>(Val: &GEP)); |
3158 | Value *NewGEP = |
3159 | Builder.CreatePtrAdd(Ptr: PtrOp, Offset, Name: "" , NW: GEP.getNoWrapFlags()); |
3160 | return replaceInstUsesWith(I&: GEP, V: NewGEP); |
3161 | } |
3162 | |
3163 | // Scalarize vector operands; prefer splat-of-gep as the canonical form. |
3164 | // Note that this loses information about undef lanes; we run it after the |
3165 | // demanded-elements simplification above to partially mitigate that loss. |
3166 | if (GEPType->isVectorTy() && llvm::any_of(Range: GEP.operands(), P: [](Value *Op) { |
3167 | return Op->getType()->isVectorTy() && getSplatValue(V: Op); |
3168 | })) { |
3169 | SmallVector<Value *> NewOps; |
3170 | for (auto &Op : GEP.operands()) { |
3171 | if (Op->getType()->isVectorTy()) |
3172 | if (Value *Scalar = getSplatValue(V: Op)) { |
3173 | NewOps.push_back(Elt: Scalar); |
3174 | continue; |
3175 | } |
3176 | NewOps.push_back(Elt: Op); |
3177 | } |
3178 | |
3179 | Value *Res = Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: NewOps[0], |
3180 | IdxList: ArrayRef(NewOps).drop_front(), Name: GEP.getName(), |
3181 | NW: GEP.getNoWrapFlags()); |
3182 | if (!Res->getType()->isVectorTy()) { |
3183 | ElementCount EC = cast<VectorType>(Val: GEPType)->getElementCount(); |
3184 | Res = Builder.CreateVectorSplat(EC, V: Res); |
3185 | } |
3186 | return replaceInstUsesWith(I&: GEP, V: Res); |
3187 | } |
3188 | |
3189 | // Check to see if the inputs to the PHI node are getelementptr instructions. |
3190 | if (auto *PN = dyn_cast<PHINode>(Val: PtrOp)) { |
3191 | if (Value *NewPtrOp = foldGEPOfPhi(GEP, PN, Builder)) |
3192 | return replaceOperand(I&: GEP, OpNum: 0, V: NewPtrOp); |
3193 | } |
3194 | |
3195 | if (auto *Src = dyn_cast<GEPOperator>(Val: PtrOp)) |
3196 | if (Instruction *I = visitGEPOfGEP(GEP, Src)) |
3197 | return I; |
3198 | |
3199 | if (GEP.getNumIndices() == 1) { |
3200 | unsigned AS = GEP.getPointerAddressSpace(); |
3201 | if (GEP.getOperand(i_nocapture: 1)->getType()->getScalarSizeInBits() == |
3202 | DL.getIndexSizeInBits(AS)) { |
3203 | uint64_t TyAllocSize = DL.getTypeAllocSize(Ty: GEPEltType).getFixedValue(); |
3204 | |
3205 | if (TyAllocSize == 1) { |
3206 | // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y), |
3207 | // but only if the result pointer is used only as if it were an integer, |
3208 | // or both pointers point to the same underlying object (otherwise |
3209 | // provenance is not necessarily retained). |
3210 | Value *X = GEP.getPointerOperand(); |
3211 | Value *Y; |
3212 | if (match(V: GEP.getOperand(i_nocapture: 1), |
3213 | P: m_Sub(L: m_PtrToInt(Op: m_Value(V&: Y)), R: m_PtrToInt(Op: m_Specific(V: X)))) && |
3214 | GEPType == Y->getType()) { |
3215 | bool HasSameUnderlyingObject = |
3216 | getUnderlyingObject(V: X) == getUnderlyingObject(V: Y); |
3217 | bool Changed = false; |
3218 | GEP.replaceUsesWithIf(New: Y, ShouldReplace: [&](Use &U) { |
3219 | bool ShouldReplace = HasSameUnderlyingObject || |
3220 | isa<ICmpInst>(Val: U.getUser()) || |
3221 | isa<PtrToIntInst>(Val: U.getUser()); |
3222 | Changed |= ShouldReplace; |
3223 | return ShouldReplace; |
3224 | }); |
3225 | return Changed ? &GEP : nullptr; |
3226 | } |
3227 | } else if (auto *ExactIns = |
3228 | dyn_cast<PossiblyExactOperator>(Val: GEP.getOperand(i_nocapture: 1))) { |
3229 | // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V) |
3230 | Value *V; |
3231 | if (ExactIns->isExact()) { |
3232 | if ((has_single_bit(Value: TyAllocSize) && |
3233 | match(V: GEP.getOperand(i_nocapture: 1), |
3234 | P: m_Shr(L: m_Value(V), |
3235 | R: m_SpecificInt(V: countr_zero(Val: TyAllocSize))))) || |
3236 | match(V: GEP.getOperand(i_nocapture: 1), |
3237 | P: m_IDiv(L: m_Value(V), R: m_SpecificInt(V: TyAllocSize)))) { |
3238 | return GetElementPtrInst::Create(PointeeType: Builder.getInt8Ty(), |
3239 | Ptr: GEP.getPointerOperand(), IdxList: V, |
3240 | NW: GEP.getNoWrapFlags()); |
3241 | } |
3242 | } |
3243 | if (ExactIns->isExact() && ExactIns->hasOneUse()) { |
3244 | // Try to canonicalize a non-i8 element type to i8. If the index is an |
3245 | // exact instruction (div/shr) with a constant RHS, we can fold the |
3246 | // non-i8 element scale into the div/shr (similar to the mul case, |
3247 | // just inverted). |
3248 | const APInt *C; |
3249 | std::optional<APInt> NewC; |
3250 | if (has_single_bit(Value: TyAllocSize) && |
3251 | match(V: ExactIns, P: m_Shr(L: m_Value(V), R: m_APInt(Res&: C))) && |
3252 | C->uge(RHS: countr_zero(Val: TyAllocSize))) |
3253 | NewC = *C - countr_zero(Val: TyAllocSize); |
3254 | else if (match(V: ExactIns, P: m_UDiv(L: m_Value(V), R: m_APInt(Res&: C)))) { |
3255 | APInt Quot; |
3256 | uint64_t Rem; |
3257 | APInt::udivrem(LHS: *C, RHS: TyAllocSize, Quotient&: Quot, Remainder&: Rem); |
3258 | if (Rem == 0) |
3259 | NewC = Quot; |
3260 | } else if (match(V: ExactIns, P: m_SDiv(L: m_Value(V), R: m_APInt(Res&: C)))) { |
3261 | APInt Quot; |
3262 | int64_t Rem; |
3263 | APInt::sdivrem(LHS: *C, RHS: TyAllocSize, Quotient&: Quot, Remainder&: Rem); |
3264 | // For sdiv we need to make sure we aren't creating INT_MIN / -1. |
3265 | if (!Quot.isAllOnes() && Rem == 0) |
3266 | NewC = Quot; |
3267 | } |
3268 | |
3269 | if (NewC.has_value()) { |
3270 | Value *NewOp = Builder.CreateBinOp( |
3271 | Opc: static_cast<Instruction::BinaryOps>(ExactIns->getOpcode()), LHS: V, |
3272 | RHS: ConstantInt::get(Ty: V->getType(), V: *NewC)); |
3273 | cast<BinaryOperator>(Val: NewOp)->setIsExact(); |
3274 | return GetElementPtrInst::Create(PointeeType: Builder.getInt8Ty(), |
3275 | Ptr: GEP.getPointerOperand(), IdxList: NewOp, |
3276 | NW: GEP.getNoWrapFlags()); |
3277 | } |
3278 | } |
3279 | } |
3280 | } |
3281 | } |
3282 | // We do not handle pointer-vector geps here. |
3283 | if (GEPType->isVectorTy()) |
3284 | return nullptr; |
3285 | |
3286 | if (!GEP.isInBounds()) { |
3287 | unsigned IdxWidth = |
3288 | DL.getIndexSizeInBits(AS: PtrOp->getType()->getPointerAddressSpace()); |
3289 | APInt BasePtrOffset(IdxWidth, 0); |
3290 | Value *UnderlyingPtrOp = |
3291 | PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, Offset&: BasePtrOffset); |
3292 | bool CanBeNull, CanBeFreed; |
3293 | uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes( |
3294 | DL, CanBeNull, CanBeFreed); |
3295 | if (!CanBeNull && !CanBeFreed && DerefBytes != 0) { |
3296 | if (GEP.accumulateConstantOffset(DL, Offset&: BasePtrOffset) && |
3297 | BasePtrOffset.isNonNegative()) { |
3298 | APInt AllocSize(IdxWidth, DerefBytes); |
3299 | if (BasePtrOffset.ule(RHS: AllocSize)) { |
3300 | return GetElementPtrInst::CreateInBounds( |
3301 | PointeeType: GEP.getSourceElementType(), Ptr: PtrOp, IdxList: Indices, NameStr: GEP.getName()); |
3302 | } |
3303 | } |
3304 | } |
3305 | } |
3306 | |
3307 | // nusw + nneg -> nuw |
3308 | if (GEP.hasNoUnsignedSignedWrap() && !GEP.hasNoUnsignedWrap() && |
3309 | all_of(Range: GEP.indices(), P: [&](Value *Idx) { |
3310 | return isKnownNonNegative(V: Idx, SQ: SQ.getWithInstruction(I: &GEP)); |
3311 | })) { |
3312 | GEP.setNoWrapFlags(GEP.getNoWrapFlags() | GEPNoWrapFlags::noUnsignedWrap()); |
3313 | return &GEP; |
3314 | } |
3315 | |
3316 | // These rewrites are trying to preserve inbounds/nuw attributes. So we want |
3317 | // to do this after having tried to derive "nuw" above. |
3318 | if (GEP.getNumIndices() == 1) { |
3319 | // Given (gep p, x+y) we want to determine the common nowrap flags for both |
3320 | // geps if transforming into (gep (gep p, x), y). |
3321 | auto GetPreservedNoWrapFlags = [&](bool AddIsNUW) { |
3322 | // We can preserve "inbounds nuw", "nusw nuw", and plain "nuw" if we know |
3323 | // that x + y does not have unsigned wrap. |
3324 | if (GEP.hasNoUnsignedWrap() && AddIsNUW) |
3325 | return GEP.getNoWrapFlags(); |
3326 | return GEPNoWrapFlags::none(); |
3327 | }; |
3328 | |
3329 | // Try to replace ADD + GEP with GEP + GEP. |
3330 | Value *Idx1, *Idx2; |
3331 | if (match(V: GEP.getOperand(i_nocapture: 1), |
3332 | P: m_OneUse(SubPattern: m_AddLike(L: m_Value(V&: Idx1), R: m_Value(V&: Idx2))))) { |
3333 | // %idx = add i64 %idx1, %idx2 |
3334 | // %gep = getelementptr i32, ptr %ptr, i64 %idx |
3335 | // as: |
3336 | // %newptr = getelementptr i32, ptr %ptr, i64 %idx1 |
3337 | // %newgep = getelementptr i32, ptr %newptr, i64 %idx2 |
3338 | bool NUW = match(V: GEP.getOperand(i_nocapture: 1), P: m_NUWAddLike(L: m_Value(), R: m_Value())); |
3339 | GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW); |
3340 | auto *NewPtr = |
3341 | Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: GEP.getPointerOperand(), |
3342 | IdxList: Idx1, Name: "" , NW: NWFlags); |
3343 | return replaceInstUsesWith(I&: GEP, |
3344 | V: Builder.CreateGEP(Ty: GEP.getSourceElementType(), |
3345 | Ptr: NewPtr, IdxList: Idx2, Name: "" , NW: NWFlags)); |
3346 | } |
3347 | ConstantInt *C; |
3348 | if (match(V: GEP.getOperand(i_nocapture: 1), P: m_OneUse(SubPattern: m_SExtLike(Op: m_OneUse(SubPattern: m_NSWAddLike( |
3349 | L: m_Value(V&: Idx1), R: m_ConstantInt(CI&: C))))))) { |
3350 | // %add = add nsw i32 %idx1, idx2 |
3351 | // %sidx = sext i32 %add to i64 |
3352 | // %gep = getelementptr i32, ptr %ptr, i64 %sidx |
3353 | // as: |
3354 | // %newptr = getelementptr i32, ptr %ptr, i32 %idx1 |
3355 | // %newgep = getelementptr i32, ptr %newptr, i32 idx2 |
3356 | bool NUW = match(V: GEP.getOperand(i_nocapture: 1), |
3357 | P: m_NNegZExt(Op: m_NUWAddLike(L: m_Value(), R: m_Value()))); |
3358 | GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW); |
3359 | auto *NewPtr = Builder.CreateGEP( |
3360 | Ty: GEP.getSourceElementType(), Ptr: GEP.getPointerOperand(), |
3361 | IdxList: Builder.CreateSExt(V: Idx1, DestTy: GEP.getOperand(i_nocapture: 1)->getType()), Name: "" , NW: NWFlags); |
3362 | return replaceInstUsesWith( |
3363 | I&: GEP, |
3364 | V: Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: NewPtr, |
3365 | IdxList: Builder.CreateSExt(V: C, DestTy: GEP.getOperand(i_nocapture: 1)->getType()), |
3366 | Name: "" , NW: NWFlags)); |
3367 | } |
3368 | } |
3369 | |
3370 | if (Instruction *R = foldSelectGEP(GEP, Builder)) |
3371 | return R; |
3372 | |
3373 | return nullptr; |
3374 | } |
3375 | |
3376 | static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI, |
3377 | Instruction *AI) { |
3378 | if (isa<ConstantPointerNull>(Val: V)) |
3379 | return true; |
3380 | if (auto *LI = dyn_cast<LoadInst>(Val: V)) |
3381 | return isa<GlobalVariable>(Val: LI->getPointerOperand()); |
3382 | // Two distinct allocations will never be equal. |
3383 | return isAllocLikeFn(V, TLI: &TLI) && V != AI; |
3384 | } |
3385 | |
3386 | /// Given a call CB which uses an address UsedV, return true if we can prove |
3387 | /// that the call's only possible effect is storing to UsedV. |
3388 | static bool isRemovableWrite(CallBase &CB, Value *UsedV, |
3389 | const TargetLibraryInfo &TLI) { |
3390 | if (!CB.use_empty()) |
3391 | // TODO: add recursion if returned attribute is present |
3392 | return false; |
3393 | |
3394 | if (CB.isTerminator()) |
3395 | // TODO: remove implementation restriction |
3396 | return false; |
3397 | |
3398 | if (!CB.willReturn() || !CB.doesNotThrow()) |
3399 | return false; |
3400 | |
3401 | // If the only possible side effect of the call is writing to the alloca, |
3402 | // and the result isn't used, we can safely remove any reads implied by the |
3403 | // call including those which might read the alloca itself. |
3404 | std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: &CB, TLI); |
3405 | return Dest && Dest->Ptr == UsedV; |
3406 | } |
3407 | |
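/// Walk the (transitive) users of allocation site \p AI and determine whether
/// the allocation is removable: every user must be something we know how to
/// erase or rewrite (casts, GEPs, equality compares against null, frees from
/// the same allocation family, non-volatile loads/stores of the allocation,
/// and a handful of intrinsics). On success, return the combined kind of
/// access observed (Mod, Ref, or NoModRef); otherwise return std::nullopt.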
3408 | static std::optional<ModRefInfo> |
3409 | isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakTrackingVH> &Users, |
3410 | const TargetLibraryInfo &TLI, bool KnowInit) { |
3411 | SmallVector<Instruction*, 4> Worklist; |
3412 | const std::optional<StringRef> Family = getAllocationFamily(I: AI, TLI: &TLI); |
3413 | Worklist.push_back(Elt: AI); |
3414 | ModRefInfo Access = KnowInit ? ModRefInfo::NoModRef : ModRefInfo::Mod; |
3415 | |
3416 | do { |
3417 | Instruction *PI = Worklist.pop_back_val(); |
3418 | for (User *U : PI->users()) { |
3419 | Instruction *I = cast<Instruction>(Val: U); |
3420 | switch (I->getOpcode()) { |
3421 | default: |
3422 | // Give up the moment we see something we can't handle. |
3423 | return std::nullopt; |
3424 | |
3425 | case Instruction::AddrSpaceCast: |
3426 | case Instruction::BitCast: |
3427 | case Instruction::GetElementPtr: |
3428 | Users.emplace_back(Args&: I); |
3429 | Worklist.push_back(Elt: I); |
3430 | continue; |
3431 | |
3432 | case Instruction::ICmp: { |
3433 | ICmpInst *ICI = cast<ICmpInst>(Val: I); |
3434 | // We can fold eq/ne comparisons with null to false/true, respectively. |
3435 | // We also fold comparisons in some conditions provided the alloc has |
3436 | // not escaped (see isNeverEqualToUnescapedAlloc). |
3437 | if (!ICI->isEquality()) |
3438 | return std::nullopt; |
3439 | unsigned OtherIndex = (ICI->getOperand(i_nocapture: 0) == PI) ? 1 : 0; |
3440 | if (!isNeverEqualToUnescapedAlloc(V: ICI->getOperand(i_nocapture: OtherIndex), TLI, AI)) |
3441 | return std::nullopt; |
3442 | |
3443 | // Do not fold compares to aligned_alloc calls, as they may have to |
3444 | // return null in case the required alignment cannot be satisfied, |
3445 | // unless we can prove that both alignment and size are valid. |
3446 | auto AlignmentAndSizeKnownValid = [](CallBase *CB) { |
3447 | // Check if the alignment and size of a call to aligned_alloc are valid: |
3448 | // the alignment must be a power of 2 and the size a multiple of the |
3449 | // alignment. |
3450 | const APInt *Alignment; |
3451 | const APInt *Size; |
3452 | return match(V: CB->getArgOperand(i: 0), P: m_APInt(Res&: Alignment)) && |
3453 | match(V: CB->getArgOperand(i: 1), P: m_APInt(Res&: Size)) && |
3454 | Alignment->isPowerOf2() && Size->urem(RHS: *Alignment).isZero(); |
3455 | }; |
3456 | auto *CB = dyn_cast<CallBase>(Val: AI); |
3457 | LibFunc TheLibFunc; |
3458 | if (CB && TLI.getLibFunc(FDecl: *CB->getCalledFunction(), F&: TheLibFunc) && |
3459 | TLI.has(F: TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc && |
3460 | !AlignmentAndSizeKnownValid(CB)) |
3461 | return std::nullopt; |
3462 | Users.emplace_back(Args&: I); |
3463 | continue; |
3464 | } |
3465 | |
3466 | case Instruction::Call: |
3467 | // Ignore no-op and store intrinsics. |
3468 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
3469 | switch (II->getIntrinsicID()) { |
3470 | default: |
3471 | return std::nullopt; |
3472 | |
3473 | case Intrinsic::memmove: |
3474 | case Intrinsic::memcpy: |
3475 | case Intrinsic::memset: { |
3476 | MemIntrinsic *MI = cast<MemIntrinsic>(Val: II); |
3477 | if (MI->isVolatile()) |
3478 | return std::nullopt; |
3479 | // Note: this could also be ModRef, but we can still interpret that |
3480 | // as just Mod in that case. |
3481 | ModRefInfo NewAccess = |
3482 | MI->getRawDest() == PI ? ModRefInfo::Mod : ModRefInfo::Ref; |
3483 | if ((Access & ~NewAccess) != ModRefInfo::NoModRef) |
3484 | return std::nullopt; |
3485 | Access |= NewAccess; |
3486 | [[fallthrough]]; |
3487 | } |
3488 | case Intrinsic::assume: |
3489 | case Intrinsic::invariant_start: |
3490 | case Intrinsic::invariant_end: |
3491 | case Intrinsic::lifetime_start: |
3492 | case Intrinsic::lifetime_end: |
3493 | case Intrinsic::objectsize: |
3494 | Users.emplace_back(Args&: I); |
3495 | continue; |
3496 | case Intrinsic::launder_invariant_group: |
3497 | case Intrinsic::strip_invariant_group: |
3498 | Users.emplace_back(Args&: I); |
3499 | Worklist.push_back(Elt: I); |
3500 | continue; |
3501 | } |
3502 | } |
3503 | |
3504 | if (Family && getFreedOperand(CB: cast<CallBase>(Val: I), TLI: &TLI) == PI && |
3505 | getAllocationFamily(I, TLI: &TLI) == Family) { |
3506 | Users.emplace_back(Args&: I); |
3507 | continue; |
3508 | } |
3509 | |
3510 | if (Family && getReallocatedOperand(CB: cast<CallBase>(Val: I)) == PI && |
3511 | getAllocationFamily(I, TLI: &TLI) == Family) { |
3512 | Users.emplace_back(Args&: I); |
3513 | Worklist.push_back(Elt: I); |
3514 | continue; |
3515 | } |
3516 | |
3517 | if (!isRefSet(MRI: Access) && |
3518 | isRemovableWrite(CB&: *cast<CallBase>(Val: I), UsedV: PI, TLI)) { |
3519 | Access |= ModRefInfo::Mod; |
3520 | Users.emplace_back(Args&: I); |
3521 | continue; |
3522 | } |
3523 | |
3524 | return std::nullopt; |
3525 | |
3526 | case Instruction::Store: { |
3527 | StoreInst *SI = cast<StoreInst>(Val: I); |
3528 | if (SI->isVolatile() || SI->getPointerOperand() != PI) |
3529 | return std::nullopt; |
3530 | if (isRefSet(MRI: Access)) |
3531 | return std::nullopt; |
3532 | Access |= ModRefInfo::Mod; |
3533 | Users.emplace_back(Args&: I); |
3534 | continue; |
3535 | } |
3536 | |
3537 | case Instruction::Load: { |
3538 | LoadInst *LI = cast<LoadInst>(Val: I); |
3539 | if (LI->isVolatile() || LI->getPointerOperand() != PI) |
3540 | return std::nullopt; |
3541 | if (isModSet(MRI: Access)) |
3542 | return std::nullopt; |
3543 | Access |= ModRefInfo::Ref; |
3544 | Users.emplace_back(Args&: I); |
3545 | continue; |
3546 | } |
3547 | } |
3548 | llvm_unreachable("missing a return?" ); |
3549 | } |
3550 | } while (!Worklist.empty()); |
3551 | |
3552 | assert(Access != ModRefInfo::ModRef); |
3553 | return Access; |
3554 | } |
3555 | |
3556 | Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) { |
3557 | assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI)); |
3558 | |
3559 | // If we have a malloc call that is used only in comparisons to null and in |
3560 | // free calls, delete the calls and replace the comparisons with true or |
3561 | // false as appropriate. |
3562 | |
3563 | // This is based on the principle that we can substitute our own allocation |
3564 | // function (which will never return null) rather than relying on knowledge of |
3565 | // the specific function being called. In some sense this can change the |
3566 | // permitted outputs of a program (when we convert a malloc to an alloca, the |
3567 | // fact that the allocation is now on the stack is potentially visible, for |
3568 | // example), but we believe it does so in a permissible manner. |
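//
// For example (illustrative):
//   %p = call ptr @malloc(i64 16)
//   %c = icmp eq ptr %p, null
//   store i8 0, ptr %p
//   call void @free(ptr %p)
// Here all four instructions can be removed, with %c replaced by false.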
3569 | SmallVector<WeakTrackingVH, 64> Users; |
3570 | |
3571 | // If we are removing an alloca with a dbg.declare, insert dbg.value calls |
3572 | // before each store. |
3573 | SmallVector<DbgVariableIntrinsic *, 8> DVIs; |
3574 | SmallVector<DbgVariableRecord *, 8> DVRs; |
3575 | std::unique_ptr<DIBuilder> DIB; |
3576 | if (isa<AllocaInst>(Val: MI)) { |
3577 | findDbgUsers(DbgInsts&: DVIs, V: &MI, DbgVariableRecords: &DVRs); |
3578 | DIB.reset(p: new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false)); |
3579 | } |
3580 | |
3581 | // Determine what getInitialValueOfAllocation would return without actually |
3582 | // allocating the result. |
3583 | bool KnowInitUndef = false; |
3584 | bool KnowInitZero = false; |
3585 | Constant *Init = |
3586 | getInitialValueOfAllocation(V: &MI, TLI: &TLI, Ty: Type::getInt8Ty(C&: MI.getContext())); |
3587 | if (Init) { |
3588 | if (isa<UndefValue>(Val: Init)) |
3589 | KnowInitUndef = true; |
3590 | else if (Init->isNullValue()) |
3591 | KnowInitZero = true; |
3592 | } |
3593 | // The various sanitizers don't actually return undef memory, but rather |
3594 | // memory initialized with special forms of runtime poison. |
3595 | auto &F = *MI.getFunction(); |
3596 | if (F.hasFnAttribute(Kind: Attribute::SanitizeMemory) || |
3597 | F.hasFnAttribute(Kind: Attribute::SanitizeAddress)) |
3598 | KnowInitUndef = false; |
3599 | |
3600 | auto Removable = |
3601 | isAllocSiteRemovable(AI: &MI, Users, TLI, KnowInit: KnowInitZero | KnowInitUndef); |
3602 | if (Removable) { |
3603 | for (WeakTrackingVH &User : Users) { |
3604 | // Lower all @llvm.objectsize and MTI calls first, because they may use |
3605 | // a bitcast/GEP of the alloca we are removing. |
3606 | if (!User) |
3607 | continue; |
3608 | |
3609 | Instruction *I = cast<Instruction>(Val: &*User); |
3610 | |
3611 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
3612 | if (II->getIntrinsicID() == Intrinsic::objectsize) { |
3613 | SmallVector<Instruction *> InsertedInstructions; |
3614 | Value *Result = lowerObjectSizeCall( |
3615 | ObjectSize: II, DL, TLI: &TLI, AA, /*MustSucceed=*/true, InsertedInstructions: &InsertedInstructions); |
3616 | for (Instruction *Inserted : InsertedInstructions) |
3617 | Worklist.add(I: Inserted); |
3618 | replaceInstUsesWith(I&: *I, V: Result); |
3619 | eraseInstFromFunction(I&: *I); |
3620 | User = nullptr; // Skip examining in the next loop. |
3621 | continue; |
3622 | } |
3623 | if (auto *MTI = dyn_cast<MemTransferInst>(Val: I)) { |
3624 | if (KnowInitZero && isRefSet(MRI: *Removable)) { |
3625 | IRBuilderBase::InsertPointGuard Guard(Builder); |
3626 | Builder.SetInsertPoint(MTI); |
3627 | auto *M = Builder.CreateMemSet( |
3628 | Ptr: MTI->getRawDest(), |
3629 | Val: ConstantInt::get(Ty: Type::getInt8Ty(C&: MI.getContext()), V: 0), |
3630 | Size: MTI->getLength(), Align: MTI->getDestAlign()); |
3631 | M->copyMetadata(SrcInst: *MTI); |
3632 | } |
3633 | } |
3634 | } |
3635 | } |
3636 | for (WeakTrackingVH &User : Users) { |
3637 | if (!User) |
3638 | continue; |
3639 | |
3640 | Instruction *I = cast<Instruction>(Val: &*User); |
3641 | |
3642 | if (ICmpInst *C = dyn_cast<ICmpInst>(Val: I)) { |
3643 | replaceInstUsesWith(I&: *C, |
3644 | V: ConstantInt::get(Ty: Type::getInt1Ty(C&: C->getContext()), |
3645 | V: C->isFalseWhenEqual())); |
3646 | } else if (auto *SI = dyn_cast<StoreInst>(Val: I)) { |
3647 | for (auto *DVI : DVIs) |
3648 | if (DVI->isAddressOfVariable()) |
3649 | ConvertDebugDeclareToDebugValue(DII: DVI, SI, Builder&: *DIB); |
3650 | for (auto *DVR : DVRs) |
3651 | if (DVR->isAddressOfVariable()) |
3652 | ConvertDebugDeclareToDebugValue(DVR, SI, Builder&: *DIB); |
3653 | } else { |
3654 | // Casts, GEP, or anything else: we're about to delete this instruction, |
3655 | // so it cannot have any valid uses. |
3656 | Constant *Replace; |
3657 | if (isa<LoadInst>(Val: I)) { |
3658 | assert(KnowInitZero || KnowInitUndef); |
3659 | Replace = KnowInitUndef ? UndefValue::get(T: I->getType()) |
3660 | : Constant::getNullValue(Ty: I->getType()); |
3661 | } else |
3662 | Replace = PoisonValue::get(T: I->getType()); |
3663 | replaceInstUsesWith(I&: *I, V: Replace); |
3664 | } |
3665 | eraseInstFromFunction(I&: *I); |
3666 | } |
3667 | |
3668 | if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &MI)) { |
3669 | // Replace invoke with a NOP intrinsic to maintain the original CFG |
3670 | Module *M = II->getModule(); |
3671 | Function *F = Intrinsic::getOrInsertDeclaration(M, id: Intrinsic::donothing); |
3672 | auto *NewII = InvokeInst::Create( |
3673 | Func: F, IfNormal: II->getNormalDest(), IfException: II->getUnwindDest(), Args: {}, NameStr: "" , InsertBefore: II->getParent()); |
3674 | NewII->setDebugLoc(II->getDebugLoc()); |
3675 | } |
3676 | |
3677 | // Remove debug intrinsics which describe the value contained within the |
3678 | // alloca. In addition to removing dbg.{declare,addr} which simply point to |
3679 | // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.: |
3680 | // |
3681 | // ``` |
3682 | // define void @foo(i32 %0) { |
3683 | // %a = alloca i32 ; Deleted. |
3684 | // store i32 %0, i32* %a |
3685 | // dbg.value(i32 %0, "arg0") ; Not deleted. |
3686 | // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted. |
3687 | // call void @trivially_inlinable_no_op(i32* %a) |
3688 | // ret void |
3689 | // } |
3690 | // ``` |
3691 | // |
3692 | // This may not be required if we stop describing the contents of allocas |
3693 | // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in |
3694 | // the LowerDbgDeclare utility. |
3695 | // |
3696 | // If there is a dead store to `%a` in @trivially_inlinable_no_op, the |
3697 | // "arg0" dbg.value may be stale after the call. However, failing to remove |
3698 | // the DW_OP_deref dbg.value causes large gaps in location coverage. |
3699 | // |
3700 | // FIXME: the Assignment Tracking project has now likely made this |
3701 | // redundant (and it's sometimes harmful). |
3702 | for (auto *DVI : DVIs) |
3703 | if (DVI->isAddressOfVariable() || DVI->getExpression()->startsWithDeref()) |
3704 | DVI->eraseFromParent(); |
3705 | for (auto *DVR : DVRs) |
3706 | if (DVR->isAddressOfVariable() || DVR->getExpression()->startsWithDeref()) |
3707 | DVR->eraseFromParent(); |
3708 | |
3709 | return eraseInstFromFunction(I&: MI); |
3710 | } |
3711 | return nullptr; |
3712 | } |
3713 | |
3714 | /// Move the call to free before a NULL test. |
3715 | /// |
3716 | /// Check if this call to free is executed only after its argument has been |
3717 | /// tested against NULL (property 0). |
3718 | /// If so, it is legal to move this call into its predecessor block. |
3719 | /// |
3720 | /// The move is performed only if the block containing the call to free |
3721 | /// will be removed, i.e.: |
3722 | /// 1. it has only one predecessor P, and P has two successors |
3723 | /// 2. it contains the call, noops, and an unconditional branch |
3724 | /// 3. its successor is the same as its predecessor's successor |
3725 | /// |
3726 | /// Profitability is not a concern here; this function should be called only |
3727 | /// if the caller knows this transformation would be profitable (e.g., for |
3728 | /// code size). |
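///
/// For example (illustrative):
///   pred:
///     %cmp = icmp eq ptr %p, null
///     br i1 %cmp, label %exit, label %free.bb
///   free.bb:
///     call void @free(ptr %p)
///     br label %exit
/// The call is hoisted into %pred (freeing a null pointer is a no-op), after
/// which %free.bb becomes an empty block that later passes can remove.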
3729 | static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI, |
3730 | const DataLayout &DL) { |
3731 | Value *Op = FI.getArgOperand(i: 0); |
3732 | BasicBlock *FreeInstrBB = FI.getParent(); |
3733 | BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor(); |
3734 | |
3735 | // Validate part of constraint #1: Only one predecessor |
3736 | // FIXME: We could allow more than one predecessor, but in that case we |
3737 | // would have to duplicate the call to free in each predecessor, and that |
3738 | // may not be profitable even for code size. |
3739 | if (!PredBB) |
3740 | return nullptr; |
3741 | |
3742 | // Validate constraint #2: Does this block contain only the call to |
3743 | // free, noops, and an unconditional branch? |
3744 | BasicBlock *SuccBB; |
3745 | Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator(); |
3746 | if (!match(V: FreeInstrBBTerminator, P: m_UnconditionalBr(Succ&: SuccBB))) |
3747 | return nullptr; |
3748 | |
3749 | // If there are only 2 instructions in the block at this point, they must |
3750 | // be the call to free and the unconditional branch. |
3751 | // If there are more than 2 instructions, check that the extra ones are |
3752 | // noops, i.e., they won't hurt the performance of the generated code. |
3753 | if (FreeInstrBB->size() != 2) { |
3754 | for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) { |
3755 | if (&Inst == &FI || &Inst == FreeInstrBBTerminator) |
3756 | continue; |
3757 | auto *Cast = dyn_cast<CastInst>(Val: &Inst); |
3758 | if (!Cast || !Cast->isNoopCast(DL)) |
3759 | return nullptr; |
3760 | } |
3761 | } |
3762 | // Validate the rest of constraint #1 by matching on the pred branch. |
3763 | Instruction *TI = PredBB->getTerminator(); |
3764 | BasicBlock *TrueBB, *FalseBB; |
3765 | CmpPredicate Pred; |
3766 | if (!match(V: TI, P: m_Br(C: m_ICmp(Pred, |
3767 | L: m_CombineOr(L: m_Specific(V: Op), |
3768 | R: m_Specific(V: Op->stripPointerCasts())), |
3769 | R: m_Zero()), |
3770 | T&: TrueBB, F&: FalseBB))) |
3771 | return nullptr; |
3772 | if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE) |
3773 | return nullptr; |
3774 | |
3775 | // Validate constraint #3: Ensure the null case just falls through. |
3776 | if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB)) |
3777 | return nullptr; |
3778 | assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) && |
3779 | "Broken CFG: missing edge from predecessor to successor" ); |
3780 | |
3781 | // At this point, we know that everything in FreeInstrBB can be moved |
3782 | // before TI. |
3783 | for (Instruction &Instr : llvm::make_early_inc_range(Range&: *FreeInstrBB)) { |
3784 | if (&Instr == FreeInstrBBTerminator) |
3785 | break; |
3786 | Instr.moveBeforePreserving(MovePos: TI->getIterator()); |
3787 | } |
3788 | assert(FreeInstrBB->size() == 1 && |
3789 | "Only the branch instruction should remain" ); |
3790 | |
3791 | // Now that we've moved the call to free before the NULL check, we have to |
3792 | // remove any attributes on its parameter that imply it's non-null, because |
3793 | // those attributes might have only been valid because of the NULL check, and |
3794 | // we can get miscompiles if we keep them. This is conservative if non-null is |
3795 | // also implied by something other than the NULL check, but it's guaranteed to |
3796 | // be correct, and the conservativeness won't matter in practice, since the |
3797 | // attributes are irrelevant for the call to free itself and the pointer |
3798 | // shouldn't be used after the call. |
3799 | AttributeList Attrs = FI.getAttributes(); |
3800 | Attrs = Attrs.removeParamAttribute(C&: FI.getContext(), ArgNo: 0, Kind: Attribute::NonNull); |
3801 | Attribute Dereferenceable = Attrs.getParamAttr(ArgNo: 0, Kind: Attribute::Dereferenceable); |
3802 | if (Dereferenceable.isValid()) { |
3803 | uint64_t Bytes = Dereferenceable.getDereferenceableBytes(); |
3804 | Attrs = Attrs.removeParamAttribute(C&: FI.getContext(), ArgNo: 0, |
3805 | Kind: Attribute::Dereferenceable); |
3806 | Attrs = Attrs.addDereferenceableOrNullParamAttr(C&: FI.getContext(), ArgNo: 0, Bytes); |
3807 | } |
3808 | FI.setAttributes(Attrs); |
3809 | |
3810 | return &FI; |
3811 | } |
3812 | |
3813 | Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) { |
3814 | // free undef -> unreachable. |
3815 | if (isa<UndefValue>(Val: Op)) { |
3816 | // Leave a marker since we can't modify the CFG here. |
3817 | CreateNonTerminatorUnreachable(InsertAt: &FI); |
3818 | return eraseInstFromFunction(I&: FI); |
3819 | } |
3820 | |
3821 | // If we have 'free null' delete the instruction. This can happen in stl code |
3822 | // when lots of inlining happens. |
3823 | if (isa<ConstantPointerNull>(Val: Op)) |
3824 | return eraseInstFromFunction(I&: FI); |
3825 | |
3826 | // If we had free(realloc(...)) with no intervening uses, then eliminate the |
3827 | // realloc() entirely. |
3828 | CallInst *CI = dyn_cast<CallInst>(Val: Op); |
3829 | if (CI && CI->hasOneUse()) |
3830 | if (Value *ReallocatedOp = getReallocatedOperand(CB: CI)) |
3831 | return eraseInstFromFunction(I&: *replaceInstUsesWith(I&: *CI, V: ReallocatedOp)); |
3832 | |
  // If we optimize for code size, try to move the call to free before the null
  // test so that SimplifyCFG can remove the empty block and dead code
  // elimination can remove the branch. I.e., this helps to turn something like:
3836 | // if (foo) free(foo); |
3837 | // into |
3838 | // free(foo); |
3839 | // |
3840 | // Note that we can only do this for 'free' and not for any flavor of |
3841 | // 'operator delete'; there is no 'operator delete' symbol for which we are |
3842 | // permitted to invent a call, even if we're passing in a null pointer. |
3843 | if (MinimizeSize) { |
3844 | LibFunc Func; |
3845 | if (TLI.getLibFunc(CB: FI, F&: Func) && TLI.has(F: Func) && Func == LibFunc_free) |
3846 | if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL)) |
3847 | return I; |
3848 | } |
3849 | |
3850 | return nullptr; |
3851 | } |
3852 | |
3853 | Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) { |
3854 | Value *RetVal = RI.getReturnValue(); |
3855 | if (!RetVal) |
3856 | return nullptr; |
3857 | |
3858 | Function *F = RI.getFunction(); |
3859 | Type *RetTy = RetVal->getType(); |
3860 | if (RetTy->isPointerTy()) { |
3861 | bool HasDereferenceable = |
3862 | F->getAttributes().getRetDereferenceableBytes() > 0; |
3863 | if (F->hasRetAttribute(Kind: Attribute::NonNull) || |
3864 | (HasDereferenceable && |
3865 | !NullPointerIsDefined(F, AS: RetTy->getPointerAddressSpace()))) { |
3866 | if (Value *V = simplifyNonNullOperand(V: RetVal, HasDereferenceable)) |
3867 | return replaceOperand(I&: RI, OpNum: 0, V); |
3868 | } |
3869 | } |
3870 | |
3871 | if (!AttributeFuncs::isNoFPClassCompatibleType(Ty: RetTy)) |
3872 | return nullptr; |
3873 | |
3874 | FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass(); |
3875 | if (ReturnClass == fcNone) |
3876 | return nullptr; |
3877 | |
3878 | KnownFPClass KnownClass; |
3879 | Value *Simplified = |
3880 | SimplifyDemandedUseFPClass(V: RetVal, DemandedMask: ~ReturnClass, Known&: KnownClass, CxtI: &RI); |
3881 | if (!Simplified) |
3882 | return nullptr; |
3883 | |
3884 | return ReturnInst::Create(C&: RI.getContext(), retVal: Simplified); |
3885 | } |
3886 | |
3887 | // WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()! |
3888 | bool InstCombinerImpl::removeInstructionsBeforeUnreachable(Instruction &I) { |
3889 | // Try to remove the previous instruction if it must lead to unreachable. |
3890 | // This includes instructions like stores and "llvm.assume" that may not get |
3891 | // removed by simple dead code elimination. |
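  // For example (illustrative):
  //   store i32 1, ptr %p
  //   call void @llvm.assume(i1 %c)
  //   unreachable
  // Both the store and the assume are guaranteed to transfer execution to
  // the unreachable terminator, so both can be erased.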
3892 | bool Changed = false; |
3893 | while (Instruction *Prev = I.getPrevNonDebugInstruction()) { |
3894 | // While we theoretically can erase EH, that would result in a block that |
3895 | // used to start with an EH no longer starting with EH, which is invalid. |
3896 | // To make it valid, we'd need to fixup predecessors to no longer refer to |
3897 | // this block, but that changes CFG, which is not allowed in InstCombine. |
3898 | if (Prev->isEHPad()) |
3899 | break; // Can not drop any more instructions. We're done here. |
3900 | |
3901 | if (!isGuaranteedToTransferExecutionToSuccessor(I: Prev)) |
3902 | break; // Can not drop any more instructions. We're done here. |
3903 | // Otherwise, this instruction can be freely erased, |
3904 | // even if it is not side-effect free. |
3905 | |
3906 | // A value may still have uses before we process it here (for example, in |
3907 | // another unreachable block), so convert those to poison. |
3908 | replaceInstUsesWith(I&: *Prev, V: PoisonValue::get(T: Prev->getType())); |
3909 | eraseInstFromFunction(I&: *Prev); |
3910 | Changed = true; |
3911 | } |
3912 | return Changed; |
3913 | } |
3914 | |
3915 | Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) { |
3916 | removeInstructionsBeforeUnreachable(I); |
3917 | return nullptr; |
3918 | } |
3919 | |
3920 | Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) { |
3921 | assert(BI.isUnconditional() && "Only for unconditional branches." ); |
3922 | |
  // If the second-to-last instruction in this basic block (excluding debug
  // info) is a store and the block ends with an unconditional branch, try to
  // move the store into the successor block.
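  //
  // For example (illustrative), a block such as:
  //   then:
  //     store i32 1, ptr %p
  //     br label %end
  // may have its store merged into %end, e.g. by combining it with a matching
  // store in %end's other predecessor into a single store of a phi.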
3926 | |
3927 | auto GetLastSinkableStore = [](BasicBlock::iterator BBI) { |
3928 | BasicBlock::iterator FirstInstr = BBI->getParent()->begin(); |
3929 | do { |
3930 | if (BBI != FirstInstr) |
3931 | --BBI; |
3932 | } while (BBI != FirstInstr && BBI->isDebugOrPseudoInst()); |
3933 | |
3934 | return dyn_cast<StoreInst>(Val&: BBI); |
3935 | }; |
3936 | |
3937 | if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI))) |
3938 | if (mergeStoreIntoSuccessor(SI&: *SI)) |
3939 | return &BI; |
3940 | |
3941 | return nullptr; |
3942 | } |
3943 | |
3944 | void InstCombinerImpl::addDeadEdge(BasicBlock *From, BasicBlock *To, |
3945 | SmallVectorImpl<BasicBlock *> &Worklist) { |
3946 | if (!DeadEdges.insert(V: {From, To}).second) |
3947 | return; |
3948 | |
3949 | // Replace phi node operands in successor with poison. |
3950 | for (PHINode &PN : To->phis()) |
3951 | for (Use &U : PN.incoming_values()) |
3952 | if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(Val: U)) { |
3953 | replaceUse(U, NewValue: PoisonValue::get(T: PN.getType())); |
3954 | addToWorklist(I: &PN); |
3955 | MadeIRChange = true; |
3956 | } |
3957 | |
3958 | Worklist.push_back(Elt: To); |
3959 | } |
3960 | |
3961 | // Under the assumption that I is unreachable, remove it and following |
3962 | // instructions. Changes are reported directly to MadeIRChange. |
3963 | void InstCombinerImpl::handleUnreachableFrom( |
3964 | Instruction *I, SmallVectorImpl<BasicBlock *> &Worklist) { |
3965 | BasicBlock *BB = I->getParent(); |
3966 | for (Instruction &Inst : make_early_inc_range( |
3967 | Range: make_range(x: std::next(x: BB->getTerminator()->getReverseIterator()), |
3968 | y: std::next(x: I->getReverseIterator())))) { |
3969 | if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) { |
3970 | replaceInstUsesWith(I&: Inst, V: PoisonValue::get(T: Inst.getType())); |
3971 | MadeIRChange = true; |
3972 | } |
3973 | if (Inst.isEHPad() || Inst.getType()->isTokenTy()) |
3974 | continue; |
3975 | // RemoveDIs: erase debug-info on this instruction manually. |
3976 | Inst.dropDbgRecords(); |
3977 | eraseInstFromFunction(I&: Inst); |
3978 | MadeIRChange = true; |
3979 | } |
3980 | |
3981 | SmallVector<Value *> Changed; |
3982 | if (handleUnreachableTerminator(I: BB->getTerminator(), PoisonedValues&: Changed)) { |
3983 | MadeIRChange = true; |
3984 | for (Value *V : Changed) |
3985 | addToWorklist(I: cast<Instruction>(Val: V)); |
3986 | } |
3987 | |
3988 | // Handle potentially dead successors. |
3989 | for (BasicBlock *Succ : successors(BB)) |
3990 | addDeadEdge(From: BB, To: Succ, Worklist); |
3991 | } |
3992 | |
3993 | void InstCombinerImpl::handlePotentiallyDeadBlocks( |
3994 | SmallVectorImpl<BasicBlock *> &Worklist) { |
3995 | while (!Worklist.empty()) { |
3996 | BasicBlock *BB = Worklist.pop_back_val(); |
3997 | if (!all_of(Range: predecessors(BB), P: [&](BasicBlock *Pred) { |
3998 | return DeadEdges.contains(V: {Pred, BB}) || DT.dominates(A: BB, B: Pred); |
3999 | })) |
4000 | continue; |
4001 | |
4002 | handleUnreachableFrom(I: &BB->front(), Worklist); |
4003 | } |
4004 | } |
4005 | |
4006 | void InstCombinerImpl::handlePotentiallyDeadSuccessors(BasicBlock *BB, |
4007 | BasicBlock *LiveSucc) { |
4008 | SmallVector<BasicBlock *> Worklist; |
4009 | for (BasicBlock *Succ : successors(BB)) { |
4010 | // The live successor isn't dead. |
4011 | if (Succ == LiveSucc) |
4012 | continue; |
4013 | |
4014 | addDeadEdge(From: BB, To: Succ, Worklist); |
4015 | } |
4016 | |
4017 | handlePotentiallyDeadBlocks(Worklist); |
4018 | } |
4019 | |
4020 | Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) { |
4021 | if (BI.isUnconditional()) |
4022 | return visitUnconditionalBranchInst(BI); |
4023 | |
4024 | // Change br (not X), label True, label False to: br X, label False, True |
4025 | Value *Cond = BI.getCondition(); |
4026 | Value *X; |
4027 | if (match(V: Cond, P: m_Not(V: m_Value(V&: X))) && !isa<Constant>(Val: X)) { |
4028 | // Swap Destinations and condition... |
4029 | BI.swapSuccessors(); |
4030 | if (BPI) |
4031 | BPI->swapSuccEdgesProbabilities(Src: BI.getParent()); |
4032 | return replaceOperand(I&: BI, OpNum: 0, V: X); |
4033 | } |
4034 | |
4035 | // Canonicalize logical-and-with-invert as logical-or-with-invert. |
4036 | // This is done by inverting the condition and swapping successors: |
4037 | // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T |
4038 | Value *Y; |
4039 | if (isa<SelectInst>(Val: Cond) && |
4040 | match(V: Cond, |
4041 | P: m_OneUse(SubPattern: m_LogicalAnd(L: m_Value(V&: X), R: m_OneUse(SubPattern: m_Not(V: m_Value(V&: Y))))))) { |
4042 | Value *NotX = Builder.CreateNot(V: X, Name: "not." + X->getName()); |
4043 | Value *Or = Builder.CreateLogicalOr(Cond1: NotX, Cond2: Y); |
4044 | BI.swapSuccessors(); |
4045 | if (BPI) |
4046 | BPI->swapSuccEdgesProbabilities(Src: BI.getParent()); |
4047 | return replaceOperand(I&: BI, OpNum: 0, V: Or); |
4048 | } |
4049 | |
4050 | // If the condition is irrelevant, remove the use so that other |
4051 | // transforms on the condition become more effective. |
4052 | if (!isa<ConstantInt>(Val: Cond) && BI.getSuccessor(i: 0) == BI.getSuccessor(i: 1)) |
4053 | return replaceOperand(I&: BI, OpNum: 0, V: ConstantInt::getFalse(Ty: Cond->getType())); |
4054 | |
4055 | // Canonicalize, for example, fcmp_one -> fcmp_oeq. |
4056 | CmpPredicate Pred; |
4057 | if (match(V: Cond, P: m_OneUse(SubPattern: m_FCmp(Pred, L: m_Value(), R: m_Value()))) && |
4058 | !isCanonicalPredicate(Pred)) { |
4059 | // Swap destinations and condition. |
4060 | auto *Cmp = cast<CmpInst>(Val: Cond); |
4061 | Cmp->setPredicate(CmpInst::getInversePredicate(pred: Pred)); |
4062 | BI.swapSuccessors(); |
4063 | if (BPI) |
4064 | BPI->swapSuccEdgesProbabilities(Src: BI.getParent()); |
4065 | Worklist.push(I: Cmp); |
4066 | return &BI; |
4067 | } |
4068 | |
4069 | if (isa<UndefValue>(Val: Cond)) { |
4070 | handlePotentiallyDeadSuccessors(BB: BI.getParent(), /*LiveSucc*/ nullptr); |
4071 | return nullptr; |
4072 | } |
4073 | if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) { |
4074 | handlePotentiallyDeadSuccessors(BB: BI.getParent(), |
4075 | LiveSucc: BI.getSuccessor(i: !CI->getZExtValue())); |
4076 | return nullptr; |
4077 | } |
4078 | |
4079 | // Replace all dominated uses of the condition with true/false |
4080 | // Ignore constant expressions to avoid iterating over uses on other |
4081 | // functions. |
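  // For example (illustrative), given 'br i1 %c, label %t, label %f', uses of
  // %c dominated by the edge to %t are replaced with 'true' and uses dominated
  // by the edge to %f with 'false'.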
4082 | if (!isa<Constant>(Val: Cond) && BI.getSuccessor(i: 0) != BI.getSuccessor(i: 1)) { |
4083 | for (auto &U : make_early_inc_range(Range: Cond->uses())) { |
4084 | BasicBlockEdge Edge0(BI.getParent(), BI.getSuccessor(i: 0)); |
4085 | if (DT.dominates(BBE: Edge0, U)) { |
4086 | replaceUse(U, NewValue: ConstantInt::getTrue(Ty: Cond->getType())); |
4087 | addToWorklist(I: cast<Instruction>(Val: U.getUser())); |
4088 | continue; |
4089 | } |
4090 | BasicBlockEdge Edge1(BI.getParent(), BI.getSuccessor(i: 1)); |
4091 | if (DT.dominates(BBE: Edge1, U)) { |
4092 | replaceUse(U, NewValue: ConstantInt::getFalse(Ty: Cond->getType())); |
4093 | addToWorklist(I: cast<Instruction>(Val: U.getUser())); |
4094 | } |
4095 | } |
4096 | } |
4097 | |
4098 | DC.registerBranch(BI: &BI); |
4099 | return nullptr; |
4100 | } |
4101 | |
4102 | // Replaces (switch (select cond, X, C)/(select cond, C, X)) with (switch X) if |
4103 | // we can prove that both (switch C) and (switch X) go to the default when cond |
4104 | // is false/true. |
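//
// For example (illustrative):
//   %cond = icmp ult i32 %x, 4
//   %sel = select i1 %cond, i32 %x, i32 100
//   switch i32 %sel, label %default [ i32 0, label %a
//                                     i32 3, label %b ]
// The constant arm (100) is not a case, so it reaches the default, and
// whenever %cond is false %x is >= 4 and also reaches the default; the switch
// can therefore be performed on %x directly.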
4105 | static Value *simplifySwitchOnSelectUsingRanges(SwitchInst &SI, |
4106 | SelectInst *Select, |
4107 | bool IsTrueArm) { |
4108 | unsigned CstOpIdx = IsTrueArm ? 1 : 2; |
4109 | auto *C = dyn_cast<ConstantInt>(Val: Select->getOperand(i_nocapture: CstOpIdx)); |
4110 | if (!C) |
4111 | return nullptr; |
4112 | |
4113 | BasicBlock *CstBB = SI.findCaseValue(C)->getCaseSuccessor(); |
4114 | if (CstBB != SI.getDefaultDest()) |
4115 | return nullptr; |
4116 | Value *X = Select->getOperand(i_nocapture: 3 - CstOpIdx); |
4117 | CmpPredicate Pred; |
4118 | const APInt *RHSC; |
4119 | if (!match(V: Select->getCondition(), |
4120 | P: m_ICmp(Pred, L: m_Specific(V: X), R: m_APInt(Res&: RHSC)))) |
4121 | return nullptr; |
4122 | if (IsTrueArm) |
4123 | Pred = ICmpInst::getInversePredicate(pred: Pred); |
4124 | |
4125 | // See whether we can replace the select with X |
4126 | ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, Other: *RHSC); |
4127 | for (auto Case : SI.cases()) |
4128 | if (!CR.contains(Val: Case.getCaseValue()->getValue())) |
4129 | return nullptr; |
4130 | |
4131 | return X; |
4132 | } |
4133 | |
4134 | Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) { |
4135 | Value *Cond = SI.getCondition(); |
4136 | Value *Op0; |
4137 | ConstantInt *AddRHS; |
4138 | if (match(V: Cond, P: m_Add(L: m_Value(V&: Op0), R: m_ConstantInt(CI&: AddRHS)))) { |
4139 | // Change 'switch (X+4) case 1:' into 'switch (X) case -3'. |
4140 | for (auto Case : SI.cases()) { |
4141 | Constant *NewCase = ConstantExpr::getSub(C1: Case.getCaseValue(), C2: AddRHS); |
4142 | assert(isa<ConstantInt>(NewCase) && |
4143 | "Result of expression should be constant" ); |
4144 | Case.setValue(cast<ConstantInt>(Val: NewCase)); |
4145 | } |
4146 | return replaceOperand(I&: SI, OpNum: 0, V: Op0); |
4147 | } |
4148 | |
4149 | ConstantInt *SubLHS; |
4150 | if (match(V: Cond, P: m_Sub(L: m_ConstantInt(CI&: SubLHS), R: m_Value(V&: Op0)))) { |
4151 | // Change 'switch (1-X) case 1:' into 'switch (X) case 0'. |
4152 | for (auto Case : SI.cases()) { |
4153 | Constant *NewCase = ConstantExpr::getSub(C1: SubLHS, C2: Case.getCaseValue()); |
4154 | assert(isa<ConstantInt>(NewCase) && |
4155 | "Result of expression should be constant" ); |
4156 | Case.setValue(cast<ConstantInt>(Val: NewCase)); |
4157 | } |
4158 | return replaceOperand(I&: SI, OpNum: 0, V: Op0); |
4159 | } |
4160 | |
4161 | uint64_t ShiftAmt; |
4162 | if (match(V: Cond, P: m_Shl(L: m_Value(V&: Op0), R: m_ConstantInt(V&: ShiftAmt))) && |
4163 | ShiftAmt < Op0->getType()->getScalarSizeInBits() && |
4164 | all_of(Range: SI.cases(), P: [&](const auto &Case) { |
4165 | return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt; |
4166 | })) { |
4167 | // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'. |
4168 | OverflowingBinaryOperator *Shl = cast<OverflowingBinaryOperator>(Val: Cond); |
4169 | if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() || |
4170 | Shl->hasOneUse()) { |
4171 | Value *NewCond = Op0; |
4172 | if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) { |
4173 | // If the shift may wrap, we need to mask off the shifted bits. |
4174 | unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); |
4175 | NewCond = Builder.CreateAnd( |
4176 | LHS: Op0, RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - ShiftAmt)); |
4177 | } |
4178 | for (auto Case : SI.cases()) { |
4179 | const APInt &CaseVal = Case.getCaseValue()->getValue(); |
4180 | APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt) |
4181 | : CaseVal.lshr(shiftAmt: ShiftAmt); |
4182 | Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: ShiftedCase)); |
4183 | } |
4184 | return replaceOperand(I&: SI, OpNum: 0, V: NewCond); |
4185 | } |
4186 | } |
4187 | |
4188 | // Fold switch(zext/sext(X)) into switch(X) if possible. |
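  // For example (illustrative), 'switch i32 %w' with %w = zext i8 %x can
  // become 'switch i8 %x' with truncated case values, provided every case
  // value fits in 8 bits.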
4189 | if (match(V: Cond, P: m_ZExtOrSExt(Op: m_Value(V&: Op0)))) { |
4190 | bool IsZExt = isa<ZExtInst>(Val: Cond); |
4191 | Type *SrcTy = Op0->getType(); |
4192 | unsigned NewWidth = SrcTy->getScalarSizeInBits(); |
4193 | |
4194 | if (all_of(Range: SI.cases(), P: [&](const auto &Case) { |
4195 | const APInt &CaseVal = Case.getCaseValue()->getValue(); |
4196 | return IsZExt ? CaseVal.isIntN(N: NewWidth) |
4197 | : CaseVal.isSignedIntN(N: NewWidth); |
4198 | })) { |
4199 | for (auto &Case : SI.cases()) { |
4200 | APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth); |
4201 | Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase)); |
4202 | } |
4203 | return replaceOperand(I&: SI, OpNum: 0, V: Op0); |
4204 | } |
4205 | } |
4206 | |
4207 | // Fold switch(select cond, X, Y) into switch(X/Y) if possible |
4208 | if (auto *Select = dyn_cast<SelectInst>(Val: Cond)) { |
4209 | if (Value *V = |
4210 | simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/true)) |
4211 | return replaceOperand(I&: SI, OpNum: 0, V); |
4212 | if (Value *V = |
4213 | simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/false)) |
4214 | return replaceOperand(I&: SI, OpNum: 0, V); |
4215 | } |
4216 | |
4217 | KnownBits Known = computeKnownBits(V: Cond, CxtI: &SI); |
4218 | unsigned LeadingKnownZeros = Known.countMinLeadingZeros(); |
4219 | unsigned LeadingKnownOnes = Known.countMinLeadingOnes(); |
4220 | |
4221 | // Compute the number of leading bits we can ignore. |
4222 | // TODO: A better way to determine this would use ComputeNumSignBits(). |
4223 | for (const auto &C : SI.cases()) { |
4224 | LeadingKnownZeros = |
4225 | std::min(a: LeadingKnownZeros, b: C.getCaseValue()->getValue().countl_zero()); |
4226 | LeadingKnownOnes = |
4227 | std::min(a: LeadingKnownOnes, b: C.getCaseValue()->getValue().countl_one()); |
4228 | } |
4229 | |
4230 | unsigned NewWidth = Known.getBitWidth() - std::max(a: LeadingKnownZeros, b: LeadingKnownOnes); |
4231 | |
4232 | // Shrink the condition operand if the new type is smaller than the old type. |
4233 | // But do not shrink to a non-standard type, because backend can't generate |
4234 | // good code for that yet. |
4235 | // TODO: We can make it aggressive again after fixing PR39569. |
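  // For example (illustrative), if the condition is an i32 whose top 24 bits
  // are known to be zero and every case value also has at least 24 leading
  // zeros, the switch can be performed on an i8 truncation of the condition.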
4236 | if (NewWidth > 0 && NewWidth < Known.getBitWidth() && |
4237 | shouldChangeType(FromWidth: Known.getBitWidth(), ToWidth: NewWidth)) { |
4238 | IntegerType *Ty = IntegerType::get(C&: SI.getContext(), NumBits: NewWidth); |
4239 | Builder.SetInsertPoint(&SI); |
4240 | Value *NewCond = Builder.CreateTrunc(V: Cond, DestTy: Ty, Name: "trunc" ); |
4241 | |
4242 | for (auto Case : SI.cases()) { |
4243 | APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth); |
4244 | Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase)); |
4245 | } |
4246 | return replaceOperand(I&: SI, OpNum: 0, V: NewCond); |
4247 | } |
4248 | |
4249 | if (isa<UndefValue>(Val: Cond)) { |
4250 | handlePotentiallyDeadSuccessors(BB: SI.getParent(), /*LiveSucc*/ nullptr); |
4251 | return nullptr; |
4252 | } |
4253 | if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) { |
4254 | handlePotentiallyDeadSuccessors(BB: SI.getParent(), |
4255 | LiveSucc: SI.findCaseValue(C: CI)->getCaseSuccessor()); |
4256 | return nullptr; |
4257 | } |
4258 | |
4259 | return nullptr; |
4260 | } |
4261 | |
Instruction *
InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
4264 | auto *WO = dyn_cast<WithOverflowInst>(Val: EV.getAggregateOperand()); |
4265 | if (!WO) |
4266 | return nullptr; |
4267 | |
4268 | Intrinsic::ID OvID = WO->getIntrinsicID(); |
4269 | const APInt *C = nullptr; |
4270 | if (match(V: WO->getRHS(), P: m_APIntAllowPoison(Res&: C))) { |
4271 | if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow || |
4272 | OvID == Intrinsic::umul_with_overflow)) { |
4273 | // extractvalue (any_mul_with_overflow X, -1), 0 --> -X |
4274 | if (C->isAllOnes()) |
4275 | return BinaryOperator::CreateNeg(Op: WO->getLHS()); |
4276 | // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n |
4277 | if (C->isPowerOf2()) { |
4278 | return BinaryOperator::CreateShl( |
4279 | V1: WO->getLHS(), |
4280 | V2: ConstantInt::get(Ty: WO->getLHS()->getType(), V: C->logBase2())); |
4281 | } |
4282 | } |
4283 | } |
4284 | |
  // We're extracting from an overflow intrinsic. See if we're the only user.
  // That allows us to simplify a multiple-result intrinsic into simpler
  // instructions that produce just the one value we actually use.
4288 | if (!WO->hasOneUse()) |
4289 | return nullptr; |
4290 | |
  // Check if we're grabbing only the arithmetic result (not the overflow bit)
  // of a 'with overflow' intrinsic and replace it with a plain binary
  // instruction.
4293 | if (*EV.idx_begin() == 0) { |
4294 | Instruction::BinaryOps BinOp = WO->getBinaryOp(); |
4295 | Value *LHS = WO->getLHS(), *RHS = WO->getRHS(); |
4296 | // Replace the old instruction's uses with poison. |
4297 | replaceInstUsesWith(I&: *WO, V: PoisonValue::get(T: WO->getType())); |
4298 | eraseInstFromFunction(I&: *WO); |
4299 | return BinaryOperator::Create(Op: BinOp, S1: LHS, S2: RHS); |
4300 | } |
4301 | |
4302 | assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst" ); |
4303 | |
4304 | // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS. |
4305 | if (OvID == Intrinsic::usub_with_overflow) |
4306 | return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS()); |
4307 | |
4308 | // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but |
4309 | // +1 is not possible because we assume signed values. |
4310 | if (OvID == Intrinsic::smul_with_overflow && |
4311 | WO->getLHS()->getType()->isIntOrIntVectorTy(BitWidth: 1)) |
4312 | return BinaryOperator::CreateAnd(V1: WO->getLHS(), V2: WO->getRHS()); |
4313 | |
4314 | // extractvalue (umul_with_overflow X, X), 1 -> X u> 2^(N/2)-1 |
4315 | if (OvID == Intrinsic::umul_with_overflow && WO->getLHS() == WO->getRHS()) { |
4316 | unsigned BitWidth = WO->getLHS()->getType()->getScalarSizeInBits(); |
4317 | // Only handle even bitwidths for performance reasons. |
4318 | if (BitWidth % 2 == 0) |
4319 | return new ICmpInst( |
4320 | ICmpInst::ICMP_UGT, WO->getLHS(), |
4321 | ConstantInt::get(Ty: WO->getLHS()->getType(), |
4322 | V: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth / 2))); |
4323 | } |
4324 | |
4325 | // If only the overflow result is used, and the right hand side is a |
4326 | // constant (or constant splat), we can remove the intrinsic by directly |
4327 | // checking for overflow. |
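  // For example (illustrative), for an i8 unsigned add:
  //   extractvalue (uadd.with.overflow i8 %x, 42), 1
  // becomes an unsigned compare of %x against 255 - 42 = 213, since the
  // addition wraps exactly when %x exceeds 213.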
4328 | if (C) { |
4329 | // Compute the no-wrap range for LHS given RHS=C, then construct an |
4330 | // equivalent icmp, potentially using an offset. |
4331 | ConstantRange NWR = ConstantRange::makeExactNoWrapRegion( |
4332 | BinOp: WO->getBinaryOp(), Other: *C, NoWrapKind: WO->getNoWrapKind()); |
4333 | |
4334 | CmpInst::Predicate Pred; |
4335 | APInt NewRHSC, Offset; |
4336 | NWR.getEquivalentICmp(Pred, RHS&: NewRHSC, Offset); |
4337 | auto *OpTy = WO->getRHS()->getType(); |
4338 | auto *NewLHS = WO->getLHS(); |
4339 | if (Offset != 0) |
4340 | NewLHS = Builder.CreateAdd(LHS: NewLHS, RHS: ConstantInt::get(Ty: OpTy, V: Offset)); |
4341 | return new ICmpInst(ICmpInst::getInversePredicate(pred: Pred), NewLHS, |
4342 | ConstantInt::get(Ty: OpTy, V: NewRHSC)); |
4343 | } |
4344 | |
4345 | return nullptr; |
4346 | } |
4347 | |
static Value *foldFrexpOfSelect(ExtractValueInst &EV, IntrinsicInst *FrexpCall,
                                SelectInst *SelectInst,
                                InstCombiner::BuilderTy &Builder) {
4351 | // Helper to fold frexp of select to select of frexp. |
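  // For example (illustrative), when only the mantissa (index 0) is extracted:
  //   extractvalue (frexp (select i1 %c, double 8.0, double %x)), 0
  // can become a select between the constant arm's mantissa (frexp(8.0) is
  // 0.5 * 2^4, so 0.5) and the mantissa of a new frexp call on %x.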
4352 | |
4353 | if (!SelectInst->hasOneUse() || !FrexpCall->hasOneUse()) |
4354 | return nullptr; |
4355 | Value *Cond = SelectInst->getCondition(); |
4356 | Value *TrueVal = SelectInst->getTrueValue(); |
4357 | Value *FalseVal = SelectInst->getFalseValue(); |
4358 | |
4359 | const APFloat *ConstVal = nullptr; |
4360 | Value *VarOp = nullptr; |
4361 | bool ConstIsTrue = false; |
4362 | |
4363 | if (match(V: TrueVal, P: m_APFloat(Res&: ConstVal))) { |
4364 | VarOp = FalseVal; |
4365 | ConstIsTrue = true; |
4366 | } else if (match(V: FalseVal, P: m_APFloat(Res&: ConstVal))) { |
4367 | VarOp = TrueVal; |
4368 | ConstIsTrue = false; |
4369 | } else { |
4370 | return nullptr; |
4371 | } |
4372 | |
4373 | Builder.SetInsertPoint(&EV); |
4374 | |
4375 | CallInst *NewFrexp = |
4376 | Builder.CreateCall(Callee: FrexpCall->getCalledFunction(), Args: {VarOp}, Name: "frexp" ); |
4377 | NewFrexp->copyIRFlags(V: FrexpCall); |
4378 | |
4379 | Value *NewEV = Builder.CreateExtractValue(Agg: NewFrexp, Idxs: 0, Name: "mantissa" ); |
4380 | |
4381 | int Exp; |
4382 | APFloat Mantissa = frexp(X: *ConstVal, Exp, RM: APFloat::rmNearestTiesToEven); |
4383 | |
4384 | Constant *ConstantMantissa = ConstantFP::get(Ty: TrueVal->getType(), V: Mantissa); |
4385 | |
4386 | Value *NewSel = Builder.CreateSelectFMF( |
4387 | C: Cond, True: ConstIsTrue ? ConstantMantissa : NewEV, |
4388 | False: ConstIsTrue ? NewEV : ConstantMantissa, FMFSource: SelectInst, Name: "select.frexp" ); |
4389 | return NewSel; |
4390 | } |

Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
4392 | Value *Agg = EV.getAggregateOperand(); |
4393 | |
4394 | if (!EV.hasIndices()) |
4395 | return replaceInstUsesWith(I&: EV, V: Agg); |
4396 | |
4397 | if (Value *V = simplifyExtractValueInst(Agg, Idxs: EV.getIndices(), |
4398 | Q: SQ.getWithInstruction(I: &EV))) |
4399 | return replaceInstUsesWith(I&: EV, V); |
4400 | |
4401 | Value *Cond, *TrueVal, *FalseVal; |
4402 | if (match(V: &EV, P: m_ExtractValue<0>(V: m_Intrinsic<Intrinsic::frexp>(Op0: m_Select( |
4403 | C: m_Value(V&: Cond), L: m_Value(V&: TrueVal), R: m_Value(V&: FalseVal)))))) { |
4404 | auto *SelInst = |
4405 | cast<SelectInst>(Val: cast<IntrinsicInst>(Val: Agg)->getArgOperand(i: 0)); |
4406 | if (Value *Result = |
4407 | foldFrexpOfSelect(EV, FrexpCall: cast<IntrinsicInst>(Val: Agg), SelectInst: SelInst, Builder)) |
4408 | return replaceInstUsesWith(I&: EV, V: Result); |
4409 | } |
4410 | if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Val: Agg)) { |
4411 | // We're extracting from an insertvalue instruction, compare the indices |
4412 | const unsigned *exti, *exte, *insi, *inse; |
4413 | for (exti = EV.idx_begin(), insi = IV->idx_begin(), |
4414 | exte = EV.idx_end(), inse = IV->idx_end(); |
4415 | exti != exte && insi != inse; |
4416 | ++exti, ++insi) { |
4417 | if (*insi != *exti) |
4418 | // The insert and extract both reference distinctly different elements. |
4419 | // This means the extract is not influenced by the insert, and we can |
4420 | // replace the aggregate operand of the extract with the aggregate |
4421 | // operand of the insert. i.e., replace |
4422 | // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 |
4423 | // %E = extractvalue { i32, { i32 } } %I, 0 |
4424 | // with |
4425 | // %E = extractvalue { i32, { i32 } } %A, 0 |
4426 | return ExtractValueInst::Create(Agg: IV->getAggregateOperand(), |
4427 | Idxs: EV.getIndices()); |
4428 | } |
4429 | if (exti == exte && insi == inse) |
4430 | // Both iterators are at the end: Index lists are identical. Replace |
4431 | // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 |
4432 | // %C = extractvalue { i32, { i32 } } %B, 1, 0 |
4433 | // with "i32 42" |
4434 | return replaceInstUsesWith(I&: EV, V: IV->getInsertedValueOperand()); |
4435 | if (exti == exte) { |
4436 | // The extract list is a prefix of the insert list. i.e. replace |
4437 | // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0 |
4438 | // %E = extractvalue { i32, { i32 } } %I, 1 |
4439 | // with |
4440 | // %X = extractvalue { i32, { i32 } } %A, 1 |
4441 | // %E = insertvalue { i32 } %X, i32 42, 0 |
4442 | // by switching the order of the insert and extract (though the |
4443 | // insertvalue should be left in, since it may have other uses). |
4444 | Value *NewEV = Builder.CreateExtractValue(Agg: IV->getAggregateOperand(), |
4445 | Idxs: EV.getIndices()); |
4446 | return InsertValueInst::Create(Agg: NewEV, Val: IV->getInsertedValueOperand(), |
4447 | Idxs: ArrayRef(insi, inse)); |
4448 | } |
4449 | if (insi == inse) |
4450 | // The insert list is a prefix of the extract list |
4451 | // We can simply remove the common indices from the extract and make it |
4452 | // operate on the inserted value instead of the insertvalue result. |
4453 | // i.e., replace |
4454 | // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1 |
4455 | // %E = extractvalue { i32, { i32 } } %I, 1, 0 |
4456 | // with |
      //     %E = extractvalue { i32 } { i32 42 }, 0
4458 | return ExtractValueInst::Create(Agg: IV->getInsertedValueOperand(), |
4459 | Idxs: ArrayRef(exti, exte)); |
4460 | } |
4461 | |
4462 | if (Instruction *R = foldExtractOfOverflowIntrinsic(EV)) |
4463 | return R; |
4464 | |
4465 | if (LoadInst *L = dyn_cast<LoadInst>(Val: Agg)) { |
    // Bail out if the aggregate contains a scalable vector type.
4467 | if (auto *STy = dyn_cast<StructType>(Val: Agg->getType()); |
4468 | STy && STy->isScalableTy()) |
4469 | return nullptr; |
4470 | |
4471 | // If the (non-volatile) load only has one use, we can rewrite this to a |
4472 | // load from a GEP. This reduces the size of the load. If a load is used |
4473 | // only by extractvalue instructions then this either must have been |
4474 | // optimized before, or it is a struct with padding, in which case we |
4475 | // don't want to do the transformation as it loses padding knowledge. |
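    // For example (illustrative):
    //   %agg = load { i32, i64 }, ptr %p
    //   %v = extractvalue { i32, i64 } %agg, 1
    // becomes a narrower load through a GEP to the second field:
    //   %gep = getelementptr inbounds { i32, i64 }, ptr %p, i32 0, i32 1
    //   %v = load i64, ptr %gep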
4476 | if (L->isSimple() && L->hasOneUse()) { |
4477 | // extractvalue has integer indices, getelementptr has Value*s. Convert. |
4478 | SmallVector<Value*, 4> Indices; |
4479 | // Prefix an i32 0 since we need the first element. |
4480 | Indices.push_back(Elt: Builder.getInt32(C: 0)); |
4481 | for (unsigned Idx : EV.indices()) |
4482 | Indices.push_back(Elt: Builder.getInt32(C: Idx)); |
4483 | |
4484 | // We need to insert these at the location of the old load, not at that of |
4485 | // the extractvalue. |
4486 | Builder.SetInsertPoint(L); |
4487 | Value *GEP = Builder.CreateInBoundsGEP(Ty: L->getType(), |
4488 | Ptr: L->getPointerOperand(), IdxList: Indices); |
4489 | Instruction *NL = Builder.CreateLoad(Ty: EV.getType(), Ptr: GEP); |
      // Whatever aliasing information we had for the original load must also
4491 | // hold for the smaller load, so propagate the annotations. |
4492 | NL->setAAMetadata(L->getAAMetadata()); |
4493 | // Returning the load directly will cause the main loop to insert it in |
4494 | // the wrong spot, so use replaceInstUsesWith(). |
4495 | return replaceInstUsesWith(I&: EV, V: NL); |
4496 | } |
4497 | } |
4498 | |
4499 | if (auto *PN = dyn_cast<PHINode>(Val: Agg)) |
4500 | if (Instruction *Res = foldOpIntoPhi(I&: EV, PN)) |
4501 | return Res; |
4502 | |
4503 | // Canonicalize extract (select Cond, TV, FV) |
4504 | // -> select cond, (extract TV), (extract FV) |
4505 | if (auto *SI = dyn_cast<SelectInst>(Val: Agg)) |
4506 | if (Instruction *R = FoldOpIntoSelect(Op&: EV, SI, /*FoldWithMultiUse=*/true)) |
4507 | return R; |
4508 | |
4509 | // We could simplify extracts from other values. Note that nested extracts may |
4510 | // already be simplified implicitly by the above: extract (extract (insert) ) |
4511 | // will be translated into extract ( insert ( extract ) ) first and then just |
4512 | // the value inserted, if appropriate. Similarly for extracts from single-use |
4513 | // loads: extract (extract (load)) will be translated to extract (load (gep)) |
4514 | // and if again single-use then via load (gep (gep)) to load (gep). |
4515 | // However, double extracts from e.g. function arguments or return values |
4516 | // aren't handled yet. |
4517 | return nullptr; |
4518 | } |
4519 | |
4520 | /// Return 'true' if the given typeinfo will match anything. |
4521 | static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) { |
4522 | switch (Personality) { |
4523 | case EHPersonality::GNU_C: |
4524 | case EHPersonality::GNU_C_SjLj: |
4525 | case EHPersonality::Rust: |
4526 | // The GCC C EH and Rust personality only exists to support cleanups, so |
4527 | // it's not clear what the semantics of catch clauses are. |
4528 | return false; |
4529 | case EHPersonality::Unknown: |
4530 | return false; |
4531 | case EHPersonality::GNU_Ada: |
4532 | // While __gnat_all_others_value will match any Ada exception, it doesn't |
4533 | // match foreign exceptions (or didn't, before gcc-4.7). |
4534 | return false; |
4535 | case EHPersonality::GNU_CXX: |
4536 | case EHPersonality::GNU_CXX_SjLj: |
4537 | case EHPersonality::GNU_ObjC: |
4538 | case EHPersonality::MSVC_X86SEH: |
4539 | case EHPersonality::MSVC_TableSEH: |
4540 | case EHPersonality::MSVC_CXX: |
4541 | case EHPersonality::CoreCLR: |
4542 | case EHPersonality::Wasm_CXX: |
4543 | case EHPersonality::XL_CXX: |
4544 | case EHPersonality::ZOS_CXX: |
4545 | return TypeInfo->isNullValue(); |
4546 | } |
4547 | llvm_unreachable("invalid enum" ); |
4548 | } |
4549 | |
static bool shorter_filter(const Value *LHS, const Value *RHS) {
  return cast<ArrayType>(Val: LHS->getType())->getNumElements() <
         cast<ArrayType>(Val: RHS->getType())->getNumElements();
}
4556 | |
4557 | Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) { |
4558 | // The logic here should be correct for any real-world personality function. |
4559 | // However if that turns out not to be true, the offending logic can always |
4560 | // be conditioned on the personality function, like the catch-all logic is. |
4561 | EHPersonality Personality = |
4562 | classifyEHPersonality(Pers: LI.getParent()->getParent()->getPersonalityFn()); |
4563 | |
4564 | // Simplify the list of clauses, eg by removing repeated catch clauses |
4565 | // (these are often created by inlining). |
4566 | bool MakeNewInstruction = false; // If true, recreate using the following: |
4567 | SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction; |
4568 | bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup. |
4569 | |
4570 | SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already. |
4571 | for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) { |
4572 | bool isLastClause = i + 1 == e; |
4573 | if (LI.isCatch(Idx: i)) { |
4574 | // A catch clause. |
4575 | Constant *CatchClause = LI.getClause(Idx: i); |
4576 | Constant *TypeInfo = CatchClause->stripPointerCasts(); |
4577 | |
4578 | // If we already saw this clause, there is no point in having a second |
4579 | // copy of it. |
4580 | if (AlreadyCaught.insert(Ptr: TypeInfo).second) { |
4581 | // This catch clause was not already seen. |
4582 | NewClauses.push_back(Elt: CatchClause); |
4583 | } else { |
4584 | // Repeated catch clause - drop the redundant copy. |
4585 | MakeNewInstruction = true; |
4586 | } |
4587 | |
4588 | // If this is a catch-all then there is no point in keeping any following |
4589 | // clauses or marking the landingpad as having a cleanup. |
4590 | if (isCatchAll(Personality, TypeInfo)) { |
4591 | if (!isLastClause) |
4592 | MakeNewInstruction = true; |
4593 | CleanupFlag = false; |
4594 | break; |
4595 | } |
4596 | } else { |
4597 | // A filter clause. If any of the filter elements were already caught |
4598 | // then they can be dropped from the filter. It is tempting to try to |
4599 | // exploit the filter further by saying that any typeinfo that does not |
4600 | // occur in the filter can't be caught later (and thus can be dropped). |
4601 | // However this would be wrong, since typeinfos can match without being |
4602 | // equal (for example if one represents a C++ class, and the other some |
4603 | // class derived from it). |
4604 | assert(LI.isFilter(i) && "Unsupported landingpad clause!" ); |
4605 | Constant *FilterClause = LI.getClause(Idx: i); |
4606 | ArrayType *FilterType = cast<ArrayType>(Val: FilterClause->getType()); |
4607 | unsigned NumTypeInfos = FilterType->getNumElements(); |
4608 | |
4609 | // An empty filter catches everything, so there is no point in keeping any |
4610 | // following clauses or marking the landingpad as having a cleanup. By |
4611 | // dealing with this case here the following code is made a bit simpler. |
4612 | if (!NumTypeInfos) { |
4613 | NewClauses.push_back(Elt: FilterClause); |
4614 | if (!isLastClause) |
4615 | MakeNewInstruction = true; |
4616 | CleanupFlag = false; |
4617 | break; |
4618 | } |
4619 | |
4620 | bool MakeNewFilter = false; // If true, make a new filter. |
4621 | SmallVector<Constant *, 16> NewFilterElts; // New elements. |
4622 | if (isa<ConstantAggregateZero>(Val: FilterClause)) { |
4623 | // Not an empty filter - it contains at least one null typeinfo. |
4624 | assert(NumTypeInfos > 0 && "Should have handled empty filter already!" ); |
4625 | Constant *TypeInfo = |
4626 | Constant::getNullValue(Ty: FilterType->getElementType()); |
4627 | // If this typeinfo is a catch-all then the filter can never match. |
4628 | if (isCatchAll(Personality, TypeInfo)) { |
4629 | // Throw the filter away. |
4630 | MakeNewInstruction = true; |
4631 | continue; |
4632 | } |
4633 | |
4634 | // There is no point in having multiple copies of this typeinfo, so |
4635 | // discard all but the first copy if there is more than one. |
4636 | NewFilterElts.push_back(Elt: TypeInfo); |
4637 | if (NumTypeInfos > 1) |
4638 | MakeNewFilter = true; |
4639 | } else { |
4640 | ConstantArray *Filter = cast<ConstantArray>(Val: FilterClause); |
4641 | SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements. |
4642 | NewFilterElts.reserve(N: NumTypeInfos); |
4643 | |
4644 | // Remove any filter elements that were already caught or that already |
4645 | // occurred in the filter. While there, see if any of the elements are |
4646 | // catch-alls. If so, the filter can be discarded. |
4647 | bool SawCatchAll = false; |
4648 | for (unsigned j = 0; j != NumTypeInfos; ++j) { |
4649 | Constant *Elt = Filter->getOperand(i_nocapture: j); |
4650 | Constant *TypeInfo = Elt->stripPointerCasts(); |
4651 | if (isCatchAll(Personality, TypeInfo)) { |
4652 | // This element is a catch-all. Bail out, noting this fact. |
4653 | SawCatchAll = true; |
4654 | break; |
4655 | } |
4656 | |
4657 | // Even if we've seen a type in a catch clause, we don't want to |
4658 | // remove it from the filter. An unexpected type handler may be |
4659 | // set up for a call site which throws an exception of the same |
4660 | // type caught. In order for the exception thrown by the unexpected |
4661 | // handler to propagate correctly, the filter must be correctly |
4662 | // described for the call site. |
4663 | // |
4664 | // Example: |
4665 | // |
4666 | // void unexpected() { throw 1;} |
4667 | // void foo() throw (int) { |
4668 | // std::set_unexpected(unexpected); |
4669 | // try { |
4670 | // throw 2.0; |
4671 | // } catch (int i) {} |
4672 | // } |
4673 | |
4674 | // There is no point in having multiple copies of the same typeinfo in |
4675 | // a filter, so only add it if we didn't already. |
4676 | if (SeenInFilter.insert(Ptr: TypeInfo).second) |
4677 | NewFilterElts.push_back(Elt: cast<Constant>(Val: Elt)); |
4678 | } |
4679 | // A filter containing a catch-all cannot match anything by definition. |
4680 | if (SawCatchAll) { |
4681 | // Throw the filter away. |
4682 | MakeNewInstruction = true; |
4683 | continue; |
4684 | } |
4685 | |
4686 | // If we dropped something from the filter, make a new one. |
4687 | if (NewFilterElts.size() < NumTypeInfos) |
4688 | MakeNewFilter = true; |
4689 | } |
4690 | if (MakeNewFilter) { |
4691 | FilterType = ArrayType::get(ElementType: FilterType->getElementType(), |
4692 | NumElements: NewFilterElts.size()); |
4693 | FilterClause = ConstantArray::get(T: FilterType, V: NewFilterElts); |
4694 | MakeNewInstruction = true; |
4695 | } |
4696 | |
4697 | NewClauses.push_back(Elt: FilterClause); |
4698 | |
4699 | // If the new filter is empty then it will catch everything so there is |
4700 | // no point in keeping any following clauses or marking the landingpad |
4701 | // as having a cleanup. The case of the original filter being empty was |
4702 | // already handled above. |
4703 | if (MakeNewFilter && !NewFilterElts.size()) { |
4704 | assert(MakeNewInstruction && "New filter but not a new instruction!" ); |
4705 | CleanupFlag = false; |
4706 | break; |
4707 | } |
4708 | } |
4709 | } |
4710 | |
4711 | // If several filters occur in a row then reorder them so that the shortest |
4712 | // filters come first (those with the smallest number of elements). This is |
4713 | // advantageous because shorter filters are more likely to match, speeding up |
4714 | // unwinding, but mostly because it increases the effectiveness of the other |
4715 | // filter optimizations below. |
4716 | for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) { |
4717 | unsigned j; |
4718 | // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters. |
4719 | for (j = i; j != e; ++j) |
4720 | if (!isa<ArrayType>(Val: NewClauses[j]->getType())) |
4721 | break; |
4722 | |
4723 | // Check whether the filters are already sorted by length. We need to know |
4724 | // if sorting them is actually going to do anything so that we only make a |
4725 | // new landingpad instruction if it does. |
4726 | for (unsigned k = i; k + 1 < j; ++k) |
4727 | if (shorter_filter(LHS: NewClauses[k+1], RHS: NewClauses[k])) { |
4728 | // Not sorted, so sort the filters now. Doing an unstable sort would be |
4729 | // correct too but reordering filters pointlessly might confuse users. |
4730 | std::stable_sort(first: NewClauses.begin() + i, last: NewClauses.begin() + j, |
4731 | comp: shorter_filter); |
4732 | MakeNewInstruction = true; |
4733 | break; |
4734 | } |
4735 | |
4736 | // Look for the next batch of filters. |
4737 | i = j + 1; |
4738 | } |
4739 | |
4740 | // If typeinfos matched if and only if equal, then the elements of a filter L |
4741 | // that occurs later than a filter F could be replaced by the intersection of |
4742 | // the elements of F and L. In reality two typeinfos can match without being |
4743 | // equal (for example if one represents a C++ class, and the other some class |
4744 | // derived from it) so it would be wrong to perform this transform in general. |
4745 | // However the transform is correct and useful if F is a subset of L. In that |
4746 | // case L can be replaced by F, and thus removed altogether since repeating a |
4747 | // filter is pointless. So here we look at all pairs of filters F and L where |
4748 | // L follows F in the list of clauses, and remove L if every element of F is |
4749 | // an element of L. This can occur when inlining C++ functions with exception |
4750 | // specifications. |
4751 | for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) { |
4752 | // Examine each filter in turn. |
4753 | Value *Filter = NewClauses[i]; |
4754 | ArrayType *FTy = dyn_cast<ArrayType>(Val: Filter->getType()); |
4755 | if (!FTy) |
4756 | // Not a filter - skip it. |
4757 | continue; |
4758 | unsigned FElts = FTy->getNumElements(); |
4759 | // Examine each filter following this one. Doing this backwards means that |
4760 | // we don't have to worry about filters disappearing under us when removed. |
4761 | for (unsigned j = NewClauses.size() - 1; j != i; --j) { |
4762 | Value *LFilter = NewClauses[j]; |
4763 | ArrayType *LTy = dyn_cast<ArrayType>(Val: LFilter->getType()); |
4764 | if (!LTy) |
4765 | // Not a filter - skip it. |
4766 | continue; |
4767 | // If Filter is a subset of LFilter, i.e. every element of Filter is also |
4768 | // an element of LFilter, then discard LFilter. |
4769 | SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j; |
4770 | // If Filter is empty then it is a subset of LFilter. |
4771 | if (!FElts) { |
4772 | // Discard LFilter. |
4773 | NewClauses.erase(CI: J); |
4774 | MakeNewInstruction = true; |
4775 | // Move on to the next filter. |
4776 | continue; |
4777 | } |
4778 | unsigned LElts = LTy->getNumElements(); |
4779 | // If Filter is longer than LFilter then it cannot be a subset of it. |
4780 | if (FElts > LElts) |
4781 | // Move on to the next filter. |
4782 | continue; |
4783 | // At this point we know that LFilter has at least one element. |
4784 | if (isa<ConstantAggregateZero>(Val: LFilter)) { // LFilter only contains zeros. |
4785 | // Filter is a subset of LFilter iff Filter contains only zeros (as we |
4786 | // already know that Filter is not longer than LFilter). |
4787 | if (isa<ConstantAggregateZero>(Val: Filter)) { |
4788 | assert(FElts <= LElts && "Should have handled this case earlier!" ); |
4789 | // Discard LFilter. |
4790 | NewClauses.erase(CI: J); |
4791 | MakeNewInstruction = true; |
4792 | } |
4793 | // Move on to the next filter. |
4794 | continue; |
4795 | } |
4796 | ConstantArray *LArray = cast<ConstantArray>(Val: LFilter); |
4797 | if (isa<ConstantAggregateZero>(Val: Filter)) { // Filter only contains zeros. |
4798 | // Since Filter is non-empty and contains only zeros, it is a subset of |
4799 | // LFilter iff LFilter contains a zero. |
4800 | assert(FElts > 0 && "Should have eliminated the empty filter earlier!" ); |
4801 | for (unsigned l = 0; l != LElts; ++l) |
4802 | if (LArray->getOperand(i_nocapture: l)->isNullValue()) { |
4803 | // LFilter contains a zero - discard it. |
4804 | NewClauses.erase(CI: J); |
4805 | MakeNewInstruction = true; |
4806 | break; |
4807 | } |
4808 | // Move on to the next filter. |
4809 | continue; |
4810 | } |
4811 | // At this point we know that both filters are ConstantArrays. Loop over |
4812 | // operands to see whether every element of Filter is also an element of |
4813 | // LFilter. Since filters tend to be short this is probably faster than |
4814 | // using a method that scales nicely. |
4815 | ConstantArray *FArray = cast<ConstantArray>(Val: Filter); |
4816 | bool AllFound = true; |
4817 | for (unsigned f = 0; f != FElts; ++f) { |
4818 | Value *FTypeInfo = FArray->getOperand(i_nocapture: f)->stripPointerCasts(); |
4819 | AllFound = false; |
4820 | for (unsigned l = 0; l != LElts; ++l) { |
4821 | Value *LTypeInfo = LArray->getOperand(i_nocapture: l)->stripPointerCasts(); |
4822 | if (LTypeInfo == FTypeInfo) { |
4823 | AllFound = true; |
4824 | break; |
4825 | } |
4826 | } |
4827 | if (!AllFound) |
4828 | break; |
4829 | } |
4830 | if (AllFound) { |
4831 | // Discard LFilter. |
4832 | NewClauses.erase(CI: J); |
4833 | MakeNewInstruction = true; |
4834 | } |
4835 | // Move on to the next filter. |
4836 | } |
4837 | } |
4838 | |
4839 | // If we changed any of the clauses, replace the old landingpad instruction |
4840 | // with a new one. |
4841 | if (MakeNewInstruction) { |
4842 | LandingPadInst *NLI = LandingPadInst::Create(RetTy: LI.getType(), |
4843 | NumReservedClauses: NewClauses.size()); |
4844 | for (Constant *C : NewClauses) |
4845 | NLI->addClause(ClauseVal: C); |
4846 | // A landing pad with no clauses must have the cleanup flag set. It is |
4847 | // theoretically possible, though highly unlikely, that we eliminated all |
4848 | // clauses. If so, force the cleanup flag to true. |
4849 | if (NewClauses.empty()) |
4850 | CleanupFlag = true; |
4851 | NLI->setCleanup(CleanupFlag); |
4852 | return NLI; |
4853 | } |
4854 | |
4855 | // Even if none of the clauses changed, we may nonetheless have understood |
4856 | // that the cleanup flag is pointless. Clear it if so. |
4857 | if (LI.isCleanup() != CleanupFlag) { |
4858 | assert(!CleanupFlag && "Adding a cleanup, not removing one?!" ); |
4859 | LI.setCleanup(CleanupFlag); |
4860 | return &LI; |
4861 | } |
4862 | |
4863 | return nullptr; |
4864 | } |
4865 | |
4866 | Value * |
4867 | InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) { |
4868 | // Try to push freeze through instructions that propagate but don't produce |
  // poison as far as possible. If the operand of the freeze 1) has a single
  // use, 2) does not itself produce poison, and 3) has at most one operand
  // that is not guaranteed non-poison, then push the freeze through to that
  // operand. The actual transform is as follows.
  //   Op1 = ...                        ; Op1 may be poison
  //   Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and Op1 is its
  //                                    ; only potentially-poison operand
4877 | // ... = Freeze(Op0) |
4878 | // => |
4879 | // Op1 = ... |
4880 | // Op1.fr = Freeze(Op1) |
4881 | // ... = Inst(Op1.fr, NonPoisonOps...) |
4882 | auto *OrigOp = OrigFI.getOperand(i_nocapture: 0); |
4883 | auto *OrigOpInst = dyn_cast<Instruction>(Val: OrigOp); |
4884 | |
  // While we could change the other users of OrigOp to use freeze(OrigOp),
  // that would reduce their optimization potential, so only do this if OrigOp
  // is used solely by the freeze.
4888 | if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(Val: OrigOp)) |
4889 | return nullptr; |
4890 | |
4891 | // We can't push the freeze through an instruction which can itself create |
4892 | // poison. If the only source of new poison is flags, we can simply |
4893 | // strip them (since we know the only use is the freeze and nothing can |
4894 | // benefit from them.) |
4895 | if (canCreateUndefOrPoison(Op: cast<Operator>(Val: OrigOp), |
4896 | /*ConsiderFlagsAndMetadata*/ false)) |
4897 | return nullptr; |
4898 | |
4899 | // If operand is guaranteed not to be poison, there is no need to add freeze |
4900 | // to the operand. So we first find the operand that is not guaranteed to be |
4901 | // poison. |
4902 | Use *MaybePoisonOperand = nullptr; |
4903 | for (Use &U : OrigOpInst->operands()) { |
4904 | if (isa<MetadataAsValue>(Val: U.get()) || |
4905 | isGuaranteedNotToBeUndefOrPoison(V: U.get())) |
4906 | continue; |
4907 | if (!MaybePoisonOperand) |
4908 | MaybePoisonOperand = &U; |
4909 | else |
4910 | return nullptr; |
4911 | } |
4912 | |
4913 | OrigOpInst->dropPoisonGeneratingAnnotations(); |
4914 | |
4915 | // If all operands are guaranteed to be non-poison, we can drop freeze. |
4916 | if (!MaybePoisonOperand) |
4917 | return OrigOp; |
4918 | |
4919 | Builder.SetInsertPoint(OrigOpInst); |
4920 | auto *FrozenMaybePoisonOperand = Builder.CreateFreeze( |
4921 | V: MaybePoisonOperand->get(), Name: MaybePoisonOperand->get()->getName() + ".fr" ); |
4922 | |
4923 | replaceUse(U&: *MaybePoisonOperand, NewValue: FrozenMaybePoisonOperand); |
4924 | return OrigOp; |
4925 | } |
4926 | |
4927 | Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI, |
4928 | PHINode *PN) { |
4929 | // Detect whether this is a recurrence with a start value and some number of |
4930 | // backedge values. We'll check whether we can push the freeze through the |
4931 | // backedge values (possibly dropping poison flags along the way) until we |
4932 | // reach the phi again. In that case, we can move the freeze to the start |
4933 | // value. |
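  // For example (illustrative):
  //   %iv = phi i64 [ %start, %entry ], [ %iv.next, %loop ]
  //   %iv.next = add nuw i64 %iv, 1
  //   %fr = freeze i64 %iv
  // The freeze can be removed by freezing %start instead and dropping the
  // nuw flag on the add, since the flag could reintroduce poison.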
4934 | Use *StartU = nullptr; |
4935 | SmallVector<Value *> Worklist; |
4936 | for (Use &U : PN->incoming_values()) { |
4937 | if (DT.dominates(A: PN->getParent(), B: PN->getIncomingBlock(U))) { |
4938 | // Add backedge value to worklist. |
4939 | Worklist.push_back(Elt: U.get()); |
4940 | continue; |
4941 | } |
4942 | |
4943 | // Don't bother handling multiple start values. |
4944 | if (StartU) |
4945 | return nullptr; |
4946 | StartU = &U; |
4947 | } |
4948 | |
4949 | if (!StartU || Worklist.empty()) |
4950 | return nullptr; // Not a recurrence. |
4951 | |
4952 | Value *StartV = StartU->get(); |
4953 | BasicBlock *StartBB = PN->getIncomingBlock(U: *StartU); |
4954 | bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(V: StartV); |
  // We can't insert a freeze if the start value is defined by that block's
  // terminator (e.g. an invoke), since there would be no insertion point for
  // the freeze after its definition within the block.
4957 | if (StartNeedsFreeze && StartBB->getTerminator() == StartV) |
4958 | return nullptr; |
4959 | |
4960 | SmallPtrSet<Value *, 32> Visited; |
4961 | SmallVector<Instruction *> DropFlags; |
4962 | while (!Worklist.empty()) { |
4963 | Value *V = Worklist.pop_back_val(); |
4964 | if (!Visited.insert(Ptr: V).second) |
4965 | continue; |
4966 | |
4967 | if (Visited.size() > 32) |
4968 | return nullptr; // Limit the total number of values we inspect. |
4969 | |
4970 | // Assume that PN is non-poison, because it will be after the transform. |
4971 | if (V == PN || isGuaranteedNotToBeUndefOrPoison(V)) |
4972 | continue; |
4973 | |
4974 | Instruction *I = dyn_cast<Instruction>(Val: V); |
4975 | if (!I || canCreateUndefOrPoison(Op: cast<Operator>(Val: I), |
4976 | /*ConsiderFlagsAndMetadata*/ false)) |
4977 | return nullptr; |
4978 | |
4979 | DropFlags.push_back(Elt: I); |
4980 | append_range(C&: Worklist, R: I->operands()); |
4981 | } |
4982 | |
4983 | for (Instruction *I : DropFlags) |
4984 | I->dropPoisonGeneratingAnnotations(); |
4985 | |
4986 | if (StartNeedsFreeze) { |
4987 | Builder.SetInsertPoint(StartBB->getTerminator()); |
4988 | Value *FrozenStartV = Builder.CreateFreeze(V: StartV, |
4989 | Name: StartV->getName() + ".fr" ); |
4990 | replaceUse(U&: *StartU, NewValue: FrozenStartV); |
4991 | } |
4992 | return replaceInstUsesWith(I&: FI, V: PN); |
4993 | } |
4994 | |
4995 | bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) { |
4996 | Value *Op = FI.getOperand(i_nocapture: 0); |
4997 | |
4998 | if (isa<Constant>(Val: Op) || Op->hasOneUse()) |
4999 | return false; |
5000 | |
5001 | // Move the freeze directly after the definition of its operand, so that |
5002 | // it dominates the maximum number of uses. Note that it may not dominate |
5003 | // *all* uses if the operand is an invoke/callbr and the use is in a phi on |
5004 | // the normal/default destination. This is why the domination check in the |
5005 | // replacement below is still necessary. |
5006 | BasicBlock::iterator MoveBefore; |
5007 | if (isa<Argument>(Val: Op)) { |
5008 | MoveBefore = |
5009 | FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca(); |
5010 | } else { |
5011 | auto MoveBeforeOpt = cast<Instruction>(Val: Op)->getInsertionPointAfterDef(); |
5012 | if (!MoveBeforeOpt) |
5013 | return false; |
5014 | MoveBefore = *MoveBeforeOpt; |
5015 | } |
5016 | |
5017 | // Re-point iterator to come after any debug-info records. |
5018 | MoveBefore.setHeadBit(false); |
5019 | |
5020 | bool Changed = false; |
5021 | if (&FI != &*MoveBefore) { |
5022 | FI.moveBefore(BB&: *MoveBefore->getParent(), I: MoveBefore); |
5023 | Changed = true; |
5024 | } |
5025 | |
5026 | Op->replaceUsesWithIf(New: &FI, ShouldReplace: [&](Use &U) -> bool { |
5027 | bool Dominates = DT.dominates(Def: &FI, U); |
5028 | Changed |= Dominates; |
5029 | return Dominates; |
5030 | }); |
5031 | |
5032 | return Changed; |
5033 | } |
5034 | |
5035 | // Check if any direct or bitcast user of this value is a shuffle instruction. |
5036 | static bool isUsedWithinShuffleVector(Value *V) { |
5037 | for (auto *U : V->users()) { |
5038 | if (isa<ShuffleVectorInst>(Val: U)) |
5039 | return true; |
5040 | else if (match(V: U, P: m_BitCast(Op: m_Specific(V))) && isUsedWithinShuffleVector(V: U)) |
5041 | return true; |
5042 | } |
5043 | return false; |
5044 | } |
5045 | |
5046 | Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) { |
5047 | Value *Op0 = I.getOperand(i_nocapture: 0); |
5048 | |
5049 | if (Value *V = simplifyFreezeInst(Op: Op0, Q: SQ.getWithInstruction(I: &I))) |
5050 | return replaceInstUsesWith(I, V); |
5051 | |
5052 | // freeze (phi const, x) --> phi const, (freeze x) |
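  // E.g. (illustrative only):
  //   %p  = phi i32 [ 1, %bb0 ], [ %x, %bb1 ]
  //   %fr = freeze i32 %p
  // becomes
  //   %x.fr = freeze i32 %x
  //   %p    = phi i32 [ 1, %bb0 ], [ %x.fr, %bb1 ]
  // so only the non-constant incoming value needs to be frozen.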
5053 | if (auto *PN = dyn_cast<PHINode>(Val: Op0)) { |
5054 | if (Instruction *NV = foldOpIntoPhi(I, PN)) |
5055 | return NV; |
5056 | if (Instruction *NV = foldFreezeIntoRecurrence(FI&: I, PN)) |
5057 | return NV; |
5058 | } |
5059 | |
5060 | if (Value *NI = pushFreezeToPreventPoisonFromPropagating(OrigFI&: I)) |
5061 | return replaceInstUsesWith(I, V: NI); |
5062 | |
5063 | // If I is freeze(undef), check its uses and fold it to a fixed constant. |
5064 | // - or: pick -1 |
5065 | // - select's condition: if the true value is constant, choose it by making |
5066 | // the condition true. |
5067 | // - default: pick 0 |
5068 | // |
5069 | // Note that this transform is intentionally done here rather than |
5070 | // via an analysis in InstSimplify or at individual user sites. That is |
5071 | // because we must produce the same value for all uses of the freeze - |
5072 | // it's the reason "freeze" exists! |
5073 | // |
5074 | // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid |
5075 | // duplicating logic for binops at least. |
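  // For instance (illustrative only), if the only user is
  //   %v = or i32 %fr, %x
  // then materializing the frozen undef as -1 lets the 'or' fold away to -1;
  // when different users prefer different constants we fall back to 0.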
5076 | auto getUndefReplacement = [&](Type *Ty) { |
5077 | Value *BestValue = nullptr; |
5078 | Value *NullValue = Constant::getNullValue(Ty); |
5079 | for (const auto *U : I.users()) { |
5080 | Value *V = NullValue; |
5081 | if (match(V: U, P: m_Or(L: m_Value(), R: m_Value()))) |
5082 | V = ConstantInt::getAllOnesValue(Ty); |
5083 | else if (match(V: U, P: m_Select(C: m_Specific(V: &I), L: m_Constant(), R: m_Value()))) |
5084 | V = ConstantInt::getTrue(Ty); |
5085 | else if (match(V: U, P: m_c_Select(L: m_Specific(V: &I), R: m_Value(V)))) { |
5086 | if (!isGuaranteedNotToBeUndefOrPoison(V, AC: &AC, CtxI: &I, DT: &DT)) |
5087 | V = NullValue; |
5088 | } |
5089 | |
5090 | if (!BestValue) |
5091 | BestValue = V; |
5092 | else if (BestValue != V) |
5093 | BestValue = NullValue; |
5094 | } |
5095 | assert(BestValue && "Must have at least one use" ); |
5096 | return BestValue; |
5097 | }; |
5098 | |
5099 | if (match(V: Op0, P: m_Undef())) { |
5100 | // Don't fold freeze(undef/poison) if it's used as a vector operand in |
5101 | // a shuffle. This may improve codegen for shuffles that allow |
5102 | // unspecified inputs. |
5103 | if (isUsedWithinShuffleVector(V: &I)) |
5104 | return nullptr; |
5105 | return replaceInstUsesWith(I, V: getUndefReplacement(I.getType())); |
5106 | } |
5107 | |
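  // For a frozen vector constant, replace any undef/poison elements with the
  // first well-defined element (or zero if there is none), e.g.
  // (illustrative only):
  //   freeze <2 x i32> <i32 undef, i32 7>  -->  <i32 7, i32 7>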
5108 | auto getFreezeVectorReplacement = [](Constant *C) -> Constant * { |
5109 | Type *Ty = C->getType(); |
5110 | auto *VTy = dyn_cast<FixedVectorType>(Val: Ty); |
5111 | if (!VTy) |
5112 | return nullptr; |
5113 | unsigned NumElts = VTy->getNumElements(); |
5114 | Constant *BestValue = Constant::getNullValue(Ty: VTy->getScalarType()); |
5115 | for (unsigned i = 0; i != NumElts; ++i) { |
5116 | Constant *EltC = C->getAggregateElement(Elt: i); |
5117 | if (EltC && !match(V: EltC, P: m_Undef())) { |
5118 | BestValue = EltC; |
5119 | break; |
5120 | } |
5121 | } |
5122 | return Constant::replaceUndefsWith(C, Replacement: BestValue); |
5123 | }; |
5124 | |
5125 | Constant *C; |
5126 | if (match(V: Op0, P: m_Constant(C)) && C->containsUndefOrPoisonElement() && |
5127 | !C->containsConstantExpression()) { |
5128 | if (Constant *Repl = getFreezeVectorReplacement(C)) |
5129 | return replaceInstUsesWith(I, V: Repl); |
5130 | } |
5131 | |
5132 | // Replace uses of Op with freeze(Op). |
5133 | if (freezeOtherUses(FI&: I)) |
5134 | return &I; |
5135 | |
5136 | return nullptr; |
5137 | } |
5138 | |
5139 | /// Check for the case where the call writes to an otherwise dead alloca. This
5140 | /// shows up for unused out-params in idiomatic C/C++ code. Note that this |
5141 | /// helper *only* analyzes the write; doesn't check any other legality aspect. |
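/// A typical (illustrative) source pattern is an ignored out-parameter, e.g.
///   int exp; (void)frexp(x, &exp);
/// where the alloca backing 'exp' has no users other than the call, so the
/// write done by the call can never be observed.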
5142 | static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) { |
5143 | auto *CB = dyn_cast<CallBase>(Val: I); |
5144 | if (!CB) |
5145 | // TODO: handle e.g. store to alloca here - only worth doing if we extend |
5146 | // to allow reload along used path as described below. Otherwise, this |
5147 | // is simply a store to a dead allocation which will be removed. |
5148 | return false; |
5149 | std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: CB, TLI); |
5150 | if (!Dest) |
5151 | return false; |
5152 | auto *AI = dyn_cast<AllocaInst>(Val: getUnderlyingObject(V: Dest->Ptr)); |
5153 | if (!AI) |
5154 | // TODO: allow malloc? |
5155 | return false; |
5156 | // TODO: allow memory access dominated by move point? Note that since AI |
5157 | // could have a reference to itself captured by the call, we would need to |
5158 | // account for cycles in doing so. |
5159 | SmallVector<const User *> AllocaUsers; |
5160 | SmallPtrSet<const User *, 4> Visited; |
5161 | auto pushUsers = [&](const Instruction &I) { |
5162 | for (const User *U : I.users()) { |
5163 | if (Visited.insert(Ptr: U).second) |
5164 | AllocaUsers.push_back(Elt: U); |
5165 | } |
5166 | }; |
5167 | pushUsers(*AI); |
5168 | while (!AllocaUsers.empty()) { |
5169 | auto *UserI = cast<Instruction>(Val: AllocaUsers.pop_back_val()); |
5170 | if (isa<GetElementPtrInst>(Val: UserI) || isa<AddrSpaceCastInst>(Val: UserI)) { |
5171 | pushUsers(*UserI); |
5172 | continue; |
5173 | } |
5174 | if (UserI == CB) |
5175 | continue; |
5176 | // TODO: support lifetime.start/end here |
5177 | return false; |
5178 | } |
5179 | return true; |
5180 | } |
5181 | |
5182 | /// Try to move the specified instruction from its current block into the |
5183 | /// beginning of DestBlock, which can only happen if it's safe to move the |
5184 | /// instruction past all of the instructions between it and the end of its |
5185 | /// block. |
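/// For example (illustrative only):
///   bb:
///     %v = add i32 %a, %b
///     br i1 %c, label %use, label %other
///   use:                                  ; unique predecessor is %bb
///     call void @g(i32 %v)
/// Here %v can be sunk into %use so it is not computed on the path to %other.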
5186 | bool InstCombinerImpl::tryToSinkInstruction(Instruction *I, |
5187 | BasicBlock *DestBlock) { |
5188 | BasicBlock *SrcBlock = I->getParent(); |
5189 | |
5190 |   // Cannot move control-flow-involving instructions, volatile loads, vaarg, etc.
5191 | if (isa<PHINode>(Val: I) || I->isEHPad() || I->mayThrow() || !I->willReturn() || |
5192 | I->isTerminator()) |
5193 | return false; |
5194 | |
5195 | // Do not sink static or dynamic alloca instructions. Static allocas must |
5196 | // remain in the entry block, and dynamic allocas must not be sunk in between |
5197 | // a stacksave / stackrestore pair, which would incorrectly shorten its |
5198 | // lifetime. |
5199 | if (isa<AllocaInst>(Val: I)) |
5200 | return false; |
5201 | |
5202 | // Do not sink into catchswitch blocks. |
5203 | if (isa<CatchSwitchInst>(Val: DestBlock->getTerminator())) |
5204 | return false; |
5205 | |
5206 | // Do not sink convergent call instructions. |
5207 | if (auto *CI = dyn_cast<CallInst>(Val: I)) { |
5208 | if (CI->isConvergent()) |
5209 | return false; |
5210 | } |
5211 | |
5212 |   // Unless we can prove that the memory write isn't visible except on the
5213 | // path we're sinking to, we must bail. |
5214 | if (I->mayWriteToMemory()) { |
5215 | if (!SoleWriteToDeadLocal(I, TLI)) |
5216 | return false; |
5217 | } |
5218 | |
5219 |   // We can only sink load instructions if there is nothing between the load and
5220 |   // the end of the block that could change the value.
5221 | if (I->mayReadFromMemory() && |
5222 | !I->hasMetadata(KindID: LLVMContext::MD_invariant_load)) { |
5223 | // We don't want to do any sophisticated alias analysis, so we only check |
5224 | // the instructions after I in I's parent block if we try to sink to its |
5225 | // successor block. |
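    // For example (illustrative only), we must not sink %v in
    //   %v = load i32, ptr %p
    //   store i32 1, ptr %p
    //   br label %succ
    // because the intervening store may change the loaded value.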
5226 | if (DestBlock->getUniquePredecessor() != I->getParent()) |
5227 | return false; |
5228 | for (BasicBlock::iterator Scan = std::next(x: I->getIterator()), |
5229 | E = I->getParent()->end(); |
5230 | Scan != E; ++Scan) |
5231 | if (Scan->mayWriteToMemory()) |
5232 | return false; |
5233 | } |
5234 | |
5235 | I->dropDroppableUses(ShouldDrop: [&](const Use *U) { |
5236 | auto *I = dyn_cast<Instruction>(Val: U->getUser()); |
5237 | if (I && I->getParent() != DestBlock) { |
5238 | Worklist.add(I); |
5239 | return true; |
5240 | } |
5241 | return false; |
5242 | }); |
5243 | /// FIXME: We could remove droppable uses that are not dominated by |
5244 | /// the new position. |
5245 | |
5246 | BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt(); |
5247 | I->moveBefore(BB&: *DestBlock, I: InsertPos); |
5248 | ++NumSunkInst; |
5249 | |
5250 |   // Also sink all related debug uses from the source basic block. Otherwise we
5251 |   // get a debug use before the def. Attempt to salvage debug uses first, to
5252 |   // maximise the range over which variables have a location. If we cannot
5253 |   // salvage, then mark the location undef: we know it was supposed to receive a
5254 |   // new location here, but that computation has been sunk.
5255 | SmallVector<DbgVariableIntrinsic *, 2> DbgUsers; |
5256 | SmallVector<DbgVariableRecord *, 2> DbgVariableRecords; |
5257 | findDbgUsers(DbgInsts&: DbgUsers, V: I, DbgVariableRecords: &DbgVariableRecords); |
5258 | if (!DbgUsers.empty()) |
5259 | tryToSinkInstructionDbgValues(I, InsertPos, SrcBlock, DestBlock, DbgUsers); |
5260 | if (!DbgVariableRecords.empty()) |
5261 | tryToSinkInstructionDbgVariableRecords(I, InsertPos, SrcBlock, DestBlock, |
5262 | DPUsers&: DbgVariableRecords); |
5263 | |
5264 |   // PS: there are numerous flaws with this behaviour, not least that right now
5265 |   // assignments can be re-ordered past other assignments to the same variable
5266 |   // if they use different Values. Creating more undef assignments can never be
5267 |   // undone. And salvaging all users outside of this block can unnecessarily
5268 |   // alter the lifetime of the live value that the variable refers to.
5269 |   // Some of these things could be resolved by tolerating debug use-before-defs
5270 |   // in LLVM-IR, but that depends on the instruction-referencing CodeGen backend
5271 |   // being adopted by more architectures.
5272 | |
5273 | return true; |
5274 | } |
5275 | |
5276 | void InstCombinerImpl::tryToSinkInstructionDbgValues( |
5277 | Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, |
5278 | BasicBlock *DestBlock, SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers) { |
5279 | // For all debug values in the destination block, the sunk instruction |
5280 | // will still be available, so they do not need to be dropped. |
5281 | SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSalvage; |
5282 | for (auto &DbgUser : DbgUsers) |
5283 | if (DbgUser->getParent() != DestBlock) |
5284 | DbgUsersToSalvage.push_back(Elt: DbgUser); |
5285 | |
5286 | // Process the sinking DbgUsersToSalvage in reverse order, as we only want |
5287 | // to clone the last appearing debug intrinsic for each given variable. |
5288 | SmallVector<DbgVariableIntrinsic *, 2> DbgUsersToSink; |
5289 | for (DbgVariableIntrinsic *DVI : DbgUsersToSalvage) |
5290 | if (DVI->getParent() == SrcBlock) |
5291 | DbgUsersToSink.push_back(Elt: DVI); |
5292 | llvm::sort(C&: DbgUsersToSink, |
5293 | Comp: [](auto *A, auto *B) { return B->comesBefore(A); }); |
5294 | |
5295 | SmallVector<DbgVariableIntrinsic *, 2> DIIClones; |
5296 | SmallSet<DebugVariable, 4> SunkVariables; |
5297 | for (auto *User : DbgUsersToSink) { |
5298 | // A dbg.declare instruction should not be cloned, since there can only be |
5299 | // one per variable fragment. It should be left in the original place |
5300 | // because the sunk instruction is not an alloca (otherwise we could not be |
5301 | // here). |
5302 | if (isa<DbgDeclareInst>(Val: User)) |
5303 | continue; |
5304 | |
5305 | DebugVariable DbgUserVariable = |
5306 | DebugVariable(User->getVariable(), User->getExpression(), |
5307 | User->getDebugLoc()->getInlinedAt()); |
5308 | |
5309 | if (!SunkVariables.insert(V: DbgUserVariable).second) |
5310 | continue; |
5311 | |
5312 | // Leave dbg.assign intrinsics in their original positions and there should |
5313 | // be no need to insert a clone. |
5314 | if (isa<DbgAssignIntrinsic>(Val: User)) |
5315 | continue; |
5316 | |
5317 | DIIClones.emplace_back(Args: cast<DbgVariableIntrinsic>(Val: User->clone())); |
5318 | if (isa<DbgDeclareInst>(Val: User) && isa<CastInst>(Val: I)) |
5319 | DIIClones.back()->replaceVariableLocationOp(OldValue: I, NewValue: I->getOperand(i: 0)); |
5320 | LLVM_DEBUG(dbgs() << "CLONE: " << *DIIClones.back() << '\n'); |
5321 | } |
5322 | |
5323 | // Perform salvaging without the clones, then sink the clones. |
5324 | if (!DIIClones.empty()) { |
5325 | salvageDebugInfoForDbgValues(I&: *I, Insns: DbgUsersToSalvage, DPInsns: {}); |
5326 | // The clones are in reverse order of original appearance, reverse again to |
5327 | // maintain the original order. |
5328 | for (auto &DIIClone : llvm::reverse(C&: DIIClones)) { |
5329 | DIIClone->insertBefore(InsertPos); |
5330 | LLVM_DEBUG(dbgs() << "SINK: " << *DIIClone << '\n'); |
5331 | } |
5332 | } |
5333 | } |
5334 | |
5335 | void InstCombinerImpl::tryToSinkInstructionDbgVariableRecords( |
5336 | Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock, |
5337 | BasicBlock *DestBlock, |
5338 | SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords) { |
5339 | // Implementation of tryToSinkInstructionDbgValues, but for the |
5340 | // DbgVariableRecord of variable assignments rather than dbg.values. |
5341 | |
5342 | // Fetch all DbgVariableRecords not already in the destination. |
5343 | SmallVector<DbgVariableRecord *, 2> DbgVariableRecordsToSalvage; |
5344 | for (auto &DVR : DbgVariableRecords) |
5345 | if (DVR->getParent() != DestBlock) |
5346 | DbgVariableRecordsToSalvage.push_back(Elt: DVR); |
5347 | |
5348 | // Fetch a second collection, of DbgVariableRecords in the source block that |
5349 | // we're going to sink. |
5350 | SmallVector<DbgVariableRecord *> DbgVariableRecordsToSink; |
5351 | for (DbgVariableRecord *DVR : DbgVariableRecordsToSalvage) |
5352 | if (DVR->getParent() == SrcBlock) |
5353 | DbgVariableRecordsToSink.push_back(Elt: DVR); |
5354 | |
5355 | // Sort DbgVariableRecords according to their position in the block. This is a |
5356 | // partial order: DbgVariableRecords attached to different instructions will |
5357 | // be ordered by the instruction order, but DbgVariableRecords attached to the |
5358 | // same instruction won't have an order. |
5359 | auto Order = [](DbgVariableRecord *A, DbgVariableRecord *B) -> bool { |
5360 | return B->getInstruction()->comesBefore(Other: A->getInstruction()); |
5361 | }; |
5362 | llvm::stable_sort(Range&: DbgVariableRecordsToSink, C: Order); |
5363 | |
5364 | // If there are two assignments to the same variable attached to the same |
5365 | // instruction, the ordering between the two assignments is important. Scan |
5366 | // for this (rare) case and establish which is the last assignment. |
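  // For example (schematically), if the same instruction carries
  //   #dbg_value(%a, "x", ...)
  //   #dbg_value(%b, "x", ...)
  // then only the later assignment to "x" should be cloned and sunk.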
5367 | using InstVarPair = std::pair<const Instruction *, DebugVariable>; |
5368 | SmallDenseMap<InstVarPair, DbgVariableRecord *> FilterOutMap; |
5369 | if (DbgVariableRecordsToSink.size() > 1) { |
5370 | SmallDenseMap<InstVarPair, unsigned> CountMap; |
5371 | // Count how many assignments to each variable there is per instruction. |
5372 | for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) { |
5373 | DebugVariable DbgUserVariable = |
5374 | DebugVariable(DVR->getVariable(), DVR->getExpression(), |
5375 | DVR->getDebugLoc()->getInlinedAt()); |
5376 | CountMap[std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable)] += 1; |
5377 | } |
5378 | |
5379 | // If there are any instructions with two assignments, add them to the |
5380 | // FilterOutMap to record that they need extra filtering. |
5381 | SmallPtrSet<const Instruction *, 4> DupSet; |
5382 | for (auto It : CountMap) { |
5383 | if (It.second > 1) { |
5384 | FilterOutMap[It.first] = nullptr; |
5385 | DupSet.insert(Ptr: It.first.first); |
5386 | } |
5387 | } |
5388 | |
5389 | // For all instruction/variable pairs needing extra filtering, find the |
5390 | // latest assignment. |
5391 | for (const Instruction *Inst : DupSet) { |
5392 | for (DbgVariableRecord &DVR : |
5393 | llvm::reverse(C: filterDbgVars(R: Inst->getDbgRecordRange()))) { |
5394 | DebugVariable DbgUserVariable = |
5395 | DebugVariable(DVR.getVariable(), DVR.getExpression(), |
5396 | DVR.getDebugLoc()->getInlinedAt()); |
5397 | auto FilterIt = |
5398 | FilterOutMap.find(Val: std::make_pair(x&: Inst, y&: DbgUserVariable)); |
5399 | if (FilterIt == FilterOutMap.end()) |
5400 | continue; |
5401 | if (FilterIt->second != nullptr) |
5402 | continue; |
5403 | FilterIt->second = &DVR; |
5404 | } |
5405 | } |
5406 | } |
5407 | |
5408 |   // Perform cloning of the DbgVariableRecords that we plan on sinking; filter
5409 |   // out any duplicate assignments identified above.
5410 | SmallVector<DbgVariableRecord *, 2> DVRClones; |
5411 | SmallSet<DebugVariable, 4> SunkVariables; |
5412 | for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) { |
5413 | if (DVR->Type == DbgVariableRecord::LocationType::Declare) |
5414 | continue; |
5415 | |
5416 | DebugVariable DbgUserVariable = |
5417 | DebugVariable(DVR->getVariable(), DVR->getExpression(), |
5418 | DVR->getDebugLoc()->getInlinedAt()); |
5419 | |
5420 | // For any variable where there were multiple assignments in the same place, |
5421 | // ignore all but the last assignment. |
5422 | if (!FilterOutMap.empty()) { |
5423 | InstVarPair IVP = std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable); |
5424 | auto It = FilterOutMap.find(Val: IVP); |
5425 | |
5426 | // Filter out. |
5427 | if (It != FilterOutMap.end() && It->second != DVR) |
5428 | continue; |
5429 | } |
5430 | |
5431 | if (!SunkVariables.insert(V: DbgUserVariable).second) |
5432 | continue; |
5433 | |
5434 | if (DVR->isDbgAssign()) |
5435 | continue; |
5436 | |
5437 | DVRClones.emplace_back(Args: DVR->clone()); |
5438 | LLVM_DEBUG(dbgs() << "CLONE: " << *DVRClones.back() << '\n'); |
5439 | } |
5440 | |
5441 | // Perform salvaging without the clones, then sink the clones. |
5442 | if (DVRClones.empty()) |
5443 | return; |
5444 | |
5445 | salvageDebugInfoForDbgValues(I&: *I, Insns: {}, DPInsns: DbgVariableRecordsToSalvage); |
5446 | |
5447 | // The clones are in reverse order of original appearance. Assert that the |
5448 | // head bit is set on the iterator as we _should_ have received it via |
5449 | // getFirstInsertionPt. Inserting like this will reverse the clone order as |
5450 | // we'll repeatedly insert at the head, such as: |
5451 | // DVR-3 (third insertion goes here) |
5452 | // DVR-2 (second insertion goes here) |
5453 | // DVR-1 (first insertion goes here) |
5454 | // Any-Prior-DVRs |
5455 | // InsertPtInst |
5456 | assert(InsertPos.getHeadBit()); |
5457 | for (DbgVariableRecord *DVRClone : DVRClones) { |
5458 | InsertPos->getParent()->insertDbgRecordBefore(DR: DVRClone, Here: InsertPos); |
5459 | LLVM_DEBUG(dbgs() << "SINK: " << *DVRClone << '\n'); |
5460 | } |
5461 | } |
5462 | |
5463 | bool InstCombinerImpl::run() { |
5464 | while (!Worklist.isEmpty()) { |
5465 | // Walk deferred instructions in reverse order, and push them to the |
5466 | // worklist, which means they'll end up popped from the worklist in-order. |
5467 | while (Instruction *I = Worklist.popDeferred()) { |
5468 | // Check to see if we can DCE the instruction. We do this already here to |
5469 | // reduce the number of uses and thus allow other folds to trigger. |
5470 | // Note that eraseInstFromFunction() may push additional instructions on |
5471 | // the deferred worklist, so this will DCE whole instruction chains. |
5472 | if (isInstructionTriviallyDead(I, TLI: &TLI)) { |
5473 | eraseInstFromFunction(I&: *I); |
5474 | ++NumDeadInst; |
5475 | continue; |
5476 | } |
5477 | |
5478 | Worklist.push(I); |
5479 | } |
5480 | |
5481 | Instruction *I = Worklist.removeOne(); |
5482 | if (I == nullptr) continue; // skip null values. |
5483 | |
5484 | // Check to see if we can DCE the instruction. |
5485 | if (isInstructionTriviallyDead(I, TLI: &TLI)) { |
5486 | eraseInstFromFunction(I&: *I); |
5487 | ++NumDeadInst; |
5488 | continue; |
5489 | } |
5490 | |
5491 | if (!DebugCounter::shouldExecute(CounterName: VisitCounter)) |
5492 | continue; |
5493 | |
5494 | // See if we can trivially sink this instruction to its user if we can |
5495 | // prove that the successor is not executed more frequently than our block. |
5496 | // Return the UserBlock if successful. |
5497 | auto getOptionalSinkBlockForInst = |
5498 | [this](Instruction *I) -> std::optional<BasicBlock *> { |
5499 | if (!EnableCodeSinking) |
5500 | return std::nullopt; |
5501 | |
5502 | BasicBlock *BB = I->getParent(); |
5503 | BasicBlock *UserParent = nullptr; |
5504 | unsigned NumUsers = 0; |
5505 | |
5506 | for (Use &U : I->uses()) { |
5507 | User *User = U.getUser(); |
5508 | if (User->isDroppable()) |
5509 | continue; |
5510 | if (NumUsers > MaxSinkNumUsers) |
5511 | return std::nullopt; |
5512 | |
5513 | Instruction *UserInst = cast<Instruction>(Val: User); |
5514 | // Special handling for Phi nodes - get the block the use occurs in. |
5515 | BasicBlock *UserBB = UserInst->getParent(); |
5516 | if (PHINode *PN = dyn_cast<PHINode>(Val: UserInst)) |
5517 | UserBB = PN->getIncomingBlock(U); |
5518 | // Bail out if we have uses in different blocks. We don't do any |
5519 |         // sophisticated analysis (i.e., finding NearestCommonDominator of these
5520 | // use blocks). |
5521 | if (UserParent && UserParent != UserBB) |
5522 | return std::nullopt; |
5523 | UserParent = UserBB; |
5524 | |
5525 |         // Make sure these checks are done only once; naturally, we do them the
5526 |         // first time we get the UserParent, which saves compile time.
5527 | if (NumUsers == 0) { |
5528 | // Try sinking to another block. If that block is unreachable, then do |
5529 | // not bother. SimplifyCFG should handle it. |
5530 | if (UserParent == BB || !DT.isReachableFromEntry(A: UserParent)) |
5531 | return std::nullopt; |
5532 | |
5533 | auto *Term = UserParent->getTerminator(); |
5534 | // See if the user is one of our successors that has only one |
5535 | // predecessor, so that we don't have to split the critical edge. |
5536 | // Another option where we can sink is a block that ends with a |
5537 |           // terminator that does not pass control to another block (such as
5538 | // return or unreachable or resume). In this case: |
5539 | // - I dominates the User (by SSA form); |
5540 | // - the User will be executed at most once. |
5541 | // So sinking I down to User is always profitable or neutral. |
5542 | if (UserParent->getUniquePredecessor() != BB && !succ_empty(I: Term)) |
5543 | return std::nullopt; |
5544 | |
5545 | assert(DT.dominates(BB, UserParent) && "Dominance relation broken?" ); |
5546 | } |
5547 | |
5548 | NumUsers++; |
5549 | } |
5550 | |
5551 |       // No users, or only droppable users.
5552 | if (!UserParent) |
5553 | return std::nullopt; |
5554 | |
5555 | return UserParent; |
5556 | }; |
5557 | |
5558 | auto OptBB = getOptionalSinkBlockForInst(I); |
5559 | if (OptBB) { |
5560 | auto *UserParent = *OptBB; |
5561 | // Okay, the CFG is simple enough, try to sink this instruction. |
5562 | if (tryToSinkInstruction(I, DestBlock: UserParent)) { |
5563 | LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n'); |
5564 | MadeIRChange = true; |
5565 |         // We'll add uses of the sunk instruction below, but since
5566 |         // sinking can expose opportunities for its *operands*, add
5567 |         // them to the worklist.
5568 | for (Use &U : I->operands()) |
5569 | if (Instruction *OpI = dyn_cast<Instruction>(Val: U.get())) |
5570 | Worklist.push(I: OpI); |
5571 | } |
5572 | } |
5573 | |
5574 | // Now that we have an instruction, try combining it to simplify it. |
5575 | Builder.SetInsertPoint(I); |
5576 | Builder.CollectMetadataToCopy( |
5577 | Src: I, MetadataKinds: {LLVMContext::MD_dbg, LLVMContext::MD_annotation}); |
5578 | |
5579 | #ifndef NDEBUG |
5580 | std::string OrigI; |
5581 | #endif |
5582 | LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS);); |
5583 | LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n'); |
5584 | |
5585 | if (Instruction *Result = visit(I&: *I)) { |
5586 | ++NumCombined; |
5587 | // Should we replace the old instruction with a new one? |
5588 | if (Result != I) { |
5589 | LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n' |
5590 | << " New = " << *Result << '\n'); |
5591 | |
5592 | // We copy the old instruction's DebugLoc to the new instruction, unless |
5593 | // InstCombine already assigned a DebugLoc to it, in which case we |
5594 | // should trust the more specifically selected DebugLoc. |
5595 | Result->setDebugLoc(Result->getDebugLoc().orElse(Other: I->getDebugLoc())); |
5596 | // We also copy annotation metadata to the new instruction. |
5597 | Result->copyMetadata(SrcInst: *I, WL: LLVMContext::MD_annotation); |
5598 | // Everything uses the new instruction now. |
5599 | I->replaceAllUsesWith(V: Result); |
5600 | |
5601 | // Move the name to the new instruction first. |
5602 | Result->takeName(V: I); |
5603 | |
5604 | // Insert the new instruction into the basic block... |
5605 | BasicBlock *InstParent = I->getParent(); |
5606 | BasicBlock::iterator InsertPos = I->getIterator(); |
5607 | |
5608 |         // Are we replacing a PHI with something that isn't a PHI, or vice versa?
5609 | if (isa<PHINode>(Val: Result) != isa<PHINode>(Val: I)) { |
5610 | // We need to fix up the insertion point. |
5611 | if (isa<PHINode>(Val: I)) // PHI -> Non-PHI |
5612 | InsertPos = InstParent->getFirstInsertionPt(); |
5613 | else // Non-PHI -> PHI |
5614 | InsertPos = InstParent->getFirstNonPHIIt(); |
5615 | } |
5616 | |
5617 | Result->insertInto(ParentBB: InstParent, It: InsertPos); |
5618 | |
5619 | // Push the new instruction and any users onto the worklist. |
5620 | Worklist.pushUsersToWorkList(I&: *Result); |
5621 | Worklist.push(I: Result); |
5622 | |
5623 | eraseInstFromFunction(I&: *I); |
5624 | } else { |
5625 | LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n' |
5626 | << " New = " << *I << '\n'); |
5627 | |
5628 | // If the instruction was modified, it's possible that it is now dead. |
5629 |         // If so, remove it.
5630 | if (isInstructionTriviallyDead(I, TLI: &TLI)) { |
5631 | eraseInstFromFunction(I&: *I); |
5632 | } else { |
5633 | Worklist.pushUsersToWorkList(I&: *I); |
5634 | Worklist.push(I); |
5635 | } |
5636 | } |
5637 | MadeIRChange = true; |
5638 | } |
5639 | } |
5640 | |
5641 | Worklist.zap(); |
5642 | return MadeIRChange; |
5643 | } |
5644 | |
5645 | // Track the scopes used by !alias.scope and !noalias. In a function, a |
5646 | // @llvm.experimental.noalias.scope.decl is only useful if that scope is used |
5647 | // by both sets. If not, the declaration of the scope can be safely omitted. |
5648 | // The MDNode of the scope can be omitted as well for the instructions that are |
5649 | // part of this function. We do not do that at this point, as this might become |
5650 | // too time consuming to do. |
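// For example (illustrative only), a declaration such as
//   call void @llvm.experimental.noalias.scope.decl(metadata !2)
// is dead if no instruction in the function references the scope in !2 from
// its !alias.scope metadata, or none references it from its !noalias metadata.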
5651 | class AliasScopeTracker { |
5652 | SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists; |
5653 | SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists; |
5654 | |
5655 | public: |
5656 | void analyse(Instruction *I) { |
5657 | // This seems to be faster than checking 'mayReadOrWriteMemory()'. |
5658 | if (!I->hasMetadataOtherThanDebugLoc()) |
5659 | return; |
5660 | |
5661 | auto Track = [](Metadata *ScopeList, auto &Container) { |
5662 | const auto *MDScopeList = dyn_cast_or_null<MDNode>(Val: ScopeList); |
5663 | if (!MDScopeList || !Container.insert(MDScopeList).second) |
5664 | return; |
5665 | for (const auto &MDOperand : MDScopeList->operands()) |
5666 | if (auto *MDScope = dyn_cast<MDNode>(Val: MDOperand)) |
5667 | Container.insert(MDScope); |
5668 | }; |
5669 | |
5670 | Track(I->getMetadata(KindID: LLVMContext::MD_alias_scope), UsedAliasScopesAndLists); |
5671 | Track(I->getMetadata(KindID: LLVMContext::MD_noalias), UsedNoAliasScopesAndLists); |
5672 | } |
5673 | |
5674 | bool isNoAliasScopeDeclDead(Instruction *Inst) { |
5675 | NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: Inst); |
5676 | if (!Decl) |
5677 | return false; |
5678 | |
5679 | assert(Decl->use_empty() && |
5680 | "llvm.experimental.noalias.scope.decl in use ?" ); |
5681 | const MDNode *MDSL = Decl->getScopeList(); |
5682 | assert(MDSL->getNumOperands() == 1 && |
5683 | "llvm.experimental.noalias.scope should refer to a single scope" ); |
5684 | auto &MDOperand = MDSL->getOperand(I: 0); |
5685 | if (auto *MD = dyn_cast<MDNode>(Val: MDOperand)) |
5686 | return !UsedAliasScopesAndLists.contains(Ptr: MD) || |
5687 | !UsedNoAliasScopesAndLists.contains(Ptr: MD); |
5688 | |
5689 |     // Not an MDNode? Throw it away.
5690 | return true; |
5691 | } |
5692 | }; |
5693 | |
5694 | /// Populate the IC worklist from a function, by walking it in reverse |
5695 | /// post-order and adding all reachable code to the worklist. |
5696 | /// |
5697 | /// This has a couple of tricks to make the code faster and more powerful. In |
5698 | /// particular, we constant fold and DCE instructions as we go, to avoid adding |
5699 | /// them to the worklist (this significantly speeds up instcombine on code where |
5700 | /// many instructions are dead or constant). Additionally, if we find a branch |
5701 | /// whose condition is a known constant, we only visit the reachable successors. |
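/// For example (illustrative only), for a block ending in
///   br i1 true, label %live, label %dead
/// only %live is queued for visiting, the edge to %dead is recorded as dead,
/// and phi operands incoming over that edge are replaced with poison.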
5702 | bool InstCombinerImpl::prepareWorklist(Function &F) { |
5703 | bool MadeIRChange = false; |
5704 | SmallPtrSet<BasicBlock *, 32> LiveBlocks; |
5705 | SmallVector<Instruction *, 128> InstrsForInstructionWorklist; |
5706 | DenseMap<Constant *, Constant *> FoldedConstants; |
5707 | AliasScopeTracker SeenAliasScopes; |
5708 | |
5709 | auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) { |
5710 | for (BasicBlock *Succ : successors(BB)) |
5711 | if (Succ != LiveSucc && DeadEdges.insert(V: {BB, Succ}).second) |
5712 | for (PHINode &PN : Succ->phis()) |
5713 | for (Use &U : PN.incoming_values()) |
5714 | if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(Val: U)) { |
5715 | U.set(PoisonValue::get(T: PN.getType())); |
5716 | MadeIRChange = true; |
5717 | } |
5718 | }; |
5719 | |
5720 | for (BasicBlock *BB : RPOT) { |
5721 | if (!BB->isEntryBlock() && all_of(Range: predecessors(BB), P: [&](BasicBlock *Pred) { |
5722 | return DeadEdges.contains(V: {Pred, BB}) || DT.dominates(A: BB, B: Pred); |
5723 | })) { |
5724 | HandleOnlyLiveSuccessor(BB, nullptr); |
5725 | continue; |
5726 | } |
5727 | LiveBlocks.insert(Ptr: BB); |
5728 | |
5729 | for (Instruction &Inst : llvm::make_early_inc_range(Range&: *BB)) { |
5730 | // ConstantProp instruction if trivially constant. |
5731 | if (!Inst.use_empty() && |
5732 | (Inst.getNumOperands() == 0 || isa<Constant>(Val: Inst.getOperand(i: 0)))) |
5733 | if (Constant *C = ConstantFoldInstruction(I: &Inst, DL, TLI: &TLI)) { |
5734 | LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst |
5735 | << '\n'); |
5736 | Inst.replaceAllUsesWith(V: C); |
5737 | ++NumConstProp; |
5738 | if (isInstructionTriviallyDead(I: &Inst, TLI: &TLI)) |
5739 | Inst.eraseFromParent(); |
5740 | MadeIRChange = true; |
5741 | continue; |
5742 | } |
5743 | |
5744 | // See if we can constant fold its operands. |
5745 | for (Use &U : Inst.operands()) { |
5746 | if (!isa<ConstantVector>(Val: U) && !isa<ConstantExpr>(Val: U)) |
5747 | continue; |
5748 | |
5749 | auto *C = cast<Constant>(Val&: U); |
5750 | Constant *&FoldRes = FoldedConstants[C]; |
5751 | if (!FoldRes) |
5752 | FoldRes = ConstantFoldConstant(C, DL, TLI: &TLI); |
5753 | |
5754 | if (FoldRes != C) { |
5755 | LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst |
5756 | << "\n Old = " << *C |
5757 | << "\n New = " << *FoldRes << '\n'); |
5758 | U = FoldRes; |
5759 | MadeIRChange = true; |
5760 | } |
5761 | } |
5762 | |
5763 | // Skip processing debug and pseudo intrinsics in InstCombine. Processing |
5764 | // these call instructions consumes non-trivial amount of time and |
5765 | // provides no value for the optimization. |
5766 | if (!Inst.isDebugOrPseudoInst()) { |
5767 | InstrsForInstructionWorklist.push_back(Elt: &Inst); |
5768 | SeenAliasScopes.analyse(I: &Inst); |
5769 | } |
5770 | } |
5771 | |
5772 | // If this is a branch or switch on a constant, mark only the single |
5773 | // live successor. Otherwise assume all successors are live. |
5774 | Instruction *TI = BB->getTerminator(); |
5775 | if (BranchInst *BI = dyn_cast<BranchInst>(Val: TI); BI && BI->isConditional()) { |
5776 | if (isa<UndefValue>(Val: BI->getCondition())) { |
5777 | // Branch on undef is UB. |
5778 | HandleOnlyLiveSuccessor(BB, nullptr); |
5779 | continue; |
5780 | } |
5781 | if (auto *Cond = dyn_cast<ConstantInt>(Val: BI->getCondition())) { |
5782 | bool CondVal = Cond->getZExtValue(); |
5783 | HandleOnlyLiveSuccessor(BB, BI->getSuccessor(i: !CondVal)); |
5784 | continue; |
5785 | } |
5786 | } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) { |
5787 | if (isa<UndefValue>(Val: SI->getCondition())) { |
5788 | // Switch on undef is UB. |
5789 | HandleOnlyLiveSuccessor(BB, nullptr); |
5790 | continue; |
5791 | } |
5792 | if (auto *Cond = dyn_cast<ConstantInt>(Val: SI->getCondition())) { |
5793 | HandleOnlyLiveSuccessor(BB, |
5794 | SI->findCaseValue(C: Cond)->getCaseSuccessor()); |
5795 | continue; |
5796 | } |
5797 | } |
5798 | } |
5799 | |
5800 | // Remove instructions inside unreachable blocks. This prevents the |
5801 | // instcombine code from having to deal with some bad special cases, and |
5802 | // reduces use counts of instructions. |
5803 | for (BasicBlock &BB : F) { |
5804 | if (LiveBlocks.count(Ptr: &BB)) |
5805 | continue; |
5806 | |
5807 |     unsigned NumDeadInstInBB =
5808 |         removeAllNonTerminatorAndEHPadInstructions(BB: &BB);
5809 | |
5810 | MadeIRChange |= NumDeadInstInBB != 0; |
5811 | NumDeadInst += NumDeadInstInBB; |
5812 | } |
5813 | |
5814 | // Once we've found all of the instructions to add to instcombine's worklist, |
5815 | // add them in reverse order. This way instcombine will visit from the top |
5816 | // of the function down. This jibes well with the way that it adds all uses
5817 | // of instructions to the worklist after doing a transformation, thus avoiding |
5818 | // some N^2 behavior in pathological cases. |
5819 | Worklist.reserve(Size: InstrsForInstructionWorklist.size()); |
5820 | for (Instruction *Inst : reverse(C&: InstrsForInstructionWorklist)) { |
5821 | // DCE instruction if trivially dead. As we iterate in reverse program |
5822 | // order here, we will clean up whole chains of dead instructions. |
5823 | if (isInstructionTriviallyDead(I: Inst, TLI: &TLI) || |
5824 | SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) { |
5825 | ++NumDeadInst; |
5826 | LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n'); |
5827 | salvageDebugInfo(I&: *Inst); |
5828 | Inst->eraseFromParent(); |
5829 | MadeIRChange = true; |
5830 | continue; |
5831 | } |
5832 | |
5833 | Worklist.push(I: Inst); |
5834 | } |
5835 | |
5836 | return MadeIRChange; |
5837 | } |
5838 | |
5839 | void InstCombiner::computeBackEdges() { |
5840 | // Collect backedges. |
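  // An edge whose target has already been seen in this reverse post-order walk
  // is a retreating edge, e.g. (illustrative) the latch->header edge of a loop.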
5841 | SmallPtrSet<BasicBlock *, 16> Visited; |
5842 | for (BasicBlock *BB : RPOT) { |
5843 | Visited.insert(Ptr: BB); |
5844 | for (BasicBlock *Succ : successors(BB)) |
5845 | if (Visited.contains(Ptr: Succ)) |
5846 | BackEdges.insert(V: {BB, Succ}); |
5847 | } |
5848 | ComputedBackEdges = true; |
5849 | } |
5850 | |
5851 | static bool combineInstructionsOverFunction( |
5852 | Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA, |
5853 | AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, |
5854 | DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, |
5855 | BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, |
5856 | const InstCombineOptions &Opts) { |
5857 | auto &DL = F.getDataLayout(); |
5858 | bool VerifyFixpoint = Opts.VerifyFixpoint && |
5859 | !F.hasFnAttribute(Kind: "instcombine-no-verify-fixpoint" ); |
5860 | |
5861 | /// Builder - This is an IRBuilder that automatically inserts new |
5862 | /// instructions into the worklist when they are created. |
5863 | IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder( |
5864 | F.getContext(), TargetFolder(DL), |
5865 | IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) { |
5866 | Worklist.add(I); |
5867 | if (auto *Assume = dyn_cast<AssumeInst>(Val: I)) |
5868 | AC.registerAssumption(CI: Assume); |
5869 | })); |
5870 | |
5871 | ReversePostOrderTraversal<BasicBlock *> RPOT(&F.front()); |
5872 | |
5873 |   // Lower dbg.declare intrinsics, otherwise their value may be clobbered
5874 |   // by the instcombiner.
5875 | bool MadeIRChange = false; |
5876 | if (ShouldLowerDbgDeclare) |
5877 | MadeIRChange = LowerDbgDeclare(F); |
5878 | |
5879 | // Iterate while there is work to do. |
5880 | unsigned Iteration = 0; |
5881 | while (true) { |
5882 | if (Iteration >= Opts.MaxIterations && !VerifyFixpoint) { |
5883 | LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations |
5884 | << " on " << F.getName() |
5885 | << " reached; stopping without verifying fixpoint\n" ); |
5886 | break; |
5887 | } |
5888 | |
5889 | ++Iteration; |
5890 | ++NumWorklistIterations; |
5891 | LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " |
5892 | << F.getName() << "\n" ); |
5893 | |
5894 | InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT, |
5895 | ORE, BFI, BPI, PSI, DL, RPOT); |
5896 | IC.MaxArraySizeForCombine = MaxArraySize; |
5897 | bool MadeChangeInThisIteration = IC.prepareWorklist(F); |
5898 | MadeChangeInThisIteration |= IC.run(); |
5899 | if (!MadeChangeInThisIteration) |
5900 | break; |
5901 | |
5902 | MadeIRChange = true; |
5903 | if (Iteration > Opts.MaxIterations) { |
5904 | reportFatalUsageError( |
5905 | reason: "Instruction Combining on " + Twine(F.getName()) + |
5906 | " did not reach a fixpoint after " + Twine(Opts.MaxIterations) + |
5907 | " iterations. " + |
5908 | "Use 'instcombine<no-verify-fixpoint>' or function attribute " |
5909 | "'instcombine-no-verify-fixpoint' to suppress this error." ); |
5910 | } |
5911 | } |
5912 | |
5913 | if (Iteration == 1) |
5914 | ++NumOneIteration; |
5915 | else if (Iteration == 2) |
5916 | ++NumTwoIterations; |
5917 | else if (Iteration == 3) |
5918 | ++NumThreeIterations; |
5919 | else |
5920 | ++NumFourOrMoreIterations; |
5921 | |
5922 | return MadeIRChange; |
5923 | } |
5924 | |
5925 | InstCombinePass::InstCombinePass(InstCombineOptions Opts) : Options(Opts) {} |
5926 | |
5927 | void InstCombinePass::printPipeline( |
5928 | raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { |
5929 | static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline( |
5930 | OS, MapClassName2PassName); |
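  // Produces a textual pipeline element of the form (illustrative):
  //   instcombine<max-iterations=N;verify-fixpoint>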
5931 | OS << '<'; |
5932 | OS << "max-iterations=" << Options.MaxIterations << ";" ; |
5933 | OS << (Options.VerifyFixpoint ? "" : "no-" ) << "verify-fixpoint" ; |
5934 | OS << '>'; |
5935 | } |
5936 | |
5937 | char InstCombinePass::ID = 0; |
5938 | |
5939 | PreservedAnalyses InstCombinePass::run(Function &F, |
5940 | FunctionAnalysisManager &AM) { |
5941 | auto &LRT = AM.getResult<LastRunTrackingAnalysis>(IR&: F); |
5942 | // No changes since last InstCombine pass, exit early. |
5943 | if (LRT.shouldSkip(ID: &ID)) |
5944 | return PreservedAnalyses::all(); |
5945 | |
5946 | auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F); |
5947 | auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F); |
5948 | auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F); |
5949 | auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F); |
5950 | auto &TTI = AM.getResult<TargetIRAnalysis>(IR&: F); |
5951 | |
5952 | auto *AA = &AM.getResult<AAManager>(IR&: F); |
5953 | auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F); |
5954 | ProfileSummaryInfo *PSI = |
5955 | MAMProxy.getCachedResult<ProfileSummaryAnalysis>(IR&: *F.getParent()); |
5956 | auto *BFI = (PSI && PSI->hasProfileSummary()) ? |
5957 | &AM.getResult<BlockFrequencyAnalysis>(IR&: F) : nullptr; |
5958 | auto *BPI = AM.getCachedResult<BranchProbabilityAnalysis>(IR&: F); |
5959 | |
5960 | if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE, |
5961 | BFI, BPI, PSI, Opts: Options)) { |
5962 | // No changes, all analyses are preserved. |
5963 | LRT.update(ID: &ID, /*Changed=*/false); |
5964 | return PreservedAnalyses::all(); |
5965 | } |
5966 | |
5967 | // Mark all the analyses that instcombine updates as preserved. |
5968 | PreservedAnalyses PA; |
5969 | LRT.update(ID: &ID, /*Changed=*/true); |
5970 | PA.preserve<LastRunTrackingAnalysis>(); |
5971 | PA.preserveSet<CFGAnalyses>(); |
5972 | return PA; |
5973 | } |
5974 | |
5975 | void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const { |
5976 | AU.setPreservesCFG(); |
5977 | AU.addRequired<AAResultsWrapperPass>(); |
5978 | AU.addRequired<AssumptionCacheTracker>(); |
5979 | AU.addRequired<TargetLibraryInfoWrapperPass>(); |
5980 | AU.addRequired<TargetTransformInfoWrapperPass>(); |
5981 | AU.addRequired<DominatorTreeWrapperPass>(); |
5982 | AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); |
5983 | AU.addPreserved<DominatorTreeWrapperPass>(); |
5984 | AU.addPreserved<AAResultsWrapperPass>(); |
5985 | AU.addPreserved<BasicAAWrapperPass>(); |
5986 | AU.addPreserved<GlobalsAAWrapperPass>(); |
5987 | AU.addRequired<ProfileSummaryInfoWrapperPass>(); |
5988 | LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); |
5989 | } |
5990 | |
5991 | bool InstructionCombiningPass::runOnFunction(Function &F) { |
5992 | if (skipFunction(F)) |
5993 | return false; |
5994 | |
5995 | // Required analyses. |
5996 | auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); |
5997 | auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); |
5998 | auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); |
5999 | auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); |
6000 | auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
6001 | auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); |
6002 | |
6003 | // Optional analyses. |
6004 | ProfileSummaryInfo *PSI = |
6005 | &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); |
6006 | BlockFrequencyInfo *BFI = |
6007 | (PSI && PSI->hasProfileSummary()) ? |
6008 | &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() : |
6009 | nullptr; |
6010 | BranchProbabilityInfo *BPI = nullptr; |
6011 | if (auto *WrapperPass = |
6012 | getAnalysisIfAvailable<BranchProbabilityInfoWrapperPass>()) |
6013 | BPI = &WrapperPass->getBPI(); |
6014 | |
6015 | return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE, |
6016 | BFI, BPI, PSI, Opts: InstCombineOptions()); |
6017 | } |
6018 | |
6019 | char InstructionCombiningPass::ID = 0; |
6020 | |
6021 | InstructionCombiningPass::InstructionCombiningPass() : FunctionPass(ID) { |
6022 | initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry()); |
6023 | } |
6024 | |
6025 | INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine" , |
6026 | "Combine redundant instructions" , false, false) |
6027 | INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) |
6028 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) |
6029 | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) |
6030 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
6031 | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) |
6032 | INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) |
6033 | INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) |
6034 | INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass) |
6035 | INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) |
6036 | INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine" , |
6037 | "Combine redundant instructions" , false, false) |
6038 | |
6039 | // Initialization Routines |
6040 | void llvm::initializeInstCombine(PassRegistry &Registry) { |
6041 | initializeInstructionCombiningPassPass(Registry); |
6042 | } |
6043 | |
6044 | FunctionPass *llvm::createInstructionCombiningPass() { |
6045 | return new InstructionCombiningPass(); |
6046 | } |
6047 | |