1//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// InstructionCombining - Combine instructions to form fewer, simple
10// instructions. This pass does not modify the CFG. This pass is where
11// algebraic simplification happens.
12//
13// This pass combines things like:
14// %Y = add i32 %X, 1
15// %Z = add i32 %Y, 1
16// into:
17// %Z = add i32 %X, 2
18//
19// This is a simple worklist driven algorithm.
20//
21// This pass guarantees that the following canonicalizations are performed on
22// the program:
23// 1. If a binary operator has a constant operand, it is moved to the RHS
24// 2. Bitwise operators with constant operands are always grouped so that
25// shifts are performed first, then or's, then and's, then xor's.
26// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27// 4. All cmp instructions on boolean values are replaced with logical ops
28// 5. add X, X is represented as (X*2) => (X << 1)
29// 6. Multiplies with a power-of-two constant argument are transformed into
30// shifts.
31// ... etc.
32//
33//===----------------------------------------------------------------------===//
34
35#include "InstCombineInternal.h"
36#include "llvm/ADT/APFloat.h"
37#include "llvm/ADT/APInt.h"
38#include "llvm/ADT/ArrayRef.h"
39#include "llvm/ADT/DenseMap.h"
40#include "llvm/ADT/SmallPtrSet.h"
41#include "llvm/ADT/SmallVector.h"
42#include "llvm/ADT/Statistic.h"
43#include "llvm/Analysis/AliasAnalysis.h"
44#include "llvm/Analysis/AssumptionCache.h"
45#include "llvm/Analysis/BasicAliasAnalysis.h"
46#include "llvm/Analysis/BlockFrequencyInfo.h"
47#include "llvm/Analysis/CFG.h"
48#include "llvm/Analysis/ConstantFolding.h"
49#include "llvm/Analysis/GlobalsModRef.h"
50#include "llvm/Analysis/InstructionSimplify.h"
51#include "llvm/Analysis/LastRunTrackingAnalysis.h"
52#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
53#include "llvm/Analysis/MemoryBuiltins.h"
54#include "llvm/Analysis/OptimizationRemarkEmitter.h"
55#include "llvm/Analysis/ProfileSummaryInfo.h"
56#include "llvm/Analysis/TargetFolder.h"
57#include "llvm/Analysis/TargetLibraryInfo.h"
58#include "llvm/Analysis/TargetTransformInfo.h"
59#include "llvm/Analysis/Utils/Local.h"
60#include "llvm/Analysis/ValueTracking.h"
61#include "llvm/Analysis/VectorUtils.h"
62#include "llvm/IR/BasicBlock.h"
63#include "llvm/IR/CFG.h"
64#include "llvm/IR/Constant.h"
65#include "llvm/IR/Constants.h"
66#include "llvm/IR/DIBuilder.h"
67#include "llvm/IR/DataLayout.h"
68#include "llvm/IR/DebugInfo.h"
69#include "llvm/IR/DerivedTypes.h"
70#include "llvm/IR/Dominators.h"
71#include "llvm/IR/EHPersonalities.h"
72#include "llvm/IR/Function.h"
73#include "llvm/IR/GetElementPtrTypeIterator.h"
74#include "llvm/IR/IRBuilder.h"
75#include "llvm/IR/InstrTypes.h"
76#include "llvm/IR/Instruction.h"
77#include "llvm/IR/Instructions.h"
78#include "llvm/IR/IntrinsicInst.h"
79#include "llvm/IR/Intrinsics.h"
80#include "llvm/IR/Metadata.h"
81#include "llvm/IR/Operator.h"
82#include "llvm/IR/PassManager.h"
83#include "llvm/IR/PatternMatch.h"
84#include "llvm/IR/Type.h"
85#include "llvm/IR/Use.h"
86#include "llvm/IR/User.h"
87#include "llvm/IR/Value.h"
88#include "llvm/IR/ValueHandle.h"
89#include "llvm/InitializePasses.h"
90#include "llvm/Support/Casting.h"
91#include "llvm/Support/CommandLine.h"
92#include "llvm/Support/Compiler.h"
93#include "llvm/Support/Debug.h"
94#include "llvm/Support/DebugCounter.h"
95#include "llvm/Support/ErrorHandling.h"
96#include "llvm/Support/KnownBits.h"
97#include "llvm/Support/KnownFPClass.h"
98#include "llvm/Support/raw_ostream.h"
99#include "llvm/Transforms/InstCombine/InstCombine.h"
100#include "llvm/Transforms/Utils/BasicBlockUtils.h"
101#include "llvm/Transforms/Utils/Local.h"
102#include <algorithm>
103#include <cassert>
104#include <cstdint>
105#include <memory>
106#include <optional>
107#include <string>
108#include <utility>
109
110#define DEBUG_TYPE "instcombine"
111#include "llvm/Transforms/Utils/InstructionWorklist.h"
112#include <optional>
113
114using namespace llvm;
115using namespace llvm::PatternMatch;
116
117STATISTIC(NumWorklistIterations,
118 "Number of instruction combining iterations performed");
119STATISTIC(NumOneIteration, "Number of functions with one iteration");
120STATISTIC(NumTwoIterations, "Number of functions with two iterations");
121STATISTIC(NumThreeIterations, "Number of functions with three iterations");
122STATISTIC(NumFourOrMoreIterations,
123 "Number of functions with four or more iterations");
124
125STATISTIC(NumCombined , "Number of insts combined");
126STATISTIC(NumConstProp, "Number of constant folds");
127STATISTIC(NumDeadInst , "Number of dead inst eliminated");
128STATISTIC(NumSunkInst , "Number of instructions sunk");
129STATISTIC(NumExpand, "Number of expansions");
130STATISTIC(NumFactor , "Number of factorizations");
131STATISTIC(NumReassoc , "Number of reassociations");
132DEBUG_COUNTER(VisitCounter, "instcombine-visit",
133 "Controls which instructions are visited");
134
135static cl::opt<bool> EnableCodeSinking("instcombine-code-sinking",
136 cl::desc("Enable code sinking"),
137 cl::init(Val: true));
138
139static cl::opt<unsigned> MaxSinkNumUsers(
140 "instcombine-max-sink-users", cl::init(Val: 32),
141 cl::desc("Maximum number of undroppable users for instruction sinking"));
142
143static cl::opt<unsigned>
144MaxArraySize("instcombine-maxarray-size", cl::init(Val: 1024),
145 cl::desc("Maximum array size considered when doing a combine"));
146
147namespace llvm {
148extern cl::opt<bool> ProfcheckDisableMetadataFixes;
149} // end namespace llvm
150
151// FIXME: Remove this flag when it is no longer necessary to convert
152// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
153// increases variable availability at the cost of accuracy. Variables that
154// cannot be promoted by mem2reg or SROA will be described as living in memory
155// for their entire lifetime. However, passes like DSE and instcombine can
156// delete stores to the alloca, leading to misleading and inaccurate debug
157// information. This flag can be removed when those passes are fixed.
158static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
159 cl::Hidden, cl::init(Val: true));
160
161std::optional<Instruction *>
162InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) {
163 // Handle target specific intrinsics
164 if (II.getCalledFunction()->isTargetIntrinsic()) {
165 return TTIForTargetIntrinsicsOnly.instCombineIntrinsic(IC&: *this, II);
166 }
167 return std::nullopt;
168}
169
170std::optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
171 IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
172 bool &KnownBitsComputed) {
173 // Handle target specific intrinsics
174 if (II.getCalledFunction()->isTargetIntrinsic()) {
175 return TTIForTargetIntrinsicsOnly.simplifyDemandedUseBitsIntrinsic(
176 IC&: *this, II, DemandedMask, Known, KnownBitsComputed);
177 }
178 return std::nullopt;
179}
180
181std::optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
182 IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts,
183 APInt &PoisonElts2, APInt &PoisonElts3,
184 std::function<void(Instruction *, unsigned, APInt, APInt &)>
185 SimplifyAndSetOp) {
186 // Handle target specific intrinsics
187 if (II.getCalledFunction()->isTargetIntrinsic()) {
188 return TTIForTargetIntrinsicsOnly.simplifyDemandedVectorEltsIntrinsic(
189 IC&: *this, II, DemandedElts, UndefElts&: PoisonElts, UndefElts2&: PoisonElts2, UndefElts3&: PoisonElts3,
190 SimplifyAndSetOp);
191 }
192 return std::nullopt;
193}
194
195bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
196 // Approved exception for TTI use: This queries a legality property of the
197 // target, not an profitability heuristic. Ideally this should be part of
198 // DataLayout instead.
199 return TTIForTargetIntrinsicsOnly.isValidAddrSpaceCast(FromAS, ToAS);
200}
201
202Value *InstCombinerImpl::EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP) {
203 if (!RewriteGEP)
204 return llvm::emitGEPOffset(Builder: &Builder, DL, GEP);
205
206 IRBuilderBase::InsertPointGuard Guard(Builder);
207 auto *Inst = dyn_cast<Instruction>(Val: GEP);
208 if (Inst)
209 Builder.SetInsertPoint(Inst);
210
211 Value *Offset = EmitGEPOffset(GEP);
212 // Rewrite non-trivial GEPs to avoid duplicating the offset arithmetic.
213 if (Inst && !GEP->hasAllConstantIndices() &&
214 !GEP->getSourceElementType()->isIntegerTy(Bitwidth: 8)) {
215 replaceInstUsesWith(
216 I&: *Inst, V: Builder.CreateGEP(Ty: Builder.getInt8Ty(), Ptr: GEP->getPointerOperand(),
217 IdxList: Offset, Name: "", NW: GEP->getNoWrapFlags()));
218 eraseInstFromFunction(I&: *Inst);
219 }
220 return Offset;
221}
222
223Value *InstCombinerImpl::EmitGEPOffsets(ArrayRef<GEPOperator *> GEPs,
224 GEPNoWrapFlags NW, Type *IdxTy,
225 bool RewriteGEPs) {
226 auto Add = [&](Value *Sum, Value *Offset) -> Value * {
227 if (Sum)
228 return Builder.CreateAdd(LHS: Sum, RHS: Offset, Name: "", HasNUW: NW.hasNoUnsignedWrap(),
229 HasNSW: NW.isInBounds());
230 else
231 return Offset;
232 };
233
234 Value *Sum = nullptr;
235 Value *OneUseSum = nullptr;
236 Value *OneUseBase = nullptr;
237 GEPNoWrapFlags OneUseFlags = GEPNoWrapFlags::all();
238 for (GEPOperator *GEP : reverse(C&: GEPs)) {
239 Value *Offset;
240 {
241 // Expand the offset at the point of the previous GEP to enable rewriting.
242 // However, use the original insertion point for calculating Sum.
243 IRBuilderBase::InsertPointGuard Guard(Builder);
244 auto *Inst = dyn_cast<Instruction>(Val: GEP);
245 if (RewriteGEPs && Inst)
246 Builder.SetInsertPoint(Inst);
247
248 Offset = llvm::emitGEPOffset(Builder: &Builder, DL, GEP);
249 if (Offset->getType() != IdxTy)
250 Offset = Builder.CreateVectorSplat(
251 EC: cast<VectorType>(Val: IdxTy)->getElementCount(), V: Offset);
252 if (GEP->hasOneUse()) {
253 // Offsets of one-use GEPs will be merged into the next multi-use GEP.
254 OneUseSum = Add(OneUseSum, Offset);
255 OneUseFlags = OneUseFlags.intersectForOffsetAdd(Other: GEP->getNoWrapFlags());
256 if (!OneUseBase)
257 OneUseBase = GEP->getPointerOperand();
258 continue;
259 }
260
261 if (OneUseSum)
262 Offset = Add(OneUseSum, Offset);
263
264 // Rewrite the GEP to reuse the computed offset. This also includes
265 // offsets from preceding one-use GEPs.
266 if (RewriteGEPs && Inst &&
267 !(GEP->getSourceElementType()->isIntegerTy(Bitwidth: 8) &&
268 GEP->getOperand(i_nocapture: 1) == Offset)) {
269 replaceInstUsesWith(
270 I&: *Inst,
271 V: Builder.CreatePtrAdd(
272 Ptr: OneUseBase ? OneUseBase : GEP->getPointerOperand(), Offset, Name: "",
273 NW: OneUseFlags.intersectForOffsetAdd(Other: GEP->getNoWrapFlags())));
274 eraseInstFromFunction(I&: *Inst);
275 }
276 }
277
278 Sum = Add(Sum, Offset);
279 OneUseSum = OneUseBase = nullptr;
280 OneUseFlags = GEPNoWrapFlags::all();
281 }
282 if (OneUseSum)
283 Sum = Add(Sum, OneUseSum);
284 if (!Sum)
285 return Constant::getNullValue(Ty: IdxTy);
286 return Sum;
287}
288
289/// Legal integers and common types are considered desirable. This is used to
290/// avoid creating instructions with types that may not be supported well by the
291/// the backend.
292/// NOTE: This treats i8, i16 and i32 specially because they are common
293/// types in frontend languages.
294bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
295 switch (BitWidth) {
296 case 8:
297 case 16:
298 case 32:
299 return true;
300 default:
301 return DL.isLegalInteger(Width: BitWidth);
302 }
303}
304
305/// Return true if it is desirable to convert an integer computation from a
306/// given bit width to a new bit width.
307/// We don't want to convert from a legal or desirable type (like i8) to an
308/// illegal type or from a smaller to a larger illegal type. A width of '1'
309/// is always treated as a desirable type because i1 is a fundamental type in
310/// IR, and there are many specialized optimizations for i1 types.
311/// Common/desirable widths are equally treated as legal to convert to, in
312/// order to open up more combining opportunities.
313bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
314 unsigned ToWidth) const {
315 bool FromLegal = FromWidth == 1 || DL.isLegalInteger(Width: FromWidth);
316 bool ToLegal = ToWidth == 1 || DL.isLegalInteger(Width: ToWidth);
317
318 // Convert to desirable widths even if they are not legal types.
319 // Only shrink types, to prevent infinite loops.
320 if (ToWidth < FromWidth && isDesirableIntType(BitWidth: ToWidth))
321 return true;
322
323 // If this is a legal or desiable integer from type, and the result would be
324 // an illegal type, don't do the transformation.
325 if ((FromLegal || isDesirableIntType(BitWidth: FromWidth)) && !ToLegal)
326 return false;
327
328 // Otherwise, if both are illegal, do not increase the size of the result. We
329 // do allow things like i160 -> i64, but not i64 -> i160.
330 if (!FromLegal && !ToLegal && ToWidth > FromWidth)
331 return false;
332
333 return true;
334}
335
336/// Return true if it is desirable to convert a computation from 'From' to 'To'.
337/// We don't want to convert from a legal to an illegal type or from a smaller
338/// to a larger illegal type. i1 is always treated as a legal type because it is
339/// a fundamental type in IR, and there are many specialized optimizations for
340/// i1 types.
341bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
342 // TODO: This could be extended to allow vectors. Datalayout changes might be
343 // needed to properly support that.
344 if (!From->isIntegerTy() || !To->isIntegerTy())
345 return false;
346
347 unsigned FromWidth = From->getPrimitiveSizeInBits();
348 unsigned ToWidth = To->getPrimitiveSizeInBits();
349 return shouldChangeType(FromWidth, ToWidth);
350}
351
352// Return true, if No Signed Wrap should be maintained for I.
353// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
354// where both B and C should be ConstantInts, results in a constant that does
355// not overflow. This function only handles the Add/Sub/Mul opcodes. For
356// all other opcodes, the function conservatively returns false.
357static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
358 auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I);
359 if (!OBO || !OBO->hasNoSignedWrap())
360 return false;
361
362 const APInt *BVal, *CVal;
363 if (!match(V: B, P: m_APInt(Res&: BVal)) || !match(V: C, P: m_APInt(Res&: CVal)))
364 return false;
365
366 // We reason about Add/Sub/Mul Only.
367 bool Overflow = false;
368 switch (I.getOpcode()) {
369 case Instruction::Add:
370 (void)BVal->sadd_ov(RHS: *CVal, Overflow);
371 break;
372 case Instruction::Sub:
373 (void)BVal->ssub_ov(RHS: *CVal, Overflow);
374 break;
375 case Instruction::Mul:
376 (void)BVal->smul_ov(RHS: *CVal, Overflow);
377 break;
378 default:
379 // Conservatively return false for other opcodes.
380 return false;
381 }
382 return !Overflow;
383}
384
385static bool hasNoUnsignedWrap(BinaryOperator &I) {
386 auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I);
387 return OBO && OBO->hasNoUnsignedWrap();
388}
389
390static bool hasNoSignedWrap(BinaryOperator &I) {
391 auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I);
392 return OBO && OBO->hasNoSignedWrap();
393}
394
395/// Conservatively clears subclassOptionalData after a reassociation or
396/// commutation. We preserve fast-math flags when applicable as they can be
397/// preserved.
398static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
399 FPMathOperator *FPMO = dyn_cast<FPMathOperator>(Val: &I);
400 if (!FPMO) {
401 I.clearSubclassOptionalData();
402 return;
403 }
404
405 FastMathFlags FMF = I.getFastMathFlags();
406 I.clearSubclassOptionalData();
407 I.setFastMathFlags(FMF);
408}
409
410/// Combine constant operands of associative operations either before or after a
411/// cast to eliminate one of the associative operations:
412/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
413/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
414static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
415 InstCombinerImpl &IC) {
416 auto *Cast = dyn_cast<CastInst>(Val: BinOp1->getOperand(i_nocapture: 0));
417 if (!Cast || !Cast->hasOneUse())
418 return false;
419
420 // TODO: Enhance logic for other casts and remove this check.
421 auto CastOpcode = Cast->getOpcode();
422 if (CastOpcode != Instruction::ZExt)
423 return false;
424
425 // TODO: Enhance logic for other BinOps and remove this check.
426 if (!BinOp1->isBitwiseLogicOp())
427 return false;
428
429 auto AssocOpcode = BinOp1->getOpcode();
430 auto *BinOp2 = dyn_cast<BinaryOperator>(Val: Cast->getOperand(i_nocapture: 0));
431 if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
432 return false;
433
434 Constant *C1, *C2;
435 if (!match(V: BinOp1->getOperand(i_nocapture: 1), P: m_Constant(C&: C1)) ||
436 !match(V: BinOp2->getOperand(i_nocapture: 1), P: m_Constant(C&: C2)))
437 return false;
438
439 // TODO: This assumes a zext cast.
440 // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
441 // to the destination type might lose bits.
442
443 // Fold the constants together in the destination type:
444 // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
445 const DataLayout &DL = IC.getDataLayout();
446 Type *DestTy = C1->getType();
447 Constant *CastC2 = ConstantFoldCastOperand(Opcode: CastOpcode, C: C2, DestTy, DL);
448 if (!CastC2)
449 return false;
450 Constant *FoldedC = ConstantFoldBinaryOpOperands(Opcode: AssocOpcode, LHS: C1, RHS: CastC2, DL);
451 if (!FoldedC)
452 return false;
453
454 IC.replaceOperand(I&: *Cast, OpNum: 0, V: BinOp2->getOperand(i_nocapture: 0));
455 IC.replaceOperand(I&: *BinOp1, OpNum: 1, V: FoldedC);
456 BinOp1->dropPoisonGeneratingFlags();
457 Cast->dropPoisonGeneratingFlags();
458 return true;
459}
460
461// Simplifies IntToPtr/PtrToInt RoundTrip Cast.
462// inttoptr ( ptrtoint (x) ) --> x
463Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
464 auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
465 if (IntToPtr && DL.getTypeSizeInBits(Ty: IntToPtr->getDestTy()) ==
466 DL.getTypeSizeInBits(Ty: IntToPtr->getSrcTy())) {
467 auto *PtrToInt = dyn_cast<PtrToIntInst>(Val: IntToPtr->getOperand(i_nocapture: 0));
468 Type *CastTy = IntToPtr->getDestTy();
469 if (PtrToInt &&
470 CastTy->getPointerAddressSpace() ==
471 PtrToInt->getSrcTy()->getPointerAddressSpace() &&
472 DL.getTypeSizeInBits(Ty: PtrToInt->getSrcTy()) ==
473 DL.getTypeSizeInBits(Ty: PtrToInt->getDestTy()))
474 return PtrToInt->getOperand(i_nocapture: 0);
475 }
476 return nullptr;
477}
478
479/// This performs a few simplifications for operators that are associative or
480/// commutative:
481///
482/// Commutative operators:
483///
484/// 1. Order operands such that they are listed from right (least complex) to
485/// left (most complex). This puts constants before unary operators before
486/// binary operators.
487///
488/// Associative operators:
489///
490/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
491/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
492///
493/// Associative and commutative operators:
494///
495/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
496/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
497/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
498/// if C1 and C2 are constants.
499bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
500 Instruction::BinaryOps Opcode = I.getOpcode();
501 bool Changed = false;
502
503 do {
504 // Order operands such that they are listed from right (least complex) to
505 // left (most complex). This puts constants before unary operators before
506 // binary operators.
507 if (I.isCommutative() && getComplexity(V: I.getOperand(i_nocapture: 0)) <
508 getComplexity(V: I.getOperand(i_nocapture: 1)))
509 Changed = !I.swapOperands();
510
511 if (I.isCommutative()) {
512 if (auto Pair = matchSymmetricPair(LHS: I.getOperand(i_nocapture: 0), RHS: I.getOperand(i_nocapture: 1))) {
513 replaceOperand(I, OpNum: 0, V: Pair->first);
514 replaceOperand(I, OpNum: 1, V: Pair->second);
515 Changed = true;
516 }
517 }
518
519 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: I.getOperand(i_nocapture: 0));
520 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: I.getOperand(i_nocapture: 1));
521
522 if (I.isAssociative()) {
523 // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
524 if (Op0 && Op0->getOpcode() == Opcode) {
525 Value *A = Op0->getOperand(i_nocapture: 0);
526 Value *B = Op0->getOperand(i_nocapture: 1);
527 Value *C = I.getOperand(i_nocapture: 1);
528
529 // Does "B op C" simplify?
530 if (Value *V = simplifyBinOp(Opcode, LHS: B, RHS: C, Q: SQ.getWithInstruction(I: &I))) {
531 // It simplifies to V. Form "A op V".
532 replaceOperand(I, OpNum: 0, V: A);
533 replaceOperand(I, OpNum: 1, V);
534 bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(I&: *Op0);
535 bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(I&: *Op0);
536
537 // Conservatively clear all optional flags since they may not be
538 // preserved by the reassociation. Reset nsw/nuw based on the above
539 // analysis.
540 ClearSubclassDataAfterReassociation(I);
541
542 // Note: this is only valid because SimplifyBinOp doesn't look at
543 // the operands to Op0.
544 if (IsNUW)
545 I.setHasNoUnsignedWrap(true);
546
547 if (IsNSW)
548 I.setHasNoSignedWrap(true);
549
550 Changed = true;
551 ++NumReassoc;
552 continue;
553 }
554 }
555
556 // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
557 if (Op1 && Op1->getOpcode() == Opcode) {
558 Value *A = I.getOperand(i_nocapture: 0);
559 Value *B = Op1->getOperand(i_nocapture: 0);
560 Value *C = Op1->getOperand(i_nocapture: 1);
561
562 // Does "A op B" simplify?
563 if (Value *V = simplifyBinOp(Opcode, LHS: A, RHS: B, Q: SQ.getWithInstruction(I: &I))) {
564 // It simplifies to V. Form "V op C".
565 replaceOperand(I, OpNum: 0, V);
566 replaceOperand(I, OpNum: 1, V: C);
567 // Conservatively clear the optional flags, since they may not be
568 // preserved by the reassociation.
569 ClearSubclassDataAfterReassociation(I);
570 Changed = true;
571 ++NumReassoc;
572 continue;
573 }
574 }
575 }
576
577 if (I.isAssociative() && I.isCommutative()) {
578 if (simplifyAssocCastAssoc(BinOp1: &I, IC&: *this)) {
579 Changed = true;
580 ++NumReassoc;
581 continue;
582 }
583
584 // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
585 if (Op0 && Op0->getOpcode() == Opcode) {
586 Value *A = Op0->getOperand(i_nocapture: 0);
587 Value *B = Op0->getOperand(i_nocapture: 1);
588 Value *C = I.getOperand(i_nocapture: 1);
589
590 // Does "C op A" simplify?
591 if (Value *V = simplifyBinOp(Opcode, LHS: C, RHS: A, Q: SQ.getWithInstruction(I: &I))) {
592 // It simplifies to V. Form "V op B".
593 replaceOperand(I, OpNum: 0, V);
594 replaceOperand(I, OpNum: 1, V: B);
595 // Conservatively clear the optional flags, since they may not be
596 // preserved by the reassociation.
597 ClearSubclassDataAfterReassociation(I);
598 Changed = true;
599 ++NumReassoc;
600 continue;
601 }
602 }
603
604 // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
605 if (Op1 && Op1->getOpcode() == Opcode) {
606 Value *A = I.getOperand(i_nocapture: 0);
607 Value *B = Op1->getOperand(i_nocapture: 0);
608 Value *C = Op1->getOperand(i_nocapture: 1);
609
610 // Does "C op A" simplify?
611 if (Value *V = simplifyBinOp(Opcode, LHS: C, RHS: A, Q: SQ.getWithInstruction(I: &I))) {
612 // It simplifies to V. Form "B op V".
613 replaceOperand(I, OpNum: 0, V: B);
614 replaceOperand(I, OpNum: 1, V);
615 // Conservatively clear the optional flags, since they may not be
616 // preserved by the reassociation.
617 ClearSubclassDataAfterReassociation(I);
618 Changed = true;
619 ++NumReassoc;
620 continue;
621 }
622 }
623
624 // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
625 // if C1 and C2 are constants.
626 Value *A, *B;
627 Constant *C1, *C2, *CRes;
628 if (Op0 && Op1 &&
629 Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
630 match(V: Op0, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: A), R: m_Constant(C&: C1)))) &&
631 match(V: Op1, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: B), R: m_Constant(C&: C2)))) &&
632 (CRes = ConstantFoldBinaryOpOperands(Opcode, LHS: C1, RHS: C2, DL))) {
633 bool IsNUW = hasNoUnsignedWrap(I) &&
634 hasNoUnsignedWrap(I&: *Op0) &&
635 hasNoUnsignedWrap(I&: *Op1);
636 BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
637 BinaryOperator::CreateNUW(Opc: Opcode, V1: A, V2: B) :
638 BinaryOperator::Create(Op: Opcode, S1: A, S2: B);
639
640 if (isa<FPMathOperator>(Val: NewBO)) {
641 FastMathFlags Flags = I.getFastMathFlags() &
642 Op0->getFastMathFlags() &
643 Op1->getFastMathFlags();
644 NewBO->setFastMathFlags(Flags);
645 }
646 InsertNewInstWith(New: NewBO, Old: I.getIterator());
647 NewBO->takeName(V: Op1);
648 replaceOperand(I, OpNum: 0, V: NewBO);
649 replaceOperand(I, OpNum: 1, V: CRes);
650 // Conservatively clear the optional flags, since they may not be
651 // preserved by the reassociation.
652 ClearSubclassDataAfterReassociation(I);
653 if (IsNUW)
654 I.setHasNoUnsignedWrap(true);
655
656 Changed = true;
657 continue;
658 }
659 }
660
661 // No further simplifications.
662 return Changed;
663 } while (true);
664}
665
666/// Return whether "X LOp (Y ROp Z)" is always equal to
667/// "(X LOp Y) ROp (X LOp Z)".
668static bool leftDistributesOverRight(Instruction::BinaryOps LOp,
669 Instruction::BinaryOps ROp) {
670 // X & (Y | Z) <--> (X & Y) | (X & Z)
671 // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
672 if (LOp == Instruction::And)
673 return ROp == Instruction::Or || ROp == Instruction::Xor;
674
675 // X | (Y & Z) <--> (X | Y) & (X | Z)
676 if (LOp == Instruction::Or)
677 return ROp == Instruction::And;
678
679 // X * (Y + Z) <--> (X * Y) + (X * Z)
680 // X * (Y - Z) <--> (X * Y) - (X * Z)
681 if (LOp == Instruction::Mul)
682 return ROp == Instruction::Add || ROp == Instruction::Sub;
683
684 return false;
685}
686
687/// Return whether "(X LOp Y) ROp Z" is always equal to
688/// "(X ROp Z) LOp (Y ROp Z)".
689static bool rightDistributesOverLeft(Instruction::BinaryOps LOp,
690 Instruction::BinaryOps ROp) {
691 if (Instruction::isCommutative(Opcode: ROp))
692 return leftDistributesOverRight(LOp: ROp, ROp: LOp);
693
694 // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
695 return Instruction::isBitwiseLogicOp(Opcode: LOp) && Instruction::isShift(Opcode: ROp);
696
697 // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
698 // but this requires knowing that the addition does not overflow and other
699 // such subtleties.
700}
701
702/// This function returns identity value for given opcode, which can be used to
703/// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
704static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
705 if (isa<Constant>(Val: V))
706 return nullptr;
707
708 return ConstantExpr::getBinOpIdentity(Opcode, Ty: V->getType());
709}
710
711/// This function predicates factorization using distributive laws. By default,
712/// it just returns the 'Op' inputs. But for special-cases like
713/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
714/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
715/// allow more factorization opportunities.
716static Instruction::BinaryOps
717getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
718 Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) {
719 assert(Op && "Expected a binary operator");
720 LHS = Op->getOperand(i_nocapture: 0);
721 RHS = Op->getOperand(i_nocapture: 1);
722 if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
723 Constant *C;
724 if (match(V: Op, P: m_Shl(L: m_Value(), R: m_ImmConstant(C)))) {
725 // X << C --> X * (1 << C)
726 RHS = ConstantFoldBinaryInstruction(
727 Opcode: Instruction::Shl, V1: ConstantInt::get(Ty: Op->getType(), V: 1), V2: C);
728 assert(RHS && "Constant folding of immediate constants failed");
729 return Instruction::Mul;
730 }
731 // TODO: We can add other conversions e.g. shr => div etc.
732 }
733 if (Instruction::isBitwiseLogicOp(Opcode: TopOpcode)) {
734 if (OtherOp && OtherOp->getOpcode() == Instruction::AShr &&
735 match(V: Op, P: m_LShr(L: m_NonNegative(), R: m_Value()))) {
736 // lshr nneg C, X --> ashr nneg C, X
737 return Instruction::AShr;
738 }
739 }
740 return Op->getOpcode();
741}
742
743/// This tries to simplify binary operations by factorizing out common terms
744/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
745static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
746 InstCombiner::BuilderTy &Builder,
747 Instruction::BinaryOps InnerOpcode, Value *A,
748 Value *B, Value *C, Value *D) {
749 assert(A && B && C && D && "All values must be provided");
750
751 Value *V = nullptr;
752 Value *RetVal = nullptr;
753 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
754 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
755
756 // Does "X op' Y" always equal "Y op' X"?
757 bool InnerCommutative = Instruction::isCommutative(Opcode: InnerOpcode);
758
759 // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
760 if (leftDistributesOverRight(LOp: InnerOpcode, ROp: TopLevelOpcode)) {
761 // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
762 // commutative case, "(A op' B) op (C op' A)"?
763 if (A == C || (InnerCommutative && A == D)) {
764 if (A != C)
765 std::swap(a&: C, b&: D);
766 // Consider forming "A op' (B op D)".
767 // If "B op D" simplifies then it can be formed with no cost.
768 V = simplifyBinOp(Opcode: TopLevelOpcode, LHS: B, RHS: D, Q: SQ.getWithInstruction(I: &I));
769
770 // If "B op D" doesn't simplify then only go on if one of the existing
771 // operations "A op' B" and "C op' D" will be zapped as no longer used.
772 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
773 V = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: B, RHS: D, Name: RHS->getName());
774 if (V)
775 RetVal = Builder.CreateBinOp(Opc: InnerOpcode, LHS: A, RHS: V);
776 }
777 }
778
779 // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
780 if (!RetVal && rightDistributesOverLeft(LOp: TopLevelOpcode, ROp: InnerOpcode)) {
781 // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
782 // commutative case, "(A op' B) op (B op' D)"?
783 if (B == D || (InnerCommutative && B == C)) {
784 if (B != D)
785 std::swap(a&: C, b&: D);
786 // Consider forming "(A op C) op' B".
787 // If "A op C" simplifies then it can be formed with no cost.
788 V = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQ.getWithInstruction(I: &I));
789
790 // If "A op C" doesn't simplify then only go on if one of the existing
791 // operations "A op' B" and "C op' D" will be zapped as no longer used.
792 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
793 V = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C, Name: LHS->getName());
794 if (V)
795 RetVal = Builder.CreateBinOp(Opc: InnerOpcode, LHS: V, RHS: B);
796 }
797 }
798
799 if (!RetVal)
800 return nullptr;
801
802 ++NumFactor;
803 RetVal->takeName(V: &I);
804
805 // Try to add no-overflow flags to the final value.
806 if (isa<BinaryOperator>(Val: RetVal)) {
807 bool HasNSW = false;
808 bool HasNUW = false;
809 if (isa<OverflowingBinaryOperator>(Val: &I)) {
810 HasNSW = I.hasNoSignedWrap();
811 HasNUW = I.hasNoUnsignedWrap();
812 }
813 if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(Val: LHS)) {
814 HasNSW &= LOBO->hasNoSignedWrap();
815 HasNUW &= LOBO->hasNoUnsignedWrap();
816 }
817
818 if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(Val: RHS)) {
819 HasNSW &= ROBO->hasNoSignedWrap();
820 HasNUW &= ROBO->hasNoUnsignedWrap();
821 }
822
823 if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
824 // We can propagate 'nsw' if we know that
825 // %Y = mul nsw i16 %X, C
826 // %Z = add nsw i16 %Y, %X
827 // =>
828 // %Z = mul nsw i16 %X, C+1
829 //
830 // iff C+1 isn't INT_MIN
831 const APInt *CInt;
832 if (match(V, P: m_APInt(Res&: CInt)) && !CInt->isMinSignedValue())
833 cast<Instruction>(Val: RetVal)->setHasNoSignedWrap(HasNSW);
834
835 // nuw can be propagated with any constant or nuw value.
836 cast<Instruction>(Val: RetVal)->setHasNoUnsignedWrap(HasNUW);
837 }
838 }
839 return RetVal;
840}
841
842// If `I` has one Const operand and the other matches `(ctpop (not x))`,
843// replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`.
844// This is only useful is the new subtract can fold so we only handle the
845// following cases:
846// 1) (add/sub/disjoint_or C, (ctpop (not x))
847// -> (add/sub/disjoint_or C', (ctpop x))
848// 1) (cmp pred C, (ctpop (not x))
849// -> (cmp pred C', (ctpop x))
850Instruction *InstCombinerImpl::tryFoldInstWithCtpopWithNot(Instruction *I) {
851 unsigned Opc = I->getOpcode();
852 unsigned ConstIdx = 1;
853 switch (Opc) {
854 default:
855 return nullptr;
856 // (ctpop (not x)) <-> (sub nuw nsw BitWidth(x) - (ctpop x))
857 // We can fold the BitWidth(x) with add/sub/icmp as long the other operand
858 // is constant.
859 case Instruction::Sub:
860 ConstIdx = 0;
861 break;
862 case Instruction::ICmp:
863 // Signed predicates aren't correct in some edge cases like for i2 types, as
864 // well since (ctpop x) is known [0, log2(BitWidth(x))] almost all signed
865 // comparisons against it are simplfied to unsigned.
866 if (cast<ICmpInst>(Val: I)->isSigned())
867 return nullptr;
868 break;
869 case Instruction::Or:
870 if (!match(V: I, P: m_DisjointOr(L: m_Value(), R: m_Value())))
871 return nullptr;
872 [[fallthrough]];
873 case Instruction::Add:
874 break;
875 }
876
877 Value *Op;
878 // Find ctpop.
879 if (!match(V: I->getOperand(i: 1 - ConstIdx),
880 P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ctpop>(Op0: m_Value(V&: Op)))))
881 return nullptr;
882
883 Constant *C;
884 // Check other operand is ImmConstant.
885 if (!match(V: I->getOperand(i: ConstIdx), P: m_ImmConstant(C)))
886 return nullptr;
887
888 Type *Ty = Op->getType();
889 Constant *BitWidthC = ConstantInt::get(Ty, V: Ty->getScalarSizeInBits());
890 // Need extra check for icmp. Note if this check is true, it generally means
891 // the icmp will simplify to true/false.
892 if (Opc == Instruction::ICmp && !cast<ICmpInst>(Val: I)->isEquality()) {
893 Constant *Cmp =
894 ConstantFoldCompareInstOperands(Predicate: ICmpInst::ICMP_UGT, LHS: C, RHS: BitWidthC, DL);
895 if (!Cmp || !Cmp->isNullValue())
896 return nullptr;
897 }
898
899 // Check we can invert `(not x)` for free.
900 bool Consumes = false;
901 if (!isFreeToInvert(V: Op, WillInvertAllUses: Op->hasOneUse(), DoesConsume&: Consumes) || !Consumes)
902 return nullptr;
903 Value *NotOp = getFreelyInverted(V: Op, WillInvertAllUses: Op->hasOneUse(), Builder: &Builder);
904 assert(NotOp != nullptr &&
905 "Desync between isFreeToInvert and getFreelyInverted");
906
907 Value *CtpopOfNotOp = Builder.CreateIntrinsic(RetTy: Ty, ID: Intrinsic::ctpop, Args: NotOp);
908
909 Value *R = nullptr;
910
911 // Do the transformation here to avoid potentially introducing an infinite
912 // loop.
913 switch (Opc) {
914 case Instruction::Sub:
915 R = Builder.CreateAdd(LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: C, C2: BitWidthC));
916 break;
917 case Instruction::Or:
918 case Instruction::Add:
919 R = Builder.CreateSub(LHS: ConstantExpr::getAdd(C1: C, C2: BitWidthC), RHS: CtpopOfNotOp);
920 break;
921 case Instruction::ICmp:
922 R = Builder.CreateICmp(P: cast<ICmpInst>(Val: I)->getSwappedPredicate(),
923 LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: BitWidthC, C2: C));
924 break;
925 default:
926 llvm_unreachable("Unhandled Opcode");
927 }
928 assert(R != nullptr);
929 return replaceInstUsesWith(I&: *I, V: R);
930}
931
932// (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
933// IFF
934// 1) the logic_shifts match
935// 2) either both binops are binops and one is `and` or
936// BinOp1 is `and`
937// (logic_shift (inv_logic_shift C1, C), C) == C1 or
938//
939// -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
940//
941// (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
942// IFF
943// 1) the logic_shifts match
944// 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`).
945//
946// -> (BinOp (logic_shift (BinOp X, Y)), Mask)
947//
948// (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt))
949// IFF
950// 1) Binop1 is bitwise logical operator `and`, `or` or `xor`
951// 2) Binop2 is `not`
952//
953// -> (arithmetic_shift Binop1((not X), Y), Amt)
954
955Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) {
956 const DataLayout &DL = I.getDataLayout();
957 auto IsValidBinOpc = [](unsigned Opc) {
958 switch (Opc) {
959 default:
960 return false;
961 case Instruction::And:
962 case Instruction::Or:
963 case Instruction::Xor:
964 case Instruction::Add:
965 // Skip Sub as we only match constant masks which will canonicalize to use
966 // add.
967 return true;
968 }
969 };
970
971 // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra
972 // constraints.
973 auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
974 unsigned ShOpc) {
975 assert(ShOpc != Instruction::AShr);
976 return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
977 ShOpc == Instruction::Shl;
978 };
979
980 auto GetInvShift = [](unsigned ShOpc) {
981 assert(ShOpc != Instruction::AShr);
982 return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
983 };
984
985 auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2,
986 unsigned ShOpc, Constant *CMask,
987 Constant *CShift) {
988 // If the BinOp1 is `and` we don't need to check the mask.
989 if (BinOpc1 == Instruction::And)
990 return true;
991
992 // For all other possible transfers we need complete distributable
993 // binop/shift (anything but `add` + `lshr`).
994 if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc))
995 return false;
996
997 // If BinOp2 is `and`, any mask works (this only really helps for non-splat
998 // vecs, otherwise the mask will be simplified and the following check will
999 // handle it).
1000 if (BinOpc2 == Instruction::And)
1001 return true;
1002
1003 // Otherwise, need mask that meets the below requirement.
1004 // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
1005 Constant *MaskInvShift =
1006 ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL);
1007 return ConstantFoldBinaryOpOperands(Opcode: ShOpc, LHS: MaskInvShift, RHS: CShift, DL) ==
1008 CMask;
1009 };
1010
1011 auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
1012 Constant *CMask, *CShift;
1013 Value *X, *Y, *ShiftedX, *Mask, *Shift;
1014 if (!match(V: I.getOperand(i_nocapture: ShOpnum),
1015 P: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: Y), R: m_Value(V&: Shift)))))
1016 return nullptr;
1017 if (!match(V: I.getOperand(i_nocapture: 1 - ShOpnum),
1018 P: m_c_BinOp(L: m_CombineAnd(
1019 L: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: X), R: m_Specific(V: Shift))),
1020 R: m_Value(V&: ShiftedX)),
1021 R: m_Value(V&: Mask))))
1022 return nullptr;
1023 // Make sure we are matching instruction shifts and not ConstantExpr
1024 auto *IY = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: ShOpnum));
1025 auto *IX = dyn_cast<Instruction>(Val: ShiftedX);
1026 if (!IY || !IX)
1027 return nullptr;
1028
1029 // LHS and RHS need same shift opcode
1030 unsigned ShOpc = IY->getOpcode();
1031 if (ShOpc != IX->getOpcode())
1032 return nullptr;
1033
1034 // Make sure binop is real instruction and not ConstantExpr
1035 auto *BO2 = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: 1 - ShOpnum));
1036 if (!BO2)
1037 return nullptr;
1038
1039 unsigned BinOpc = BO2->getOpcode();
1040 // Make sure we have valid binops.
1041 if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
1042 return nullptr;
1043
1044 if (ShOpc == Instruction::AShr) {
1045 if (Instruction::isBitwiseLogicOp(Opcode: I.getOpcode()) &&
1046 BinOpc == Instruction::Xor && match(V: Mask, P: m_AllOnes())) {
1047 Value *NotX = Builder.CreateNot(V: X);
1048 Value *NewBinOp = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NotX);
1049 return BinaryOperator::Create(
1050 Op: static_cast<Instruction::BinaryOps>(ShOpc), S1: NewBinOp, S2: Shift);
1051 }
1052
1053 return nullptr;
1054 }
1055
1056 // If BinOp1 == BinOp2 and it's bitwise or shl with add, then just
1057 // distribute to drop the shift irrelevant of constants.
1058 if (BinOpc == I.getOpcode() &&
1059 IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) {
1060 Value *NewBinOp2 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: X, RHS: Y);
1061 Value *NewBinOp1 = Builder.CreateBinOp(
1062 Opc: static_cast<Instruction::BinaryOps>(ShOpc), LHS: NewBinOp2, RHS: Shift);
1063 return BinaryOperator::Create(Op: I.getOpcode(), S1: NewBinOp1, S2: Mask);
1064 }
1065
1066 // Otherwise we can only distribute by constant shifting the mask, so
1067 // ensure we have constants.
1068 if (!match(V: Shift, P: m_ImmConstant(C&: CShift)))
1069 return nullptr;
1070 if (!match(V: Mask, P: m_ImmConstant(C&: CMask)))
1071 return nullptr;
1072
1073 // Check if we can distribute the binops.
1074 if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
1075 return nullptr;
1076
1077 Constant *NewCMask =
1078 ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL);
1079 Value *NewBinOp2 = Builder.CreateBinOp(
1080 Opc: static_cast<Instruction::BinaryOps>(BinOpc), LHS: X, RHS: NewCMask);
1081 Value *NewBinOp1 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NewBinOp2);
1082 return BinaryOperator::Create(Op: static_cast<Instruction::BinaryOps>(ShOpc),
1083 S1: NewBinOp1, S2: CShift);
1084 };
1085
1086 if (Instruction *R = MatchBinOp(0))
1087 return R;
1088 return MatchBinOp(1);
1089}
1090
1091// (Binop (zext C), (select C, T, F))
1092// -> (select C, (binop 1, T), (binop 0, F))
1093//
1094// (Binop (sext C), (select C, T, F))
1095// -> (select C, (binop -1, T), (binop 0, F))
1096//
1097// Attempt to simplify binary operations into a select with folded args, when
1098// one operand of the binop is a select instruction and the other operand is a
1099// zext/sext extension, whose value is the select condition.
1100Instruction *
1101InstCombinerImpl::foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I) {
1102 // TODO: this simplification may be extended to any speculatable instruction,
1103 // not just binops, and would possibly be handled better in FoldOpIntoSelect.
1104 Instruction::BinaryOps Opc = I.getOpcode();
1105 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
1106 Value *A, *CondVal, *TrueVal, *FalseVal;
1107 Value *CastOp;
1108 Constant *CastTrueVal, *CastFalseVal;
1109
1110 auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) {
1111 return match(V: CastOp, P: m_SelectLike(C: m_Value(V&: A), TrueC: m_Constant(C&: CastTrueVal),
1112 FalseC: m_Constant(C&: CastFalseVal))) &&
1113 match(V: SelectOp, P: m_Select(C: m_Value(V&: CondVal), L: m_Value(V&: TrueVal),
1114 R: m_Value(V&: FalseVal)));
1115 };
1116
1117 // Make sure one side of the binop is a select instruction, and the other is a
1118 // zero/sign extension operating on a i1.
1119 if (MatchSelectAndCast(LHS, RHS))
1120 CastOp = LHS;
1121 else if (MatchSelectAndCast(RHS, LHS))
1122 CastOp = RHS;
1123 else
1124 return nullptr;
1125
1126 SelectInst *SI = ProfcheckDisableMetadataFixes
1127 ? nullptr
1128 : cast<SelectInst>(Val: CastOp == LHS ? RHS : LHS);
1129
1130 auto NewFoldedConst = [&](bool IsTrueArm, Value *V) {
1131 bool IsCastOpRHS = (CastOp == RHS);
1132 Value *CastVal = IsTrueArm ? CastFalseVal : CastTrueVal;
1133
1134 return IsCastOpRHS ? Builder.CreateBinOp(Opc, LHS: V, RHS: CastVal)
1135 : Builder.CreateBinOp(Opc, LHS: CastVal, RHS: V);
1136 };
1137
1138 // If the value used in the zext/sext is the select condition, or the negated
1139 // of the select condition, the binop can be simplified.
1140 if (CondVal == A) {
1141 Value *NewTrueVal = NewFoldedConst(false, TrueVal);
1142 return SelectInst::Create(C: CondVal, S1: NewTrueVal,
1143 S2: NewFoldedConst(true, FalseVal), NameStr: "", InsertBefore: nullptr, MDFrom: SI);
1144 }
1145 if (match(V: A, P: m_Not(V: m_Specific(V: CondVal)))) {
1146 Value *NewTrueVal = NewFoldedConst(true, TrueVal);
1147 return SelectInst::Create(C: CondVal, S1: NewTrueVal,
1148 S2: NewFoldedConst(false, FalseVal), NameStr: "", InsertBefore: nullptr, MDFrom: SI);
1149 }
1150
1151 return nullptr;
1152}
1153
1154Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) {
1155 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
1156 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS);
1157 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS);
1158 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1159 Value *A, *B, *C, *D;
1160 Instruction::BinaryOps LHSOpcode, RHSOpcode;
1161
1162 if (Op0)
1163 LHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op0, LHS&: A, RHS&: B, OtherOp: Op1);
1164 if (Op1)
1165 RHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op1, LHS&: C, RHS&: D, OtherOp: Op0);
1166
1167 // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
1168 // a common term.
1169 if (Op0 && Op1 && LHSOpcode == RHSOpcode)
1170 if (Value *V = tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C, D))
1171 return V;
1172
1173 // The instruction has the form "(A op' B) op (C)". Try to factorize common
1174 // term.
1175 if (Op0)
1176 if (Value *Ident = getIdentityValue(Opcode: LHSOpcode, V: RHS))
1177 if (Value *V =
1178 tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C: RHS, D: Ident))
1179 return V;
1180
1181 // The instruction has the form "(B) op (C op' D)". Try to factorize common
1182 // term.
1183 if (Op1)
1184 if (Value *Ident = getIdentityValue(Opcode: RHSOpcode, V: LHS))
1185 if (Value *V =
1186 tryFactorization(I, SQ, Builder, InnerOpcode: RHSOpcode, A: LHS, B: Ident, C, D))
1187 return V;
1188
1189 return nullptr;
1190}
1191
1192/// This tries to simplify binary operations which some other binary operation
1193/// distributes over either by factorizing out common terms
1194/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
1195/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
1196/// Returns the simplified value, or null if it didn't simplify.
1197Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) {
1198 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
1199 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS);
1200 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS);
1201 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1202
1203 // Factorization.
1204 if (Value *R = tryFactorizationFolds(I))
1205 return R;
1206
1207 // Expansion.
1208 if (Op0 && rightDistributesOverLeft(LOp: Op0->getOpcode(), ROp: TopLevelOpcode)) {
1209 // The instruction has the form "(A op' B) op C". See if expanding it out
1210 // to "(A op C) op' (B op C)" results in simplifications.
1211 Value *A = Op0->getOperand(i_nocapture: 0), *B = Op0->getOperand(i_nocapture: 1), *C = RHS;
1212 Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
1213
1214 // Disable the use of undef because it's not safe to distribute undef.
1215 auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef();
1216 Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive);
1217 Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: B, RHS: C, Q: SQDistributive);
1218
1219 // Do "A op C" and "B op C" both simplify?
1220 if (L && R) {
1221 // They do! Return "L op' R".
1222 ++NumExpand;
1223 C = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R);
1224 C->takeName(V: &I);
1225 return C;
1226 }
1227
1228 // Does "A op C" simplify to the identity value for the inner opcode?
1229 if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) {
1230 // They do! Return "B op C".
1231 ++NumExpand;
1232 C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: B, RHS: C);
1233 C->takeName(V: &I);
1234 return C;
1235 }
1236
1237 // Does "B op C" simplify to the identity value for the inner opcode?
1238 if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) {
1239 // They do! Return "A op C".
1240 ++NumExpand;
1241 C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C);
1242 C->takeName(V: &I);
1243 return C;
1244 }
1245 }
1246
1247 if (Op1 && leftDistributesOverRight(LOp: TopLevelOpcode, ROp: Op1->getOpcode())) {
1248 // The instruction has the form "A op (B op' C)". See if expanding it out
1249 // to "(A op B) op' (A op C)" results in simplifications.
1250 Value *A = LHS, *B = Op1->getOperand(i_nocapture: 0), *C = Op1->getOperand(i_nocapture: 1);
1251 Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
1252
1253 // Disable the use of undef because it's not safe to distribute undef.
1254 auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef();
1255 Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: B, Q: SQDistributive);
1256 Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive);
1257
1258 // Do "A op B" and "A op C" both simplify?
1259 if (L && R) {
1260 // They do! Return "L op' R".
1261 ++NumExpand;
1262 A = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R);
1263 A->takeName(V: &I);
1264 return A;
1265 }
1266
1267 // Does "A op B" simplify to the identity value for the inner opcode?
1268 if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) {
1269 // They do! Return "A op C".
1270 ++NumExpand;
1271 A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C);
1272 A->takeName(V: &I);
1273 return A;
1274 }
1275
1276 // Does "A op C" simplify to the identity value for the inner opcode?
1277 if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) {
1278 // They do! Return "A op B".
1279 ++NumExpand;
1280 A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: B);
1281 A->takeName(V: &I);
1282 return A;
1283 }
1284 }
1285
1286 return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
1287}
1288
1289static std::optional<std::pair<Value *, Value *>>
1290matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS) {
1291 if (LHS->getParent() != RHS->getParent())
1292 return std::nullopt;
1293
1294 if (LHS->getNumIncomingValues() < 2)
1295 return std::nullopt;
1296
1297 if (!equal(LRange: LHS->blocks(), RRange: RHS->blocks()))
1298 return std::nullopt;
1299
1300 Value *L0 = LHS->getIncomingValue(i: 0);
1301 Value *R0 = RHS->getIncomingValue(i: 0);
1302
1303 for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) {
1304 Value *L1 = LHS->getIncomingValue(i: I);
1305 Value *R1 = RHS->getIncomingValue(i: I);
1306
1307 if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1))
1308 continue;
1309
1310 return std::nullopt;
1311 }
1312
1313 return std::optional(std::pair(L0, R0));
1314}
1315
1316std::optional<std::pair<Value *, Value *>>
1317InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) {
1318 Instruction *LHSInst = dyn_cast<Instruction>(Val: LHS);
1319 Instruction *RHSInst = dyn_cast<Instruction>(Val: RHS);
1320 if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode())
1321 return std::nullopt;
1322 switch (LHSInst->getOpcode()) {
1323 case Instruction::PHI:
1324 return matchSymmetricPhiNodesPair(LHS: cast<PHINode>(Val: LHS), RHS: cast<PHINode>(Val: RHS));
1325 case Instruction::Select: {
1326 Value *Cond = LHSInst->getOperand(i: 0);
1327 Value *TrueVal = LHSInst->getOperand(i: 1);
1328 Value *FalseVal = LHSInst->getOperand(i: 2);
1329 if (Cond == RHSInst->getOperand(i: 0) && TrueVal == RHSInst->getOperand(i: 2) &&
1330 FalseVal == RHSInst->getOperand(i: 1))
1331 return std::pair(TrueVal, FalseVal);
1332 return std::nullopt;
1333 }
1334 case Instruction::Call: {
1335 // Match min(a, b) and max(a, b)
1336 MinMaxIntrinsic *LHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: LHSInst);
1337 MinMaxIntrinsic *RHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: RHSInst);
1338 if (LHSMinMax && RHSMinMax &&
1339 LHSMinMax->getPredicate() ==
1340 ICmpInst::getSwappedPredicate(pred: RHSMinMax->getPredicate()) &&
1341 ((LHSMinMax->getLHS() == RHSMinMax->getLHS() &&
1342 LHSMinMax->getRHS() == RHSMinMax->getRHS()) ||
1343 (LHSMinMax->getLHS() == RHSMinMax->getRHS() &&
1344 LHSMinMax->getRHS() == RHSMinMax->getLHS())))
1345 return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS());
1346 return std::nullopt;
1347 }
1348 default:
1349 return std::nullopt;
1350 }
1351}
1352
1353Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
1354 Value *LHS,
1355 Value *RHS) {
1356 Value *A, *B, *C, *D, *E, *F;
1357 bool LHSIsSelect = match(V: LHS, P: m_Select(C: m_Value(V&: A), L: m_Value(V&: B), R: m_Value(V&: C)));
1358 bool RHSIsSelect = match(V: RHS, P: m_Select(C: m_Value(V&: D), L: m_Value(V&: E), R: m_Value(V&: F)));
1359 if (!LHSIsSelect && !RHSIsSelect)
1360 return nullptr;
1361
1362 SelectInst *SI = ProfcheckDisableMetadataFixes
1363 ? nullptr
1364 : cast<SelectInst>(Val: LHSIsSelect ? LHS : RHS);
1365
1366 FastMathFlags FMF;
1367 BuilderTy::FastMathFlagGuard Guard(Builder);
1368 if (const auto *FPOp = dyn_cast<FPMathOperator>(Val: &I)) {
1369 FMF = FPOp->getFastMathFlags();
1370 Builder.setFastMathFlags(FMF);
1371 }
1372
1373 Instruction::BinaryOps Opcode = I.getOpcode();
1374 SimplifyQuery Q = SQ.getWithInstruction(I: &I);
1375
1376 Value *Cond, *True = nullptr, *False = nullptr;
1377
1378 // Special-case for add/negate combination. Replace the zero in the negation
1379 // with the trailing add operand:
1380 // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
1381 // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
1382 auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
1383 // We need an 'add' and exactly 1 arm of the select to have been simplified.
1384 if (Opcode != Instruction::Add || (!True && !False) || (True && False))
1385 return nullptr;
1386 Value *N;
1387 if (True && match(V: FVal, P: m_Neg(V: m_Value(V&: N)))) {
1388 Value *Sub = Builder.CreateSub(LHS: Z, RHS: N);
1389 return Builder.CreateSelect(C: Cond, True, False: Sub, Name: I.getName(), MDFrom: SI);
1390 }
1391 if (False && match(V: TVal, P: m_Neg(V: m_Value(V&: N)))) {
1392 Value *Sub = Builder.CreateSub(LHS: Z, RHS: N);
1393 return Builder.CreateSelect(C: Cond, True: Sub, False, Name: I.getName(), MDFrom: SI);
1394 }
1395 return nullptr;
1396 };
1397
1398 if (LHSIsSelect && RHSIsSelect && A == D) {
1399 // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
1400 Cond = A;
1401 True = simplifyBinOp(Opcode, LHS: B, RHS: E, FMF, Q);
1402 False = simplifyBinOp(Opcode, LHS: C, RHS: F, FMF, Q);
1403
1404 if (LHS->hasOneUse() && RHS->hasOneUse()) {
1405 if (False && !True)
1406 True = Builder.CreateBinOp(Opc: Opcode, LHS: B, RHS: E);
1407 else if (True && !False)
1408 False = Builder.CreateBinOp(Opc: Opcode, LHS: C, RHS: F);
1409 }
1410 } else if (LHSIsSelect && LHS->hasOneUse()) {
1411 // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
1412 Cond = A;
1413 True = simplifyBinOp(Opcode, LHS: B, RHS, FMF, Q);
1414 False = simplifyBinOp(Opcode, LHS: C, RHS, FMF, Q);
1415 if (Value *NewSel = foldAddNegate(B, C, RHS))
1416 return NewSel;
1417 } else if (RHSIsSelect && RHS->hasOneUse()) {
1418 // X op (D ? E : F) -> D ? (X op E) : (X op F)
1419 Cond = D;
1420 True = simplifyBinOp(Opcode, LHS, RHS: E, FMF, Q);
1421 False = simplifyBinOp(Opcode, LHS, RHS: F, FMF, Q);
1422 if (Value *NewSel = foldAddNegate(E, F, LHS))
1423 return NewSel;
1424 }
1425
1426 if (!True || !False)
1427 return nullptr;
1428
1429 Value *NewSI = Builder.CreateSelect(C: Cond, True, False, Name: I.getName(), MDFrom: SI);
1430 NewSI->takeName(V: &I);
1431 return NewSI;
1432}
1433
1434/// Freely adapt every user of V as-if V was changed to !V.
1435/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
1436void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) {
1437 assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
1438 for (User *U : make_early_inc_range(Range: I->users())) {
1439 if (U == IgnoredUser)
1440 continue; // Don't consider this user.
1441 switch (cast<Instruction>(Val: U)->getOpcode()) {
1442 case Instruction::Select: {
1443 auto *SI = cast<SelectInst>(Val: U);
1444 SI->swapValues();
1445 SI->swapProfMetadata();
1446 break;
1447 }
1448 case Instruction::CondBr: {
1449 CondBrInst *BI = cast<CondBrInst>(Val: U);
1450 BI->swapSuccessors(); // swaps prof metadata too
1451 if (BPI)
1452 BPI->swapSuccEdgesProbabilities(Src: BI->getParent());
1453 break;
1454 }
1455 case Instruction::Xor:
1456 replaceInstUsesWith(I&: cast<Instruction>(Val&: *U), V: I);
1457 // Add to worklist for DCE.
1458 addToWorklist(I: cast<Instruction>(Val: U));
1459 break;
1460 default:
1461 llvm_unreachable("Got unexpected user - out of sync with "
1462 "canFreelyInvertAllUsersOf() ?");
1463 }
1464 }
1465
1466 // Update pre-existing debug value uses.
1467 SmallVector<DbgVariableRecord *, 4> DbgVariableRecords;
1468 llvm::findDbgValues(V: I, DbgVariableRecords);
1469
1470 for (DbgVariableRecord *DbgVal : DbgVariableRecords) {
1471 SmallVector<uint64_t, 1> Ops = {dwarf::DW_OP_not};
1472 for (unsigned Idx = 0, End = DbgVal->getNumVariableLocationOps();
1473 Idx != End; ++Idx)
1474 if (DbgVal->getVariableLocationOp(OpIdx: Idx) == I)
1475 DbgVal->setExpression(
1476 DIExpression::appendOpsToArg(Expr: DbgVal->getExpression(), Ops, ArgNo: Idx));
1477 }
1478}
1479
1480/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
1481/// constant zero (which is the 'negate' form).
1482Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
1483 Value *NegV;
1484 if (match(V, P: m_Neg(V: m_Value(V&: NegV))))
1485 return NegV;
1486
1487 // Constants can be considered to be negated values if they can be folded.
1488 if (ConstantInt *C = dyn_cast<ConstantInt>(Val: V))
1489 return ConstantExpr::getNeg(C);
1490
1491 if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(Val: V))
1492 if (C->getType()->getElementType()->isIntegerTy())
1493 return ConstantExpr::getNeg(C);
1494
1495 if (ConstantVector *CV = dyn_cast<ConstantVector>(Val: V)) {
1496 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1497 Constant *Elt = CV->getAggregateElement(Elt: i);
1498 if (!Elt)
1499 return nullptr;
1500
1501 if (isa<UndefValue>(Val: Elt))
1502 continue;
1503
1504 if (!isa<ConstantInt>(Val: Elt))
1505 return nullptr;
1506 }
1507 return ConstantExpr::getNeg(C: CV);
1508 }
1509
1510 // Negate integer vector splats.
1511 if (auto *CV = dyn_cast<Constant>(Val: V))
1512 if (CV->getType()->isVectorTy() &&
1513 CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
1514 return ConstantExpr::getNeg(C: CV);
1515
1516 return nullptr;
1517}
1518
1519// Try to fold:
1520// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1521// -> ({s|u}itofp (int_binop x, y))
1522// 2) (fp_binop ({s|u}itofp x), FpC)
1523// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1524//
1525// Assuming the sign of the cast for x/y is `OpsFromSigned`.
1526Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
1527 BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps,
1528 Constant *Op1FpC, SmallVectorImpl<WithCache<const Value *>> &OpsKnown) {
1529
1530 Type *FPTy = BO.getType();
1531 Type *IntTy = IntOps[0]->getType();
1532
1533 unsigned IntSz = IntTy->getScalarSizeInBits();
1534 // This is the maximum number of inuse bits by the integer where the int -> fp
1535 // casts are exact.
1536 unsigned MaxRepresentableBits =
1537 APFloat::semanticsPrecision(FPTy->getScalarType()->getFltSemantics());
1538
1539 // Preserve known number of leading bits. This can allow us to trivial nsw/nuw
1540 // checks later on.
1541 unsigned NumUsedLeadingBits[2] = {IntSz, IntSz};
1542
1543 // NB: This only comes up if OpsFromSigned is true, so there is no need to
1544 // cache if between calls to `foldFBinOpOfIntCastsFromSign`.
1545 auto IsNonZero = [&](unsigned OpNo) -> bool {
1546 if (OpsKnown[OpNo].hasKnownBits() &&
1547 OpsKnown[OpNo].getKnownBits(Q: SQ).isNonZero())
1548 return true;
1549 return isKnownNonZero(V: IntOps[OpNo], Q: SQ);
1550 };
1551
1552 auto IsNonNeg = [&](unsigned OpNo) -> bool {
1553 // NB: This matches the impl in ValueTracking, we just try to use cached
1554 // knownbits here. If we ever start supporting WithCache for
1555 // `isKnownNonNegative`, change this to an explicit call.
1556 return OpsKnown[OpNo].getKnownBits(Q: SQ).isNonNegative();
1557 };
1558
1559 // Check if we know for certain that ({s|u}itofp op) is exact.
1560 auto IsValidPromotion = [&](unsigned OpNo) -> bool {
1561 // Can we treat this operand as the desired sign?
1562 if (OpsFromSigned != isa<SIToFPInst>(Val: BO.getOperand(i_nocapture: OpNo)) &&
1563 !IsNonNeg(OpNo))
1564 return false;
1565
1566 // If fp precision >= bitwidth(op) then its exact.
1567 // NB: This is slightly conservative for `sitofp`. For signed conversion, we
1568 // can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be
1569 // handled specially. We can't, however, increase the bound arbitrarily for
1570 // `sitofp` as for larger sizes, it won't sign extend.
1571 if (MaxRepresentableBits < IntSz) {
1572 // Otherwise if its signed cast check that fp precisions >= bitwidth(op) -
1573 // numSignBits(op).
1574 // TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change
1575 // `IntOps[OpNo]` arguments to `KnownOps[OpNo]`.
1576 if (OpsFromSigned)
1577 NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(Op: IntOps[OpNo]);
1578 // Finally for unsigned check that fp precision >= bitwidth(op) -
1579 // numLeadingZeros(op).
1580 else {
1581 NumUsedLeadingBits[OpNo] =
1582 IntSz - OpsKnown[OpNo].getKnownBits(Q: SQ).countMinLeadingZeros();
1583 }
1584 }
1585 // NB: We could also check if op is known to be a power of 2 or zero (which
1586 // will always be representable). Its unlikely, however, that is we are
1587 // unable to bound op in any way we will be able to pass the overflow checks
1588 // later on.
1589
1590 if (MaxRepresentableBits < NumUsedLeadingBits[OpNo])
1591 return false;
1592 // Signed + Mul also requires that op is non-zero to avoid -0 cases.
1593 return !OpsFromSigned || BO.getOpcode() != Instruction::FMul ||
1594 IsNonZero(OpNo);
1595 };
1596
1597 // If we have a constant rhs, see if we can losslessly convert it to an int.
1598 if (Op1FpC != nullptr) {
1599 // Signed + Mul req non-zero
1600 if (OpsFromSigned && BO.getOpcode() == Instruction::FMul &&
1601 !match(V: Op1FpC, P: m_NonZeroFP()))
1602 return nullptr;
1603
1604 Constant *Op1IntC = ConstantFoldCastOperand(
1605 Opcode: OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, C: Op1FpC,
1606 DestTy: IntTy, DL);
1607 if (Op1IntC == nullptr)
1608 return nullptr;
1609 if (ConstantFoldCastOperand(Opcode: OpsFromSigned ? Instruction::SIToFP
1610 : Instruction::UIToFP,
1611 C: Op1IntC, DestTy: FPTy, DL) != Op1FpC)
1612 return nullptr;
1613
1614 // First try to keep sign of cast the same.
1615 IntOps[1] = Op1IntC;
1616 }
1617
1618 // Ensure lhs/rhs integer types match.
1619 if (IntTy != IntOps[1]->getType())
1620 return nullptr;
1621
1622 if (Op1FpC == nullptr) {
1623 if (!IsValidPromotion(1))
1624 return nullptr;
1625 }
1626 if (!IsValidPromotion(0))
1627 return nullptr;
1628
  // Finally, we check that the integer version of the binop will not overflow.
1630 BinaryOperator::BinaryOps IntOpc;
1631 // Because of the precision check, we can often rule out overflows.
1632 bool NeedsOverflowCheck = true;
1633 // Try to conservatively rule out overflow based on the already done precision
1634 // checks.
1635 unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1;
1636 unsigned OverflowMaxCurBits =
1637 std::max(a: NumUsedLeadingBits[0], b: NumUsedLeadingBits[1]);
1638 bool OutputSigned = OpsFromSigned;
1639 switch (BO.getOpcode()) {
1640 case Instruction::FAdd:
1641 IntOpc = Instruction::Add;
1642 OverflowMaxOutputBits += OverflowMaxCurBits;
1643 break;
1644 case Instruction::FSub:
1645 IntOpc = Instruction::Sub;
1646 OverflowMaxOutputBits += OverflowMaxCurBits;
1647 break;
1648 case Instruction::FMul:
1649 IntOpc = Instruction::Mul;
1650 OverflowMaxOutputBits += OverflowMaxCurBits * 2;
1651 break;
1652 default:
1653 llvm_unreachable("Unsupported binop");
1654 }
1655 // The precision check may have already ruled out overflow.
1656 if (OverflowMaxOutputBits < IntSz) {
1657 NeedsOverflowCheck = false;
1658 // We can bound unsigned overflow from sub to in range signed value (this is
1659 // what allows us to avoid the overflow check for sub).
1660 if (IntOpc == Instruction::Sub)
1661 OutputSigned = true;
1662 }
1663
1664 // Precision check did not rule out overflow, so need to check.
1665 // TODO: If we add support for `WithCache` in `willNotOverflow`, change
1666 // `IntOps[...]` arguments to `KnownOps[...]`.
1667 if (NeedsOverflowCheck &&
1668 !willNotOverflow(Opcode: IntOpc, LHS: IntOps[0], RHS: IntOps[1], CxtI: BO, IsSigned: OutputSigned))
1669 return nullptr;
1670
1671 Value *IntBinOp = Builder.CreateBinOp(Opc: IntOpc, LHS: IntOps[0], RHS: IntOps[1]);
1672 if (auto *IntBO = dyn_cast<BinaryOperator>(Val: IntBinOp)) {
1673 IntBO->setHasNoSignedWrap(OutputSigned);
1674 IntBO->setHasNoUnsignedWrap(!OutputSigned);
1675 }
1676 if (OutputSigned)
1677 return new SIToFPInst(IntBinOp, FPTy);
1678 return new UIToFPInst(IntBinOp, FPTy);
1679}
1680
1681// Try to fold:
1682// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1683// -> ({s|u}itofp (int_binop x, y))
1684// 2) (fp_binop ({s|u}itofp x), FpC)
1685// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1686Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
1687 // Don't perform the fold on vectors, as the integer operation may be much
1688 // more expensive than the float operation in that case.
1689 if (BO.getType()->isVectorTy())
1690 return nullptr;
1691
1692 std::array<Value *, 2> IntOps = {nullptr, nullptr};
1693 Constant *Op1FpC = nullptr;
1694 // Check for:
1695 // 1) (binop ({s|u}itofp x), ({s|u}itofp y))
1696 // 2) (binop ({s|u}itofp x), FpC)
1697 if (!match(V: BO.getOperand(i_nocapture: 0), P: m_SIToFP(Op: m_Value(V&: IntOps[0]))) &&
1698 !match(V: BO.getOperand(i_nocapture: 0), P: m_UIToFP(Op: m_Value(V&: IntOps[0]))))
1699 return nullptr;
1700
1701 if (!match(V: BO.getOperand(i_nocapture: 1), P: m_Constant(C&: Op1FpC)) &&
1702 !match(V: BO.getOperand(i_nocapture: 1), P: m_SIToFP(Op: m_Value(V&: IntOps[1]))) &&
1703 !match(V: BO.getOperand(i_nocapture: 1), P: m_UIToFP(Op: m_Value(V&: IntOps[1]))))
1704 return nullptr;
1705
1706 // Cache KnownBits a bit to potentially save some analysis.
1707 SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]};
1708
1709 // Try treating x/y as coming from both `uitofp` and `sitofp`. There are
1710 // different constraints depending on the sign of the cast.
1711 // NB: `(uitofp nneg X)` == `(sitofp nneg X)`.
1712 if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false,
1713 IntOps, Op1FpC, OpsKnown))
1714 return R;
1715 return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps,
1716 Op1FpC, OpsKnown);
1717}
1718
1719/// A binop with a constant operand and a sign-extended boolean operand may be
1720/// converted into a select of constants by applying the binary operation to
1721/// the constant with the two possible values of the extended boolean (0 or -1).
1722Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
1723 // TODO: Handle non-commutative binop (constant is operand 0).
1724 // TODO: Handle zext.
1725 // TODO: Peek through 'not' of cast.
1726 Value *BO0 = BO.getOperand(i_nocapture: 0);
1727 Value *BO1 = BO.getOperand(i_nocapture: 1);
1728 Value *X;
1729 Constant *C;
1730 if (!match(V: BO0, P: m_SExt(Op: m_Value(V&: X))) || !match(V: BO1, P: m_ImmConstant(C)) ||
1731 !X->getType()->isIntOrIntVectorTy(BitWidth: 1))
1732 return nullptr;
1733
1734 // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
1735 Constant *Ones = ConstantInt::getAllOnesValue(Ty: BO.getType());
1736 Constant *Zero = ConstantInt::getNullValue(Ty: BO.getType());
1737 Value *TVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Ones, RHS: C);
1738 Value *FVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Zero, RHS: C);
1739 return createSelectInstWithUnknownProfile(C: X, S1: TVal, S2: FVal);
1740}
1741
1742static Value *simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
1743 bool IsTrueArm) {
1744 SmallVector<Value *> Ops;
1745 for (Value *Op : I.operands()) {
1746 Value *V = nullptr;
1747 if (Op == SI) {
1748 V = IsTrueArm ? SI->getTrueValue() : SI->getFalseValue();
1749 } else if (match(V: SI->getCondition(),
1750 P: m_SpecificICmp(MatchPred: IsTrueArm ? ICmpInst::ICMP_EQ
1751 : ICmpInst::ICMP_NE,
1752 L: m_Specific(V: Op), R: m_Value(V))) &&
1753 isGuaranteedNotToBeUndefOrPoison(V)) {
1754 // Pass
1755 } else if (match(V: Op, P: m_ZExt(Op: m_Specific(V: SI->getCondition())))) {
1756 V = IsTrueArm ? ConstantInt::get(Ty: Op->getType(), V: 1)
1757 : ConstantInt::getNullValue(Ty: Op->getType());
1758 } else {
1759 V = Op;
1760 }
1761 Ops.push_back(Elt: V);
1762 }
1763
1764 return simplifyInstructionWithOperands(I: &I, NewOps: Ops, Q: I.getDataLayout());
1765}
1766
1767static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
1768 Value *NewOp, InstCombiner &IC) {
1769 Instruction *Clone = I.clone();
1770 Clone->replaceUsesOfWith(From: SI, To: NewOp);
1771 Clone->dropUBImplyingAttrsAndMetadata();
1772 IC.InsertNewInstBefore(New: Clone, Old: I.getIterator());
1773 return Clone;
1774}
1775
1776Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
1777 bool FoldWithMultiUse,
1778 bool SimplifyBothArms) {
1779 // Don't modify shared select instructions unless set FoldWithMultiUse
1780 if (!SI->hasOneUser() && !FoldWithMultiUse)
1781 return nullptr;
1782
1783 Value *TV = SI->getTrueValue();
1784 Value *FV = SI->getFalseValue();
1785
1786 // Bool selects with constant operands can be folded to logical ops.
1787 if (SI->getType()->isIntOrIntVectorTy(BitWidth: 1))
1788 return nullptr;
1789
1790 // Avoid breaking min/max reduction pattern,
1791 // which is necessary for vectorization later.
1792 if (isa<MinMaxIntrinsic>(Val: &Op))
1793 for (Value *IntrinOp : Op.operands())
1794 if (auto *PN = dyn_cast<PHINode>(Val: IntrinOp))
1795 for (Value *PhiOp : PN->operands())
1796 if (PhiOp == &Op)
1797 return nullptr;
1798
1799 // Test if a FCmpInst instruction is used exclusively by a select as
1800 // part of a minimum or maximum operation. If so, refrain from doing
1801 // any other folding. This helps out other analyses which understand
1802 // non-obfuscated minimum and maximum idioms. And in this case, at
1803 // least one of the comparison operands has at least one user besides
1804 // the compare (the select), which would often largely negate the
1805 // benefit of folding anyway.
1806 if (auto *CI = dyn_cast<FCmpInst>(Val: SI->getCondition())) {
1807 if (CI->hasOneUse()) {
1808 Value *Op0 = CI->getOperand(i_nocapture: 0), *Op1 = CI->getOperand(i_nocapture: 1);
1809 if (((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1)) &&
1810 !CI->isCommutative())
1811 return nullptr;
1812 }
1813 }
1814
1815 // Make sure that one of the select arms folds successfully.
1816 Value *NewTV = simplifyOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm=*/true);
1817 Value *NewFV =
1818 simplifyOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm=*/false);
1819 if (!NewTV && !NewFV)
1820 return nullptr;
1821
1822 if (SimplifyBothArms && !(NewTV && NewFV))
1823 return nullptr;
1824
1825 // Create an instruction for the arm that did not fold.
1826 if (!NewTV)
1827 NewTV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: TV, IC&: *this);
1828 if (!NewFV)
1829 NewFV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: FV, IC&: *this);
1830 return SelectInst::Create(C: SI->getCondition(), S1: NewTV, S2: NewFV, NameStr: "", InsertBefore: nullptr, MDFrom: SI);
1831}
1832
1833static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN,
1834 Value *InValue, BasicBlock *InBB,
1835 const DataLayout &DL,
1836 const SimplifyQuery SQ) {
1837 // NB: It is a precondition of this transform that the operands be
1838 // phi translatable!
1839 SmallVector<Value *> Ops;
1840 for (Value *Op : I.operands()) {
1841 if (Op == PN)
1842 Ops.push_back(Elt: InValue);
1843 else
1844 Ops.push_back(Elt: Op->DoPHITranslation(CurBB: PN->getParent(), PredBB: InBB));
1845 }
1846
1847 // Don't consider the simplification successful if we get back a constant
1848 // expression. That's just an instruction in hiding.
1849 // Also reject the case where we simplify back to the phi node. We wouldn't
1850 // be able to remove it in that case.
1851 Value *NewVal = simplifyInstructionWithOperands(
1852 I: &I, NewOps: Ops, Q: SQ.getWithInstruction(I: InBB->getTerminator()));
1853 if (NewVal && NewVal != PN && !match(V: NewVal, P: m_ConstantExpr()))
1854 return NewVal;
1855
1856 // Check if incoming PHI value can be replaced with constant
1857 // based on implied condition.
1858 CondBrInst *TerminatorBI = dyn_cast<CondBrInst>(Val: InBB->getTerminator());
1859 const ICmpInst *ICmp = dyn_cast<ICmpInst>(Val: &I);
1860 if (TerminatorBI &&
1861 TerminatorBI->getSuccessor(i: 0) != TerminatorBI->getSuccessor(i: 1) && ICmp) {
1862 bool LHSIsTrue = TerminatorBI->getSuccessor(i: 0) == PN->getParent();
1863 std::optional<bool> ImpliedCond = isImpliedCondition(
1864 LHS: TerminatorBI->getCondition(), RHSPred: ICmp->getCmpPredicate(), RHSOp0: Ops[0], RHSOp1: Ops[1],
1865 DL, LHSIsTrue);
1866 if (ImpliedCond)
1867 return ConstantInt::getBool(Ty: I.getType(), V: ImpliedCond.value());
1868 }
1869
1870 return nullptr;
1871}
1872
1873/// In some cases it is beneficial to fold a select into a binary operator.
1874/// For example:
1875/// %1 = or %in, 4
1876/// %2 = select %cond, %1, %in
1877/// %3 = or %2, 1
1878/// =>
1879/// %1 = select i1 %cond, 5, 1
1880/// %2 = or %1, %in
1881Instruction *InstCombinerImpl::foldBinOpSelectBinOp(BinaryOperator &Op) {
1882 assert(Op.isAssociative() && "The operation must be associative!");
1883
1884 SelectInst *SI = dyn_cast<SelectInst>(Val: Op.getOperand(i_nocapture: 0));
1885
1886 Constant *Const;
1887 if (!SI || !match(V: Op.getOperand(i_nocapture: 1), P: m_ImmConstant(C&: Const)) ||
1888 !Op.hasOneUse() || !SI->hasOneUse())
1889 return nullptr;
1890
1891 Value *TV = SI->getTrueValue();
1892 Value *FV = SI->getFalseValue();
1893 Value *Input, *NewTV, *NewFV;
1894 Constant *Const2;
1895
1896 if (TV->hasOneUse() && match(V: TV, P: m_BinOp(Opcode: Op.getOpcode(), L: m_Specific(V: FV),
1897 R: m_ImmConstant(C&: Const2)))) {
1898 NewTV = ConstantFoldBinaryInstruction(Opcode: Op.getOpcode(), V1: Const, V2: Const2);
1899 NewFV = Const;
1900 Input = FV;
1901 } else if (FV->hasOneUse() &&
1902 match(V: FV, P: m_BinOp(Opcode: Op.getOpcode(), L: m_Specific(V: TV),
1903 R: m_ImmConstant(C&: Const2)))) {
1904 NewTV = Const;
1905 NewFV = ConstantFoldBinaryInstruction(Opcode: Op.getOpcode(), V1: Const, V2: Const2);
1906 Input = TV;
1907 } else
1908 return nullptr;
1909
1910 if (!NewTV || !NewFV)
1911 return nullptr;
1912
1913 Value *NewSI =
1914 Builder.CreateSelect(C: SI->getCondition(), True: NewTV, False: NewFV, Name: "",
1915 MDFrom: ProfcheckDisableMetadataFixes ? nullptr : SI);
1916 return BinaryOperator::Create(Op: Op.getOpcode(), S1: NewSI, S2: Input);
1917}
1918
1919Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN,
1920 bool AllowMultipleUses) {
1921 unsigned NumPHIValues = PN->getNumIncomingValues();
1922 if (NumPHIValues == 0)
1923 return nullptr;
1924
1925 // We normally only transform phis with a single use. However, if a PHI has
1926 // multiple uses and they are all the same operation, we can fold *all* of the
1927 // uses into the PHI.
1928 bool OneUse = PN->hasOneUse();
1929 bool IdenticalUsers = false;
1930 if (!AllowMultipleUses && !OneUse) {
1931 // Walk the use list for the instruction, comparing them to I.
1932 for (User *U : PN->users()) {
1933 Instruction *UI = cast<Instruction>(Val: U);
1934 if (UI != &I && !I.isIdenticalTo(I: UI))
1935 return nullptr;
1936 }
1937 // Otherwise, we can replace *all* users with the new PHI we form.
1938 IdenticalUsers = true;
1939 }
1940
1941 // Check that all operands are phi-translatable.
1942 for (Value *Op : I.operands()) {
1943 if (Op == PN)
1944 continue;
1945
1946 // Non-instructions never require phi-translation.
1947 auto *I = dyn_cast<Instruction>(Val: Op);
1948 if (!I)
1949 continue;
1950
1951 // Phi-translate can handle phi nodes in the same block.
1952 if (isa<PHINode>(Val: I))
1953 if (I->getParent() == PN->getParent())
1954 continue;
1955
1956 // Operand dominates the block, no phi-translation necessary.
1957 if (DT.dominates(Def: I, BB: PN->getParent()))
1958 continue;
1959
1960 // Not phi-translatable, bail out.
1961 return nullptr;
1962 }
1963
1964 // Check to see whether the instruction can be folded into each phi operand.
1965 // If there is one operand that does not fold, remember the BB it is in.
1966 SmallVector<Value *> NewPhiValues;
1967 SmallVector<unsigned int> OpsToMoveUseToIncomingBB;
1968 bool SeenNonSimplifiedInVal = false;
1969 for (unsigned i = 0; i != NumPHIValues; ++i) {
1970 Value *InVal = PN->getIncomingValue(i);
1971 BasicBlock *InBB = PN->getIncomingBlock(i);
1972
1973 if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InValue: InVal, InBB, DL, SQ)) {
1974 NewPhiValues.push_back(Elt: NewVal);
1975 continue;
1976 }
1977
1978 // Handle some cases that can't be fully simplified, but where we know that
1979 // the two instructions will fold into one.
1980 auto WillFold = [&]() {
1981 if (!InVal->hasUseList() || !InVal->hasOneUser())
1982 return false;
1983
1984 // icmp of ucmp/scmp with constant will fold to icmp.
1985 const APInt *Ignored;
1986 if (isa<CmpIntrinsic>(Val: InVal) &&
1987 match(V: &I, P: m_ICmp(L: m_Specific(V: PN), R: m_APInt(Res&: Ignored))))
1988 return true;
1989
1990 // icmp eq zext(bool), 0 will fold to !bool.
1991 if (isa<ZExtInst>(Val: InVal) &&
1992 cast<ZExtInst>(Val: InVal)->getSrcTy()->isIntOrIntVectorTy(BitWidth: 1) &&
1993 match(V: &I,
1994 P: m_SpecificICmp(MatchPred: ICmpInst::ICMP_EQ, L: m_Specific(V: PN), R: m_Zero())))
1995 return true;
1996
1997 return false;
1998 };
1999
2000 if (WillFold()) {
2001 OpsToMoveUseToIncomingBB.push_back(Elt: i);
2002 NewPhiValues.push_back(Elt: nullptr);
2003 continue;
2004 }
2005
2006 if (!OneUse && !IdenticalUsers)
2007 return nullptr;
2008
2009 if (SeenNonSimplifiedInVal)
2010 return nullptr; // More than one non-simplified value.
2011 SeenNonSimplifiedInVal = true;
2012
2013 // If there is exactly one non-simplified value, we can insert a copy of the
2014 // operation in that block. However, if this is a critical edge, we would
2015 // be inserting the computation on some other paths (e.g. inside a loop).
2016 // Only do this if the pred block is unconditionally branching into the phi
2017 // block. Also, make sure that the pred block is not dead code.
2018 UncondBrInst *BI = dyn_cast<UncondBrInst>(Val: InBB->getTerminator());
2019 if (!BI || !DT.isReachableFromEntry(A: InBB))
2020 return nullptr;
2021
2022 NewPhiValues.push_back(Elt: nullptr);
2023 OpsToMoveUseToIncomingBB.push_back(Elt: i);
2024
2025 // Do not push the operation across a loop backedge. This could result in
2026 // an infinite combine loop, and is generally non-profitable (especially
2027 // if the operation was originally outside the loop).
2028 if (isBackEdge(From: InBB, To: PN->getParent()))
2029 return nullptr;
2030 }
2031
2032 // Clone the instruction that uses the phi node and move it into the incoming
2033 // BB because we know that the next iteration of InstCombine will simplify it.
2034 SmallDenseMap<BasicBlock *, Instruction *> Clones;
2035 for (auto OpIndex : OpsToMoveUseToIncomingBB) {
2036 Value *Op = PN->getIncomingValue(i: OpIndex);
2037 BasicBlock *OpBB = PN->getIncomingBlock(i: OpIndex);
2038
2039 Instruction *Clone = Clones.lookup(Val: OpBB);
2040 if (!Clone) {
2041 Clone = I.clone();
2042 for (Use &U : Clone->operands()) {
2043 if (U == PN)
2044 U = Op;
2045 else
2046 U = U->DoPHITranslation(CurBB: PN->getParent(), PredBB: OpBB);
2047 }
2048 Clone = InsertNewInstBefore(New: Clone, Old: OpBB->getTerminator()->getIterator());
2049 Clones.insert(KV: {OpBB, Clone});
2050 // We may have speculated the instruction.
2051 Clone->dropUBImplyingAttrsAndMetadata();
2052 }
2053
2054 NewPhiValues[OpIndex] = Clone;
2055 }
2056
2057 // Okay, we can do the transformation: create the new PHI node.
2058 PHINode *NewPN = PHINode::Create(Ty: I.getType(), NumReservedValues: PN->getNumIncomingValues());
2059 InsertNewInstBefore(New: NewPN, Old: PN->getIterator());
2060 NewPN->takeName(V: PN);
2061 NewPN->setDebugLoc(PN->getDebugLoc());
2062
2063 for (unsigned i = 0; i != NumPHIValues; ++i)
2064 NewPN->addIncoming(V: NewPhiValues[i], BB: PN->getIncomingBlock(i));
2065
2066 if (IdenticalUsers) {
2067 // Collect and deduplicate users up-front to avoid iterator invalidation.
2068 SmallSetVector<Instruction *, 4> ToReplace;
2069 for (User *U : PN->users()) {
2070 Instruction *User = cast<Instruction>(Val: U);
2071 if (User == &I)
2072 continue;
2073 ToReplace.insert(X: User);
2074 }
2075 for (Instruction *I : ToReplace) {
2076 replaceInstUsesWith(I&: *I, V: NewPN);
2077 eraseInstFromFunction(I&: *I);
2078 }
2079 OneUse = true;
2080 }
2081
2082 if (OneUse) {
2083 replaceAllDbgUsesWith(From&: *PN, To&: *NewPN, DomPoint&: *PN, DT);
2084 }
2085 return replaceInstUsesWith(I, V: NewPN);
2086}
2087
2088Instruction *InstCombinerImpl::foldBinopWithRecurrence(BinaryOperator &BO) {
2089 if (!BO.isAssociative())
2090 return nullptr;
2091
2092 // Find the interleaved binary ops.
2093 auto Opc = BO.getOpcode();
2094 auto *BO0 = dyn_cast<BinaryOperator>(Val: BO.getOperand(i_nocapture: 0));
2095 auto *BO1 = dyn_cast<BinaryOperator>(Val: BO.getOperand(i_nocapture: 1));
2096 if (!BO0 || !BO1 || !BO0->hasNUses(N: 2) || !BO1->hasNUses(N: 2) ||
2097 BO0->getOpcode() != Opc || BO1->getOpcode() != Opc ||
2098 !BO0->isAssociative() || !BO1->isAssociative() ||
2099 BO0->getParent() != BO1->getParent())
2100 return nullptr;
2101
2102 assert(BO.isCommutative() && BO0->isCommutative() && BO1->isCommutative() &&
2103 "Expected commutative instructions!");
2104
2105 // Find the matching phis, forming the recurrences.
2106 PHINode *PN0, *PN1;
2107 Value *Start0, *Step0, *Start1, *Step1;
2108 if (!matchSimpleRecurrence(I: BO0, P&: PN0, Start&: Start0, Step&: Step0) || !PN0->hasOneUse() ||
2109 !matchSimpleRecurrence(I: BO1, P&: PN1, Start&: Start1, Step&: Step1) || !PN1->hasOneUse() ||
2110 PN0->getParent() != PN1->getParent())
2111 return nullptr;
2112
2113 assert(PN0->getNumIncomingValues() == 2 && PN1->getNumIncomingValues() == 2 &&
2114 "Expected PHIs with two incoming values!");
2115
2116 // Convert the start and step values to constants.
2117 auto *Init0 = dyn_cast<Constant>(Val: Start0);
2118 auto *Init1 = dyn_cast<Constant>(Val: Start1);
2119 auto *C0 = dyn_cast<Constant>(Val: Step0);
2120 auto *C1 = dyn_cast<Constant>(Val: Step1);
2121 if (!Init0 || !Init1 || !C0 || !C1)
2122 return nullptr;
2123
2124 // Fold the recurrence constants.
2125 auto *Init = ConstantFoldBinaryInstruction(Opcode: Opc, V1: Init0, V2: Init1);
2126 auto *C = ConstantFoldBinaryInstruction(Opcode: Opc, V1: C0, V2: C1);
2127 if (!Init || !C)
2128 return nullptr;
2129
2130 // Create the reduced PHI.
2131 auto *NewPN = PHINode::Create(Ty: PN0->getType(), NumReservedValues: PN0->getNumIncomingValues(),
2132 NameStr: "reduced.phi");
2133
2134 // Create the new binary op.
2135 auto *NewBO = BinaryOperator::Create(Op: Opc, S1: NewPN, S2: C);
2136 if (Opc == Instruction::FAdd || Opc == Instruction::FMul) {
2137 // Intersect FMF flags for FADD and FMUL.
2138 FastMathFlags Intersect = BO0->getFastMathFlags() &
2139 BO1->getFastMathFlags() & BO.getFastMathFlags();
2140 NewBO->setFastMathFlags(Intersect);
2141 } else {
2142 OverflowTracking Flags;
2143 Flags.AllKnownNonNegative = false;
2144 Flags.AllKnownNonZero = false;
2145 Flags.mergeFlags(I&: *BO0);
2146 Flags.mergeFlags(I&: *BO1);
2147 Flags.mergeFlags(I&: BO);
2148 Flags.applyFlags(I&: *NewBO);
2149 }
2150 NewBO->takeName(V: &BO);
2151
2152 for (unsigned I = 0, E = PN0->getNumIncomingValues(); I != E; ++I) {
2153 auto *V = PN0->getIncomingValue(i: I);
2154 auto *BB = PN0->getIncomingBlock(i: I);
2155 if (V == Init0) {
2156 assert(((PN1->getIncomingValue(0) == Init1 &&
2157 PN1->getIncomingBlock(0) == BB) ||
2158 (PN1->getIncomingValue(1) == Init1 &&
2159 PN1->getIncomingBlock(1) == BB)) &&
2160 "Invalid incoming block!");
2161 NewPN->addIncoming(V: Init, BB);
2162 } else if (V == BO0) {
2163 assert(((PN1->getIncomingValue(0) == BO1 &&
2164 PN1->getIncomingBlock(0) == BB) ||
2165 (PN1->getIncomingValue(1) == BO1 &&
2166 PN1->getIncomingBlock(1) == BB)) &&
2167 "Invalid incoming block!");
2168 NewPN->addIncoming(V: NewBO, BB);
2169 } else
2170 llvm_unreachable("Unexpected incoming value!");
2171 }
2172
2173 LLVM_DEBUG(dbgs() << " Combined " << *PN0 << "\n " << *BO0
2174 << "\n with " << *PN1 << "\n " << *BO1
2175 << '\n');
2176
2177 // Insert the new recurrence and remove the old (dead) ones.
2178 InsertNewInstWith(New: NewPN, Old: PN0->getIterator());
2179 InsertNewInstWith(New: NewBO, Old: BO0->getIterator());
2180
2181 eraseInstFromFunction(
2182 I&: *replaceInstUsesWith(I&: *BO0, V: PoisonValue::get(T: BO0->getType())));
2183 eraseInstFromFunction(
2184 I&: *replaceInstUsesWith(I&: *BO1, V: PoisonValue::get(T: BO1->getType())));
2185 eraseInstFromFunction(I&: *PN0);
2186 eraseInstFromFunction(I&: *PN1);
2187
2188 return replaceInstUsesWith(I&: BO, V: NewBO);
2189}
2190
2191Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
2192 // Attempt to fold binary operators whose operands are simple recurrences.
2193 if (auto *NewBO = foldBinopWithRecurrence(BO))
2194 return NewBO;
2195
2196 // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
2197 // we are guarding against replicating the binop in >1 predecessor.
2198 // This could miss matching a phi with 2 constant incoming values.
2199 auto *Phi0 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 0));
2200 auto *Phi1 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 1));
2201 if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
2202 Phi0->getNumOperands() != Phi1->getNumOperands())
2203 return nullptr;
2204
2205 // TODO: Remove the restriction for binop being in the same block as the phis.
2206 if (BO.getParent() != Phi0->getParent() ||
2207 BO.getParent() != Phi1->getParent())
2208 return nullptr;
2209
2210 // Fold if there is at least one specific constant value in phi0 or phi1's
2211 // incoming values that comes from the same block and this specific constant
2212 // value can be used to do optimization for specific binary operator.
2213 // For example:
2214 // %phi0 = phi i32 [0, %bb0], [%i, %bb1]
2215 // %phi1 = phi i32 [%j, %bb0], [0, %bb1]
2216 // %add = add i32 %phi0, %phi1
2217 // ==>
2218 // %add = phi i32 [%j, %bb0], [%i, %bb1]
2219 Constant *C = ConstantExpr::getBinOpIdentity(Opcode: BO.getOpcode(), Ty: BO.getType(),
2220 /*AllowRHSConstant*/ false);
2221 if (C) {
2222 SmallVector<Value *, 4> NewIncomingValues;
2223 auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) {
2224 auto &Phi0Use = std::get<0>(t&: T);
2225 auto &Phi1Use = std::get<1>(t&: T);
2226 if (Phi0->getIncomingBlock(U: Phi0Use) != Phi1->getIncomingBlock(U: Phi1Use))
2227 return false;
2228 Value *Phi0UseV = Phi0Use.get();
2229 Value *Phi1UseV = Phi1Use.get();
2230 if (Phi0UseV == C)
2231 NewIncomingValues.push_back(Elt: Phi1UseV);
2232 else if (Phi1UseV == C)
2233 NewIncomingValues.push_back(Elt: Phi0UseV);
2234 else
2235 return false;
2236 return true;
2237 };
2238
2239 if (all_of(Range: zip(t: Phi0->operands(), u: Phi1->operands()),
2240 P: CanFoldIncomingValuePair)) {
2241 PHINode *NewPhi =
2242 PHINode::Create(Ty: Phi0->getType(), NumReservedValues: Phi0->getNumOperands());
2243 assert(NewIncomingValues.size() == Phi0->getNumOperands() &&
2244 "The number of collected incoming values should equal the number "
2245 "of the original PHINode operands!");
2246 for (unsigned I = 0; I < Phi0->getNumOperands(); I++)
2247 NewPhi->addIncoming(V: NewIncomingValues[I], BB: Phi0->getIncomingBlock(i: I));
2248 return NewPhi;
2249 }
2250 }
2251
2252 if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
2253 return nullptr;
2254
2255 // Match a pair of incoming constants for one of the predecessor blocks.
2256 BasicBlock *ConstBB, *OtherBB;
2257 Constant *C0, *C1;
2258 if (match(V: Phi0->getIncomingValue(i: 0), P: m_ImmConstant(C&: C0))) {
2259 ConstBB = Phi0->getIncomingBlock(i: 0);
2260 OtherBB = Phi0->getIncomingBlock(i: 1);
2261 } else if (match(V: Phi0->getIncomingValue(i: 1), P: m_ImmConstant(C&: C0))) {
2262 ConstBB = Phi0->getIncomingBlock(i: 1);
2263 OtherBB = Phi0->getIncomingBlock(i: 0);
2264 } else {
2265 return nullptr;
2266 }
2267 if (!match(V: Phi1->getIncomingValueForBlock(BB: ConstBB), P: m_ImmConstant(C&: C1)))
2268 return nullptr;
2269
2270 // The block that we are hoisting to must reach here unconditionally.
2271 // Otherwise, we could be speculatively executing an expensive or
2272 // non-speculative op.
2273 auto *PredBlockBranch = dyn_cast<UncondBrInst>(Val: OtherBB->getTerminator());
2274 if (!PredBlockBranch || !DT.isReachableFromEntry(A: OtherBB))
2275 return nullptr;
2276
2277 // TODO: This check could be tightened to only apply to binops (div/rem) that
2278 // are not safe to speculatively execute. But that could allow hoisting
2279 // potentially expensive instructions (fdiv for example).
2280 for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
2281 if (!isGuaranteedToTransferExecutionToSuccessor(I: &*BBIter))
2282 return nullptr;
2283
2284 // Fold constants for the predecessor block with constant incoming values.
2285 Constant *NewC = ConstantFoldBinaryOpOperands(Opcode: BO.getOpcode(), LHS: C0, RHS: C1, DL);
2286 if (!NewC)
2287 return nullptr;
2288
2289 // Make a new binop in the predecessor block with the non-constant incoming
2290 // values.
2291 Builder.SetInsertPoint(PredBlockBranch);
2292 Value *NewBO = Builder.CreateBinOp(Opc: BO.getOpcode(),
2293 LHS: Phi0->getIncomingValueForBlock(BB: OtherBB),
2294 RHS: Phi1->getIncomingValueForBlock(BB: OtherBB));
2295 if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(Val: NewBO))
2296 NotFoldedNewBO->copyIRFlags(V: &BO);
2297
2298 // Replace the binop with a phi of the new values. The old phis are dead.
2299 PHINode *NewPhi = PHINode::Create(Ty: BO.getType(), NumReservedValues: 2);
2300 NewPhi->addIncoming(V: NewBO, BB: OtherBB);
2301 NewPhi->addIncoming(V: NewC, BB: ConstBB);
2302 return NewPhi;
2303}
2304
2305Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
2306 auto TryFoldOperand = [&](unsigned OpIdx,
2307 bool IsOtherParamConst) -> Instruction * {
2308 if (auto *Sel = dyn_cast<SelectInst>(Val: I.getOperand(i_nocapture: OpIdx)))
2309 return FoldOpIntoSelect(Op&: I, SI: Sel, FoldWithMultiUse: false, SimplifyBothArms: !IsOtherParamConst);
2310 if (auto *PN = dyn_cast<PHINode>(Val: I.getOperand(i_nocapture: OpIdx)))
2311 return foldOpIntoPhi(I, PN);
2312 return nullptr;
2313 };
2314
2315 if (Instruction *NewI =
2316 TryFoldOperand(/*OpIdx=*/0, isa<Constant>(Val: I.getOperand(i_nocapture: 1))))
2317 return NewI;
2318 return TryFoldOperand(/*OpIdx=*/1, isa<Constant>(Val: I.getOperand(i_nocapture: 0)));
2319}
2320
2321static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
2322 // If this GEP has only 0 indices, it is the same pointer as
2323 // Src. If Src is not a trivial GEP too, don't combine
2324 // the indices.
2325 if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
2326 !Src.hasOneUse())
2327 return false;
2328 return true;
2329}
2330
2331/// Find a constant NewC that has property:
2332/// shuffle(NewC, ShMask) = C
2333/// Returns nullptr if such a constant does not exist e.g. ShMask=<0,0> C=<1,2>
2334///
2335/// A 1-to-1 mapping is not required. Example:
2336/// ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <poison,5,6,poison>
2337Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
2338 VectorType *NewCTy) {
2339 if (isa<ScalableVectorType>(Val: NewCTy)) {
2340 Constant *Splat = C->getSplatValue();
2341 if (!Splat)
2342 return nullptr;
2343 return ConstantVector::getSplat(EC: NewCTy->getElementCount(), Elt: Splat);
2344 }
2345
2346 if (cast<FixedVectorType>(Val: NewCTy)->getNumElements() >
2347 cast<FixedVectorType>(Val: C->getType())->getNumElements())
2348 return nullptr;
2349
2350 unsigned NewCNumElts = cast<FixedVectorType>(Val: NewCTy)->getNumElements();
2351 PoisonValue *PoisonScalar = PoisonValue::get(T: C->getType()->getScalarType());
2352 SmallVector<Constant *, 16> NewVecC(NewCNumElts, PoisonScalar);
2353 unsigned NumElts = cast<FixedVectorType>(Val: C->getType())->getNumElements();
2354 for (unsigned I = 0; I < NumElts; ++I) {
2355 Constant *CElt = C->getAggregateElement(Elt: I);
2356 if (ShMask[I] >= 0) {
2357 assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
2358 Constant *NewCElt = NewVecC[ShMask[I]];
2359 // Bail out if:
2360 // 1. The constant vector contains a constant expression.
2361 // 2. The shuffle needs an element of the constant vector that can't
2362 // be mapped to a new constant vector.
2363 // 3. This is a widening shuffle that copies elements of V1 into the
2364 // extended elements (extending with poison is allowed).
2365 if (!CElt || (!isa<PoisonValue>(Val: NewCElt) && NewCElt != CElt) ||
2366 I >= NewCNumElts)
2367 return nullptr;
2368 NewVecC[ShMask[I]] = CElt;
2369 }
2370 }
2371 return ConstantVector::get(V: NewVecC);
2372}
2373
2374// Get the result of `Vector Op Splat` (or Splat Op Vector if \p SplatLHS).
2375static Constant *constantFoldBinOpWithSplat(unsigned Opcode, Constant *Vector,
2376 Constant *Splat, bool SplatLHS,
2377 const DataLayout &DL) {
2378 ElementCount EC = cast<VectorType>(Val: Vector->getType())->getElementCount();
2379 Constant *LHS = ConstantVector::getSplat(EC, Elt: Splat);
2380 Constant *RHS = Vector;
2381 if (!SplatLHS)
2382 std::swap(a&: LHS, b&: RHS);
2383 return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
2384}
2385
2386template <Intrinsic::ID SpliceID>
2387static Instruction *foldSpliceBinOp(BinaryOperator &Inst,
2388 InstCombiner::BuilderTy &Builder) {
2389 Value *LHS = Inst.getOperand(i_nocapture: 0), *RHS = Inst.getOperand(i_nocapture: 1);
2390 auto CreateBinOpSplice = [&](Value *X, Value *Y, Value *Offset) {
2391 Value *V = Builder.CreateBinOp(Opc: Inst.getOpcode(), LHS: X, RHS: Y, Name: Inst.getName());
2392 if (auto *BO = dyn_cast<BinaryOperator>(Val: V))
2393 BO->copyIRFlags(V: &Inst);
2394 Module *M = Inst.getModule();
2395 Function *F = Intrinsic::getOrInsertDeclaration(M, id: SpliceID, Tys: V->getType());
2396 return CallInst::Create(Func: F, Args: {V, PoisonValue::get(T: V->getType()), Offset});
2397 };
2398 Value *V1, *V2, *Offset;
2399 if (match(LHS,
2400 m_Intrinsic<SpliceID>(m_Value(V&: V1), m_Poison(), m_Value(V&: Offset)))) {
2401 // Op(splice(V1, poison, offset), splice(V2, poison, offset))
2402 // -> splice(Op(V1, V2), poison, offset)
2403 if (match(RHS, m_Intrinsic<SpliceID>(m_Value(V&: V2), m_Poison(),
2404 m_Specific(V: Offset))) &&
2405 (LHS->hasOneUse() || RHS->hasOneUse() ||
2406 (LHS == RHS && LHS->hasNUses(N: 2))))
2407 return CreateBinOpSplice(V1, V2, Offset);
2408
2409 // Op(splice(V1, poison, offset), RHSSplat)
2410 // -> splice(Op(V1, RHSSplat), poison, offset)
2411 if (LHS->hasOneUse() && isSplatValue(V: RHS))
2412 return CreateBinOpSplice(V1, RHS, Offset);
2413 }
2414 // Op(LHSSplat, splice(V2, poison, offset))
2415 // -> splice(Op(LHSSplat, V2), poison, offset)
2416 else if (isSplatValue(V: LHS) &&
2417 match(RHS, m_OneUse(m_Intrinsic<SpliceID>(m_Value(V&: V2), m_Poison(),
2418 m_Value(V&: Offset)))))
2419 return CreateBinOpSplice(LHS, V2, Offset);
2420
2421 // TODO: Fold binops of the form
2422 // Op(splice(poison, V1, offset), splice(poison, V2, offset))
2423 // -> splice(poison, Op(V1, V2), offset)
2424
2425 return nullptr;
2426}
2427
2428Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
2429 if (!isa<VectorType>(Val: Inst.getType()))
2430 return nullptr;
2431
2432 BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
2433 Value *LHS = Inst.getOperand(i_nocapture: 0), *RHS = Inst.getOperand(i_nocapture: 1);
2434 assert(cast<VectorType>(LHS->getType())->getElementCount() ==
2435 cast<VectorType>(Inst.getType())->getElementCount());
2436 assert(cast<VectorType>(RHS->getType())->getElementCount() ==
2437 cast<VectorType>(Inst.getType())->getElementCount());
2438
2439 auto foldConstantsThroughSubVectorInsertSplat =
2440 [&](Value *MaybeSubVector, Value *MaybeSplat,
2441 bool SplatLHS) -> Instruction * {
2442 Value *Idx;
2443 Constant *Splat, *SubVector, *Dest;
2444 if (!match(V: MaybeSplat, P: m_ConstantSplat(SubPattern: m_Constant(C&: Splat))) ||
2445 !match(V: MaybeSubVector,
2446 P: m_VectorInsert(Op0: m_Constant(C&: Dest), Op1: m_Constant(C&: SubVector),
2447 Op2: m_Value(V&: Idx))))
2448 return nullptr;
2449 SubVector =
2450 constantFoldBinOpWithSplat(Opcode, Vector: SubVector, Splat, SplatLHS, DL);
2451 Dest = constantFoldBinOpWithSplat(Opcode, Vector: Dest, Splat, SplatLHS, DL);
2452 if (!SubVector || !Dest)
2453 return nullptr;
2454 auto *InsertVector =
2455 Builder.CreateInsertVector(DstType: Dest->getType(), SrcVec: Dest, SubVec: SubVector, Idx);
2456 return replaceInstUsesWith(I&: Inst, V: InsertVector);
2457 };
2458
2459 // If one operand is a constant splat and the other operand is a
2460 // `vector.insert` where both the destination and subvector are constant,
2461 // apply the operation to both the destination and subvector, returning a new
2462 // constant `vector.insert`. This helps constant folding for scalable vectors.
2463 if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
2464 /*MaybeSubVector=*/LHS, /*MaybeSplat=*/RHS, /*SplatLHS=*/false))
2465 return Folded;
2466 if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
2467 /*MaybeSubVector=*/RHS, /*MaybeSplat=*/LHS, /*SplatLHS=*/true))
2468 return Folded;
2469
2470 // If both operands of the binop are vector concatenations, then perform the
2471 // narrow binop on each pair of the source operands followed by concatenation
2472 // of the results.
2473 Value *L0, *L1, *R0, *R1;
2474 ArrayRef<int> Mask;
2475 if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: L0), v2: m_Value(V&: L1), mask: m_Mask(Mask))) &&
2476 match(V: RHS, P: m_Shuffle(v1: m_Value(V&: R0), v2: m_Value(V&: R1), mask: m_SpecificMask(Mask))) &&
2477 LHS->hasOneUse() && RHS->hasOneUse() &&
2478 cast<ShuffleVectorInst>(Val: LHS)->isConcat() &&
2479 cast<ShuffleVectorInst>(Val: RHS)->isConcat()) {
2480 // This transform does not have the speculative execution constraint as
2481 // below because the shuffle is a concatenation. The new binops are
2482 // operating on exactly the same elements as the existing binop.
2483 // TODO: We could ease the mask requirement to allow different undef lanes,
2484 // but that requires an analysis of the binop-with-undef output value.
2485 Value *NewBO0 = Builder.CreateBinOp(Opc: Opcode, LHS: L0, RHS: R0);
2486 if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO0))
2487 BO->copyIRFlags(V: &Inst);
2488 Value *NewBO1 = Builder.CreateBinOp(Opc: Opcode, LHS: L1, RHS: R1);
2489 if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO1))
2490 BO->copyIRFlags(V: &Inst);
2491 return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
2492 }
2493
2494 auto createBinOpReverse = [&](Value *X, Value *Y) {
2495 Value *V = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y, Name: Inst.getName());
2496 if (auto *BO = dyn_cast<BinaryOperator>(Val: V))
2497 BO->copyIRFlags(V: &Inst);
2498 Module *M = Inst.getModule();
2499 Function *F = Intrinsic::getOrInsertDeclaration(
2500 M, id: Intrinsic::vector_reverse, Tys: V->getType());
2501 return CallInst::Create(Func: F, Args: V);
2502 };
2503
2504 // NOTE: Reverse shuffles don't require the speculative execution protection
2505 // below because they don't affect which lanes take part in the computation.
2506
2507 Value *V1, *V2;
2508 if (match(V: LHS, P: m_VecReverse(Op0: m_Value(V&: V1)))) {
2509 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
2510 if (match(V: RHS, P: m_VecReverse(Op0: m_Value(V&: V2))) &&
2511 (LHS->hasOneUse() || RHS->hasOneUse() ||
2512 (LHS == RHS && LHS->hasNUses(N: 2))))
2513 return createBinOpReverse(V1, V2);
2514
2515 // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
2516 if (LHS->hasOneUse() && isSplatValue(V: RHS))
2517 return createBinOpReverse(V1, RHS);
2518 }
2519 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2520 else if (isSplatValue(V: LHS) && match(V: RHS, P: m_OneUse(SubPattern: m_VecReverse(Op0: m_Value(V&: V2)))))
2521 return createBinOpReverse(LHS, V2);
2522
2523 auto createBinOpVPReverse = [&](Value *X, Value *Y, Value *EVL) {
2524 Value *V = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y, Name: Inst.getName());
2525 if (auto *BO = dyn_cast<BinaryOperator>(Val: V))
2526 BO->copyIRFlags(V: &Inst);
2527
2528 ElementCount EC = cast<VectorType>(Val: V->getType())->getElementCount();
2529 Value *AllTrueMask = Builder.CreateVectorSplat(EC, V: Builder.getTrue());
2530 Module *M = Inst.getModule();
2531 Function *F = Intrinsic::getOrInsertDeclaration(
2532 M, id: Intrinsic::experimental_vp_reverse, Tys: V->getType());
2533 return CallInst::Create(Func: F, Args: {V, AllTrueMask, EVL});
2534 };
2535
2536 Value *EVL;
2537 if (match(V: LHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>(
2538 Op0: m_Value(V&: V1), Op1: m_AllOnes(), Op2: m_Value(V&: EVL)))) {
2539 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
2540 if (match(V: RHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>(
2541 Op0: m_Value(V&: V2), Op1: m_AllOnes(), Op2: m_Specific(V: EVL))) &&
2542 (LHS->hasOneUse() || RHS->hasOneUse() ||
2543 (LHS == RHS && LHS->hasNUses(N: 2))))
2544 return createBinOpVPReverse(V1, V2, EVL);
2545
2546 // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
2547 if (LHS->hasOneUse() && isSplatValue(V: RHS))
2548 return createBinOpVPReverse(V1, RHS, EVL);
2549 }
2550 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2551 else if (isSplatValue(V: LHS) &&
2552 match(V: RHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>(
2553 Op0: m_Value(V&: V2), Op1: m_AllOnes(), Op2: m_Value(V&: EVL))))
2554 return createBinOpVPReverse(LHS, V2, EVL);
2555
2556 if (Instruction *Folded =
2557 foldSpliceBinOp<Intrinsic::vector_splice_left>(Inst, Builder))
2558 return Folded;
2559 if (Instruction *Folded =
2560 foldSpliceBinOp<Intrinsic::vector_splice_right>(Inst, Builder))
2561 return Folded;
2562
2563 // It may not be safe to reorder shuffles and things like div, urem, etc.
2564 // because we may trap when executing those ops on unknown vector elements.
2565 // See PR20059.
2566 if (!isSafeToSpeculativelyExecuteWithVariableReplaced(I: &Inst))
2567 return nullptr;
2568
2569 auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
2570 Value *XY = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y);
2571 if (auto *BO = dyn_cast<BinaryOperator>(Val: XY))
2572 BO->copyIRFlags(V: &Inst);
2573 return new ShuffleVectorInst(XY, M);
2574 };
2575
2576 // If both arguments of the binary operation are shuffles that use the same
2577 // mask and shuffle within a single vector, move the shuffle after the binop.
2578 if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(), mask: m_Mask(Mask))) &&
2579 match(V: RHS, P: m_Shuffle(v1: m_Value(V&: V2), v2: m_Poison(), mask: m_SpecificMask(Mask))) &&
2580 V1->getType() == V2->getType() &&
2581 (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
2582 // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
2583 return createBinOpShuffle(V1, V2, Mask);
2584 }
2585
2586 // If both arguments of a commutative binop are select-shuffles that use the
2587 // same mask with commuted operands, the shuffles are unnecessary.
2588 if (Inst.isCommutative() &&
2589 match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Value(V&: V2), mask: m_Mask(Mask))) &&
2590 match(V: RHS,
2591 P: m_Shuffle(v1: m_Specific(V: V2), v2: m_Specific(V: V1), mask: m_SpecificMask(Mask)))) {
2592 auto *LShuf = cast<ShuffleVectorInst>(Val: LHS);
2593 auto *RShuf = cast<ShuffleVectorInst>(Val: RHS);
2594 // TODO: Allow shuffles that contain undefs in the mask?
2595 // That is legal, but it reduces undef knowledge.
2596 // TODO: Allow arbitrary shuffles by shuffling after binop?
2597 // That might be legal, but we have to deal with poison.
2598 if (LShuf->isSelect() &&
2599 !is_contained(Range: LShuf->getShuffleMask(), Element: PoisonMaskElem) &&
2600 RShuf->isSelect() &&
2601 !is_contained(Range: RShuf->getShuffleMask(), Element: PoisonMaskElem)) {
2602 // Example:
2603 // LHS = shuffle V1, V2, <0, 5, 6, 3>
2604 // RHS = shuffle V2, V1, <0, 5, 6, 3>
2605 // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
2606 Instruction *NewBO = BinaryOperator::Create(Op: Opcode, S1: V1, S2: V2);
2607 NewBO->copyIRFlags(V: &Inst);
2608 return NewBO;
2609 }
2610 }
2611
2612 // If one argument is a shuffle within one vector and the other is a constant,
2613 // try moving the shuffle after the binary operation. This canonicalization
2614 // intends to move shuffles closer to other shuffles and binops closer to
2615 // other binops, so they can be folded. It may also enable demanded elements
2616 // transforms.
2617 Constant *C;
2618 if (match(V: &Inst, P: m_c_BinOp(L: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(),
2619 mask: m_Mask(Mask))),
2620 R: m_ImmConstant(C)))) {
2621 assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() &&
2622 "Shuffle should not change scalar type");
2623
2624 bool ConstOp1 = isa<Constant>(Val: RHS);
2625 if (Constant *NewC =
2626 unshuffleConstant(ShMask: Mask, C, NewCTy: cast<VectorType>(Val: V1->getType()))) {
2627 // For fixed vectors, lanes of NewC not used by the shuffle will be poison
2628 // which will cause UB for div/rem. Mask them with a safe constant.
2629 if (isa<FixedVectorType>(Val: V1->getType()) && Inst.isIntDivRem())
2630 NewC = getSafeVectorConstantForBinop(Opcode, In: NewC, IsRHSConstant: ConstOp1);
2631
2632 // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
2633 // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
2634 Value *NewLHS = ConstOp1 ? V1 : NewC;
2635 Value *NewRHS = ConstOp1 ? NewC : V1;
2636 return createBinOpShuffle(NewLHS, NewRHS, Mask);
2637 }
2638 }
2639
2640 // Try to reassociate to sink a splat shuffle after a binary operation.
2641 if (Inst.isAssociative() && Inst.isCommutative()) {
2642 // Canonicalize shuffle operand as LHS.
2643 if (isa<ShuffleVectorInst>(Val: RHS))
2644 std::swap(a&: LHS, b&: RHS);
2645
2646 Value *X;
2647 ArrayRef<int> MaskC;
2648 int SplatIndex;
2649 Value *Y, *OtherOp;
2650 if (!match(V: LHS,
2651 P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_Mask(MaskC)))) ||
2652 !match(Mask: MaskC, P: m_SplatOrPoisonMask(SplatIndex)) ||
2653 X->getType() != Inst.getType() ||
2654 !match(V: RHS, P: m_OneUse(SubPattern: m_BinOp(Opcode, L: m_Value(V&: Y), R: m_Value(V&: OtherOp)))))
2655 return nullptr;
2656
2657 // FIXME: This may not be safe if the analysis allows undef elements. By
2658 // moving 'Y' before the splat shuffle, we are implicitly assuming
2659 // that it is not undef/poison at the splat index.
2660 if (isSplatValue(V: OtherOp, Index: SplatIndex)) {
2661 std::swap(a&: Y, b&: OtherOp);
2662 } else if (!isSplatValue(V: Y, Index: SplatIndex)) {
2663 return nullptr;
2664 }
2665
2666 // X and Y are splatted values, so perform the binary operation on those
2667 // values followed by a splat followed by the 2nd binary operation:
2668 // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
2669 Value *NewBO = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y);
2670 SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
2671 Value *NewSplat = Builder.CreateShuffleVector(V: NewBO, Mask: NewMask);
2672 Instruction *R = BinaryOperator::Create(Op: Opcode, S1: NewSplat, S2: OtherOp);
2673
2674 // Intersect FMF on both new binops. Other (poison-generating) flags are
2675 // dropped to be safe.
2676 if (isa<FPMathOperator>(Val: R)) {
2677 R->copyFastMathFlags(I: &Inst);
2678 R->andIRFlags(V: RHS);
2679 }
2680 if (auto *NewInstBO = dyn_cast<BinaryOperator>(Val: NewBO))
2681 NewInstBO->copyIRFlags(V: R);
2682 return R;
2683 }
2684
2685 return nullptr;
2686}
2687
2688/// Try to narrow the width of a binop if at least 1 operand is an extend of
2689/// of a value. This requires a potentially expensive known bits check to make
2690/// sure the narrow op does not overflow.
2691Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
2692 // We need at least one extended operand.
2693 Value *Op0 = BO.getOperand(i_nocapture: 0), *Op1 = BO.getOperand(i_nocapture: 1);
2694
2695 // If this is a sub, we swap the operands since we always want an extension
2696 // on the RHS. The LHS can be an extension or a constant.
2697 if (BO.getOpcode() == Instruction::Sub)
2698 std::swap(a&: Op0, b&: Op1);
2699
2700 Value *X;
2701 bool IsSext = match(V: Op0, P: m_SExt(Op: m_Value(V&: X)));
2702 if (!IsSext && !match(V: Op0, P: m_ZExt(Op: m_Value(V&: X))))
2703 return nullptr;
2704
2705 // If both operands are the same extension from the same source type and we
2706 // can eliminate at least one (hasOneUse), this might work.
2707 CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
2708 Value *Y;
2709 if (!(match(V: Op1, P: m_ZExtOrSExt(Op: m_Value(V&: Y))) && X->getType() == Y->getType() &&
2710 cast<Operator>(Val: Op1)->getOpcode() == CastOpc &&
2711 (Op0->hasOneUse() || Op1->hasOneUse()))) {
2712 // If that did not match, see if we have a suitable constant operand.
2713 // Truncating and extending must produce the same constant.
2714 Constant *WideC;
2715 if (!Op0->hasOneUse() || !match(V: Op1, P: m_Constant(C&: WideC)))
2716 return nullptr;
2717 Constant *NarrowC = getLosslessInvCast(C: WideC, InvCastTo: X->getType(), CastOp: CastOpc, DL);
2718 if (!NarrowC)
2719 return nullptr;
2720 Y = NarrowC;
2721 }
2722
2723 // Swap back now that we found our operands.
2724 if (BO.getOpcode() == Instruction::Sub)
2725 std::swap(a&: X, b&: Y);
2726
2727 // Both operands have narrow versions. Last step: the math must not overflow
2728 // in the narrow width.
2729 if (!willNotOverflow(Opcode: BO.getOpcode(), LHS: X, RHS: Y, CxtI: BO, IsSigned: IsSext))
2730 return nullptr;
2731
2732 // bo (ext X), (ext Y) --> ext (bo X, Y)
2733 // bo (ext X), C --> ext (bo X, C')
2734 Value *NarrowBO = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: X, RHS: Y, Name: "narrow");
2735 if (auto *NewBinOp = dyn_cast<BinaryOperator>(Val: NarrowBO)) {
2736 if (IsSext)
2737 NewBinOp->setHasNoSignedWrap();
2738 else
2739 NewBinOp->setHasNoUnsignedWrap();
2740 }
2741 return CastInst::Create(CastOpc, S: NarrowBO, Ty: BO.getType());
2742}
2743
2744/// Determine nowrap flags for (gep (gep p, x), y) to (gep p, (x + y))
2745/// transform.
2746static GEPNoWrapFlags getMergedGEPNoWrapFlags(GEPOperator &GEP1,
2747 GEPOperator &GEP2) {
2748 return GEP1.getNoWrapFlags().intersectForOffsetAdd(Other: GEP2.getNoWrapFlags());
2749}
2750
2751/// Thread a GEP operation with constant indices through the constant true/false
2752/// arms of a select.
2753static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
2754 InstCombiner::BuilderTy &Builder) {
2755 if (!GEP.hasAllConstantIndices())
2756 return nullptr;
2757
2758 Instruction *Sel;
2759 Value *Cond;
2760 Constant *TrueC, *FalseC;
2761 if (!match(V: GEP.getPointerOperand(), P: m_Instruction(I&: Sel)) ||
2762 !match(V: Sel,
2763 P: m_Select(C: m_Value(V&: Cond), L: m_Constant(C&: TrueC), R: m_Constant(C&: FalseC))))
2764 return nullptr;
2765
2766 // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
2767 // Propagate 'inbounds' and metadata from existing instructions.
2768 // Note: using IRBuilder to create the constants for efficiency.
2769 SmallVector<Value *, 4> IndexC(GEP.indices());
2770 GEPNoWrapFlags NW = GEP.getNoWrapFlags();
2771 Type *Ty = GEP.getSourceElementType();
2772 Value *NewTrueC = Builder.CreateGEP(Ty, Ptr: TrueC, IdxList: IndexC, Name: "", NW);
2773 Value *NewFalseC = Builder.CreateGEP(Ty, Ptr: FalseC, IdxList: IndexC, Name: "", NW);
2774 return SelectInst::Create(C: Cond, S1: NewTrueC, S2: NewFalseC, NameStr: "", InsertBefore: nullptr, MDFrom: Sel);
2775}
2776
2777// Canonicalization:
2778// gep T, (gep i8, base, C1), (Index + C2) into
2779// gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index
2780static Instruction *canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP,
2781 GEPOperator *Src,
2782 InstCombinerImpl &IC) {
2783 if (GEP.getNumIndices() != 1)
2784 return nullptr;
2785 auto &DL = IC.getDataLayout();
2786 Value *Base;
2787 const APInt *C1;
2788 if (!match(V: Src, P: m_PtrAdd(PointerOp: m_Value(V&: Base), OffsetOp: m_APInt(Res&: C1))))
2789 return nullptr;
2790 Value *VarIndex;
2791 const APInt *C2;
2792 Type *PtrTy = Src->getType()->getScalarType();
2793 unsigned IndexSizeInBits = DL.getIndexTypeSizeInBits(Ty: PtrTy);
2794 if (!match(V: GEP.getOperand(i_nocapture: 1), P: m_AddLike(L: m_Value(V&: VarIndex), R: m_APInt(Res&: C2))))
2795 return nullptr;
2796 if (C1->getBitWidth() != IndexSizeInBits ||
2797 C2->getBitWidth() != IndexSizeInBits)
2798 return nullptr;
2799 Type *BaseType = GEP.getSourceElementType();
2800 if (isa<ScalableVectorType>(Val: BaseType))
2801 return nullptr;
2802 APInt TypeSize(IndexSizeInBits, DL.getTypeAllocSize(Ty: BaseType));
2803 APInt NewOffset = TypeSize * *C2 + *C1;
2804 if (NewOffset.isZero() ||
2805 (Src->hasOneUse() && GEP.getOperand(i_nocapture: 1)->hasOneUse())) {
2806 GEPNoWrapFlags Flags = GEPNoWrapFlags::none();
2807 if (GEP.hasNoUnsignedWrap() &&
2808 cast<GEPOperator>(Val: Src)->hasNoUnsignedWrap() &&
2809 match(V: GEP.getOperand(i_nocapture: 1), P: m_NUWAddLike(L: m_Value(), R: m_Value()))) {
2810 Flags |= GEPNoWrapFlags::noUnsignedWrap();
2811 if (GEP.isInBounds() && cast<GEPOperator>(Val: Src)->isInBounds())
2812 Flags |= GEPNoWrapFlags::inBounds();
2813 }
2814
2815 Value *GEPConst =
2816 IC.Builder.CreatePtrAdd(Ptr: Base, Offset: IC.Builder.getInt(AI: NewOffset), Name: "", NW: Flags);
2817 return GetElementPtrInst::Create(PointeeType: BaseType, Ptr: GEPConst, IdxList: VarIndex, NW: Flags);
2818 }
2819
2820 return nullptr;
2821}
2822
2823/// Combine constant offsets separated by variable offsets.
2824/// ptradd (ptradd (ptradd p, C1), x), C2 -> ptradd (ptradd p, x), C1+C2
2825static Instruction *combineConstantOffsets(GetElementPtrInst &GEP,
2826 InstCombinerImpl &IC) {
2827 if (!GEP.hasAllConstantIndices())
2828 return nullptr;
2829
2830 GEPNoWrapFlags NW = GEPNoWrapFlags::all();
2831 SmallVector<GetElementPtrInst *> Skipped;
2832 auto *InnerGEP = dyn_cast<GetElementPtrInst>(Val: GEP.getPointerOperand());
2833 while (true) {
2834 if (!InnerGEP)
2835 return nullptr;
2836
2837 NW = NW.intersectForReassociate(Other: InnerGEP->getNoWrapFlags());
2838 if (InnerGEP->hasAllConstantIndices())
2839 break;
2840
2841 if (!InnerGEP->hasOneUse())
2842 return nullptr;
2843
2844 Skipped.push_back(Elt: InnerGEP);
2845 InnerGEP = dyn_cast<GetElementPtrInst>(Val: InnerGEP->getPointerOperand());
2846 }
2847
2848 // The two constant offset GEPs are directly adjacent: Let normal offset
2849 // merging handle it.
2850 if (Skipped.empty())
2851 return nullptr;
2852
2853 // FIXME: This one-use check is not strictly necessary. Consider relaxing it
2854 // if profitable.
2855 if (!InnerGEP->hasOneUse())
2856 return nullptr;
2857
2858 // Don't bother with vector splats.
2859 Type *Ty = GEP.getType();
2860 if (InnerGEP->getType() != Ty)
2861 return nullptr;
2862
2863 const DataLayout &DL = IC.getDataLayout();
2864 APInt Offset(DL.getIndexTypeSizeInBits(Ty), 0);
2865 if (!GEP.accumulateConstantOffset(DL, Offset) ||
2866 !InnerGEP->accumulateConstantOffset(DL, Offset))
2867 return nullptr;
2868
2869 IC.replaceOperand(I&: *Skipped.back(), OpNum: 0, V: InnerGEP->getPointerOperand());
2870 for (GetElementPtrInst *SkippedGEP : Skipped)
2871 SkippedGEP->setNoWrapFlags(NW);
2872
2873 return IC.replaceInstUsesWith(
2874 I&: GEP,
2875 V: IC.Builder.CreatePtrAdd(Ptr: Skipped.front(), Offset: IC.Builder.getInt(AI: Offset), Name: "",
2876 NW: NW.intersectForOffsetAdd(Other: GEP.getNoWrapFlags())));
2877}
2878
2879Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
2880 GEPOperator *Src) {
2881 // Combine Indices - If the source pointer to this getelementptr instruction
2882 // is a getelementptr instruction with matching element type, combine the
2883 // indices of the two getelementptr instructions into a single instruction.
2884 if (!shouldMergeGEPs(GEP&: *cast<GEPOperator>(Val: &GEP), Src&: *Src))
2885 return nullptr;
2886
2887 if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, IC&: *this))
2888 return I;
2889
2890 if (auto *I = combineConstantOffsets(GEP, IC&: *this))
2891 return I;
2892
2893 if (Src->getResultElementType() != GEP.getSourceElementType())
2894 return nullptr;
2895
2896 // Fold chained GEP with constant base into single GEP:
2897 // gep i8, (gep i8, %base, C1), (select Cond, C2, C3)
2898 // -> gep i8, %base, (select Cond, C1+C2, C1+C3)
2899 if (Src->hasOneUse() && GEP.getNumIndices() == 1 &&
2900 Src->getNumIndices() == 1) {
2901 Value *SrcIdx = *Src->idx_begin();
2902 Value *GEPIdx = *GEP.idx_begin();
2903 const APInt *ConstOffset, *TrueVal, *FalseVal;
2904 Value *Cond;
2905
2906 if ((match(V: SrcIdx, P: m_APInt(Res&: ConstOffset)) &&
2907 match(V: GEPIdx,
2908 P: m_Select(C: m_Value(V&: Cond), L: m_APInt(Res&: TrueVal), R: m_APInt(Res&: FalseVal)))) ||
2909 (match(V: GEPIdx, P: m_APInt(Res&: ConstOffset)) &&
2910 match(V: SrcIdx,
2911 P: m_Select(C: m_Value(V&: Cond), L: m_APInt(Res&: TrueVal), R: m_APInt(Res&: FalseVal))))) {
2912 auto *Select = isa<SelectInst>(Val: GEPIdx) ? cast<SelectInst>(Val: GEPIdx)
2913 : cast<SelectInst>(Val: SrcIdx);
2914
2915 // Make sure the select has only one use.
2916 if (!Select->hasOneUse())
2917 return nullptr;
2918
2919 if (TrueVal->getBitWidth() != ConstOffset->getBitWidth() ||
2920 FalseVal->getBitWidth() != ConstOffset->getBitWidth())
2921 return nullptr;
2922
2923 APInt NewTrueVal = *ConstOffset + *TrueVal;
2924 APInt NewFalseVal = *ConstOffset + *FalseVal;
2925 Constant *NewTrue = ConstantInt::get(Ty: Select->getType(), V: NewTrueVal);
2926 Constant *NewFalse = ConstantInt::get(Ty: Select->getType(), V: NewFalseVal);
2927 Value *NewSelect = Builder.CreateSelect(
2928 C: Cond, True: NewTrue, False: NewFalse, /*Name=*/"",
2929 /*MDFrom=*/(ProfcheckDisableMetadataFixes ? nullptr : Select));
2930 GEPNoWrapFlags Flags =
2931 getMergedGEPNoWrapFlags(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP));
2932 return replaceInstUsesWith(I&: GEP,
2933 V: Builder.CreateGEP(Ty: GEP.getResultElementType(),
2934 Ptr: Src->getPointerOperand(),
2935 IdxList: NewSelect, Name: "", NW: Flags));
2936 }
2937 }
2938
2939 // Find out whether the last index in the source GEP is a sequential idx.
2940 bool EndsWithSequential = false;
2941 for (gep_type_iterator I = gep_type_begin(GEP: *Src), E = gep_type_end(GEP: *Src);
2942 I != E; ++I)
2943 EndsWithSequential = I.isSequential();
2944 if (!EndsWithSequential)
2945 return nullptr;
2946
2947 // Replace: gep (gep %P, long B), long A, ...
2948 // With: T = long A+B; gep %P, T, ...
2949 Value *SO1 = Src->getOperand(i_nocapture: Src->getNumOperands() - 1);
2950 Value *GO1 = GEP.getOperand(i_nocapture: 1);
2951
2952 // If they aren't the same type, then the input hasn't been processed
2953 // by the loop above yet (which canonicalizes sequential index types to
2954 // intptr_t). Just avoid transforming this until the input has been
2955 // normalized.
2956 if (SO1->getType() != GO1->getType())
2957 return nullptr;
2958
2959 Value *Sum =
2960 simplifyAddInst(LHS: GO1, RHS: SO1, IsNSW: false, IsNUW: false, Q: SQ.getWithInstruction(I: &GEP));
2961 // Only do the combine when we are sure the cost after the
2962 // merge is never more than that before the merge.
2963 if (Sum == nullptr)
2964 return nullptr;
2965
2966 SmallVector<Value *, 8> Indices;
2967 Indices.append(in_start: Src->op_begin() + 1, in_end: Src->op_end() - 1);
2968 Indices.push_back(Elt: Sum);
2969 Indices.append(in_start: GEP.op_begin() + 2, in_end: GEP.op_end());
2970
2971 // Don't create GEPs with more than one non-zero index.
2972 unsigned NumNonZeroIndices = count_if(Range&: Indices, P: [](Value *Idx) {
2973 auto *C = dyn_cast<Constant>(Val: Idx);
2974 return !C || !C->isNullValue();
2975 });
2976 if (NumNonZeroIndices > 1)
2977 return nullptr;
2978
2979 return replaceInstUsesWith(
2980 I&: GEP, V: Builder.CreateGEP(
2981 Ty: Src->getSourceElementType(), Ptr: Src->getOperand(i_nocapture: 0), IdxList: Indices, Name: "",
2982 NW: getMergedGEPNoWrapFlags(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP))));
2983}
2984
2985Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
2986 BuilderTy *Builder,
2987 bool &DoesConsume, unsigned Depth) {
2988 static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1));
2989 // ~(~(X)) -> X.
2990 Value *A, *B;
2991 if (match(V, P: m_Not(V: m_Value(V&: A)))) {
2992 DoesConsume = true;
2993 return A;
2994 }
2995
2996 Constant *C;
2997 // Constants can be considered to be not'ed values.
2998 if (match(V, P: m_ImmConstant(C)))
2999 return ConstantExpr::getNot(C);
3000
3001 if (Depth++ >= MaxAnalysisRecursionDepth)
3002 return nullptr;
3003
3004 // The rest of the cases require that we invert all uses so don't bother
3005 // doing the analysis if we know we can't use the result.
3006 if (!WillInvertAllUses)
3007 return nullptr;
3008
3009 // Compares can be inverted if all of their uses are being modified to use
3010 // the ~V.
3011 if (auto *I = dyn_cast<CmpInst>(Val: V)) {
3012 if (Builder != nullptr)
3013 return Builder->CreateCmp(Pred: I->getInversePredicate(), LHS: I->getOperand(i_nocapture: 0),
3014 RHS: I->getOperand(i_nocapture: 1));
3015 return NonNull;
3016 }
3017
3018 // If `V` is of the form `A + B` then `-1 - V` can be folded into
3019 // `(-1 - B) - A` if we are willing to invert all of the uses.
3020 if (match(V, P: m_Add(L: m_Value(V&: A), R: m_Value(V&: B)))) {
3021 if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
3022 DoesConsume, Depth))
3023 return Builder ? Builder->CreateSub(LHS: BV, RHS: A) : NonNull;
3024 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3025 DoesConsume, Depth))
3026 return Builder ? Builder->CreateSub(LHS: AV, RHS: B) : NonNull;
3027 return nullptr;
3028 }
3029
3030 // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded
3031 // into `A ^ B` if we are willing to invert all of the uses.
3032 if (match(V, P: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B)))) {
3033 if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
3034 DoesConsume, Depth))
3035 return Builder ? Builder->CreateXor(LHS: A, RHS: BV) : NonNull;
3036 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3037 DoesConsume, Depth))
3038 return Builder ? Builder->CreateXor(LHS: AV, RHS: B) : NonNull;
3039 return nullptr;
3040 }
3041
3042 // If `V` is of the form `B - A` then `-1 - V` can be folded into
3043 // `A + (-1 - B)` if we are willing to invert all of the uses.
3044 if (match(V, P: m_Sub(L: m_Value(V&: A), R: m_Value(V&: B)))) {
3045 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3046 DoesConsume, Depth))
3047 return Builder ? Builder->CreateAdd(LHS: AV, RHS: B) : NonNull;
3048 return nullptr;
3049 }
3050
3051 // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded
3052 // into `A s>> B` if we are willing to invert all of the uses.
3053 if (match(V, P: m_AShr(L: m_Value(V&: A), R: m_Value(V&: B)))) {
3054 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3055 DoesConsume, Depth))
3056 return Builder ? Builder->CreateAShr(LHS: AV, RHS: B) : NonNull;
3057 return nullptr;
3058 }
3059
3060 Value *Cond;
3061 // LogicOps are special in that we canonicalize them at the cost of an
3062 // instruction.
3063 bool IsSelect = match(V, P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: A), R: m_Value(V&: B))) &&
3064 !shouldAvoidAbsorbingNotIntoSelect(SI: *cast<SelectInst>(Val: V));
3065 // Selects/min/max with invertible operands are freely invertible
3066 if (IsSelect || match(V, P: m_MaxOrMin(L: m_Value(V&: A), R: m_Value(V&: B)))) {
3067 bool LocalDoesConsume = DoesConsume;
3068 if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder*/ nullptr,
3069 DoesConsume&: LocalDoesConsume, Depth))
3070 return nullptr;
3071 if (Value *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3072 DoesConsume&: LocalDoesConsume, Depth)) {
3073 DoesConsume = LocalDoesConsume;
3074 if (Builder != nullptr) {
3075 Value *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
3076 DoesConsume, Depth);
3077 assert(NotB != nullptr &&
3078 "Unable to build inverted value for known freely invertable op");
3079 if (auto *II = dyn_cast<IntrinsicInst>(Val: V))
3080 return Builder->CreateBinaryIntrinsic(
3081 ID: getInverseMinMaxIntrinsic(MinMaxID: II->getIntrinsicID()), LHS: NotA, RHS: NotB);
3082 return Builder->CreateSelect(
3083 C: Cond, True: NotA, False: NotB, Name: "",
3084 MDFrom: ProfcheckDisableMetadataFixes ? nullptr : cast<Instruction>(Val: V));
3085 }
3086 return NonNull;
3087 }
3088 }
3089
3090 if (PHINode *PN = dyn_cast<PHINode>(Val: V)) {
3091 bool LocalDoesConsume = DoesConsume;
3092 SmallVector<std::pair<Value *, BasicBlock *>, 8> IncomingValues;
3093 for (Use &U : PN->operands()) {
3094 BasicBlock *IncomingBlock = PN->getIncomingBlock(U);
3095 Value *NewIncomingVal = getFreelyInvertedImpl(
3096 V: U.get(), /*WillInvertAllUses=*/false,
3097 /*Builder=*/nullptr, DoesConsume&: LocalDoesConsume, Depth: MaxAnalysisRecursionDepth - 1);
3098 if (NewIncomingVal == nullptr)
3099 return nullptr;
3100 // Make sure that we can safely erase the original PHI node.
3101 if (NewIncomingVal == V)
3102 return nullptr;
3103 if (Builder != nullptr)
3104 IncomingValues.emplace_back(Args&: NewIncomingVal, Args&: IncomingBlock);
3105 }
3106
3107 DoesConsume = LocalDoesConsume;
3108 if (Builder != nullptr) {
3109 IRBuilderBase::InsertPointGuard Guard(*Builder);
3110 Builder->SetInsertPoint(PN);
3111 PHINode *NewPN =
3112 Builder->CreatePHI(Ty: PN->getType(), NumReservedValues: PN->getNumIncomingValues());
3113 for (auto [Val, Pred] : IncomingValues)
3114 NewPN->addIncoming(V: Val, BB: Pred);
3115 return NewPN;
3116 }
3117 return NonNull;
3118 }
3119
3120 if (match(V, P: m_SExtLike(Op: m_Value(V&: A)))) {
3121 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3122 DoesConsume, Depth))
3123 return Builder ? Builder->CreateSExt(V: AV, DestTy: V->getType()) : NonNull;
3124 return nullptr;
3125 }
3126
3127 if (match(V, P: m_Trunc(Op: m_Value(V&: A)))) {
3128 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3129 DoesConsume, Depth))
3130 return Builder ? Builder->CreateTrunc(V: AV, DestTy: V->getType()) : NonNull;
3131 return nullptr;
3132 }
3133
3134 // De Morgan's Laws:
3135 // (~(A | B)) -> (~A & ~B)
3136 // (~(A & B)) -> (~A | ~B)
3137 auto TryInvertAndOrUsingDeMorgan = [&](Instruction::BinaryOps Opcode,
3138 bool IsLogical, Value *A,
3139 Value *B) -> Value * {
3140 bool LocalDoesConsume = DoesConsume;
3141 if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder=*/nullptr,
3142 DoesConsume&: LocalDoesConsume, Depth))
3143 return nullptr;
3144 if (auto *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3145 DoesConsume&: LocalDoesConsume, Depth)) {
3146 auto *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
3147 DoesConsume&: LocalDoesConsume, Depth);
3148 DoesConsume = LocalDoesConsume;
3149 if (IsLogical)
3150 return Builder ? Builder->CreateLogicalOp(Opc: Opcode, Cond1: NotA, Cond2: NotB) : NonNull;
3151 return Builder ? Builder->CreateBinOp(Opc: Opcode, LHS: NotA, RHS: NotB) : NonNull;
3152 }
3153
3154 return nullptr;
3155 };
3156
3157 if (match(V, P: m_Or(L: m_Value(V&: A), R: m_Value(V&: B))))
3158 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/false, A,
3159 B);
3160
3161 if (match(V, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B))))
3162 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/false, A,
3163 B);
3164
3165 if (match(V, P: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B))))
3166 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/true, A,
3167 B);
3168
3169 if (match(V, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B))))
3170 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/true, A,
3171 B);
3172
3173 return nullptr;
3174}
3175
3176/// Return true if we should canonicalize the gep to an i8 ptradd.
3177static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
3178 Value *PtrOp = GEP.getOperand(i_nocapture: 0);
3179 Type *GEPEltType = GEP.getSourceElementType();
3180 if (GEPEltType->isIntegerTy(Bitwidth: 8))
3181 return false;
3182
3183 // Canonicalize scalable GEPs to an explicit offset using the llvm.vscale
3184 // intrinsic. This has better support in BasicAA.
3185 if (GEPEltType->isScalableTy())
3186 return true;
3187
3188 // gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two multiplies
3189 // together.
3190 if (GEP.getNumIndices() == 1 &&
3191 match(V: GEP.getOperand(i_nocapture: 1),
3192 P: m_OneUse(SubPattern: m_CombineOr(L: m_Mul(L: m_Value(), R: m_ConstantInt()),
3193 R: m_Shl(L: m_Value(), R: m_ConstantInt())))))
3194 return true;
3195
3196 // gep (gep %p, C1), %x, C2 is expanded so the two constants can
3197 // possibly be merged together.
3198 auto PtrOpGep = dyn_cast<GEPOperator>(Val: PtrOp);
3199 return PtrOpGep && PtrOpGep->hasAllConstantIndices() &&
3200 any_of(Range: GEP.indices(), P: [](Value *V) {
3201 const APInt *C;
3202 return match(V, P: m_APInt(Res&: C)) && !C->isZero();
3203 });
3204}
3205
3206static Instruction *foldGEPOfPhi(GetElementPtrInst &GEP, PHINode *PN,
3207 IRBuilderBase &Builder) {
3208 auto *Op1 = dyn_cast<GetElementPtrInst>(Val: PN->getOperand(i_nocapture: 0));
3209 if (!Op1)
3210 return nullptr;
3211
3212 // Don't fold a GEP into itself through a PHI node. This can only happen
3213 // through the back-edge of a loop. Folding a GEP into itself means that
3214 // the value of the previous iteration needs to be stored in the meantime,
3215 // thus requiring an additional register variable to be live, but not
3216 // actually achieving anything (the GEP still needs to be executed once per
3217 // loop iteration).
3218 if (Op1 == &GEP)
3219 return nullptr;
3220 GEPNoWrapFlags NW = Op1->getNoWrapFlags();
3221
3222 int DI = -1;
3223
3224 for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
3225 auto *Op2 = dyn_cast<GetElementPtrInst>(Val&: *I);
3226 if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
3227 Op1->getSourceElementType() != Op2->getSourceElementType())
3228 return nullptr;
3229
3230 // As for Op1 above, don't try to fold a GEP into itself.
3231 if (Op2 == &GEP)
3232 return nullptr;
3233
3234 // Keep track of the type as we walk the GEP.
3235 Type *CurTy = nullptr;
3236
3237 for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
3238 if (Op1->getOperand(i_nocapture: J)->getType() != Op2->getOperand(i_nocapture: J)->getType())
3239 return nullptr;
3240
3241 if (Op1->getOperand(i_nocapture: J) != Op2->getOperand(i_nocapture: J)) {
3242 if (DI == -1) {
3243 // We have not seen any differences yet in the GEPs feeding the
3244 // PHI yet, so we record this one if it is allowed to be a
3245 // variable.
3246
3247 // The first two arguments can vary for any GEP, the rest have to be
3248 // static for struct slots
3249 if (J > 1) {
3250 assert(CurTy && "No current type?");
3251 if (CurTy->isStructTy())
3252 return nullptr;
3253 }
3254
3255 DI = J;
3256 } else {
3257 // The GEP is different by more than one input. While this could be
3258 // extended to support GEPs that vary by more than one variable it
3259 // doesn't make sense since it greatly increases the complexity and
3260 // would result in an R+R+R addressing mode which no backend
3261 // directly supports and would need to be broken into several
3262 // simpler instructions anyway.
3263 return nullptr;
3264 }
3265 }
3266
3267 // Sink down a layer of the type for the next iteration.
3268 if (J > 0) {
3269 if (J == 1) {
3270 CurTy = Op1->getSourceElementType();
3271 } else {
3272 CurTy =
3273 GetElementPtrInst::getTypeAtIndex(Ty: CurTy, Idx: Op1->getOperand(i_nocapture: J));
3274 }
3275 }
3276 }
3277
3278 NW &= Op2->getNoWrapFlags();
3279 }
3280
3281 // If not all GEPs are identical we'll have to create a new PHI node.
3282 // Check that the old PHI node has only one use so that it will get
3283 // removed.
3284 if (DI != -1 && !PN->hasOneUse())
3285 return nullptr;
3286
3287 auto *NewGEP = cast<GetElementPtrInst>(Val: Op1->clone());
3288 NewGEP->setNoWrapFlags(NW);
3289
3290 if (DI == -1) {
3291 // All the GEPs feeding the PHI are identical. Clone one down into our
3292 // BB so that it can be merged with the current GEP.
3293 } else {
3294 // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
3295 // into the current block so it can be merged, and create a new PHI to
3296 // set that index.
3297 PHINode *NewPN;
3298 {
3299 IRBuilderBase::InsertPointGuard Guard(Builder);
3300 Builder.SetInsertPoint(PN);
3301 NewPN = Builder.CreatePHI(Ty: Op1->getOperand(i_nocapture: DI)->getType(),
3302 NumReservedValues: PN->getNumOperands());
3303 }
3304
3305 for (auto &I : PN->operands())
3306 NewPN->addIncoming(V: cast<GEPOperator>(Val&: I)->getOperand(i_nocapture: DI),
3307 BB: PN->getIncomingBlock(U: I));
3308
3309 NewGEP->setOperand(i_nocapture: DI, Val_nocapture: NewPN);
3310 }
3311
3312 NewGEP->insertBefore(BB&: *GEP.getParent(), InsertPos: GEP.getParent()->getFirstInsertionPt());
3313 return NewGEP;
3314}
3315
/// Combine/canonicalize a getelementptr instruction.
///
/// The folds below are tried in order, and the first one that fires returns:
///  - simplification via simplifyGEPInst and, for fixed vector results,
///    SimplifyDemandedVectorElts;
///  - index normalization (indices over zero-size element types become zero,
///    indices are cast to the data layout's index type);
///  - canonicalization of all-constant GEPs, and of GEPs accepted by
///    shouldCanonicalizeGEPToPtrAdd, to an i8 ptradd;
///  - stripping of a trailing / leading scalar zero index, scalarizing splat
///    vector operands, and splitting a GEP that has more than one index after
///    an optional leading zero;
///  - rewriting the source element type of single-index GEPs to i8 or
///    [N x i8] (done in place, without returning);
///  - folding through a PHI of GEPs (foldGEPOfPhi) or a GEP pointer operand
///    (visitGEPOfGEP);
///  - single-index rewrites for ptrtoint differences and exact div/shr
///    indices, inbounds and nuw flag inference, splitting add-like indices
///    into two GEPs, foldSelectGEP, and srem -> urem for inbounds+nuw GEPs.
///
/// \returns the result of the first fold that fires (a newly created
/// instruction, `&GEP` when it was updated in place, or whatever
/// replaceInstUsesWith / replaceOperand return), or nullptr if no fold
/// returned early.
3316Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
3317 Value *PtrOp = GEP.getOperand(i_nocapture: 0);
3318 SmallVector<Value *, 8> Indices(GEP.indices());
3319 Type *GEPType = GEP.getType();
3320 Type *GEPEltType = GEP.getSourceElementType();
3321 if (Value *V =
3322 simplifyGEPInst(SrcTy: GEPEltType, Ptr: PtrOp, Indices, NW: GEP.getNoWrapFlags(),
3323 Q: SQ.getWithInstruction(I: &GEP)))
3324 return replaceInstUsesWith(I&: GEP, V);
3325
3326 // For vector geps, use the generic demanded vector support.
3327 // Skip if GEP return type is scalable. The number of elements is unknown at
3328 // compile-time.
3329 if (auto *GEPFVTy = dyn_cast<FixedVectorType>(Val: GEPType)) {
3330 auto VWidth = GEPFVTy->getNumElements();
3331 APInt PoisonElts(VWidth, 0);
3332 APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth));
3333 if (Value *V = SimplifyDemandedVectorElts(V: &GEP, DemandedElts: AllOnesEltMask,
3334 PoisonElts)) {
3335 if (V != &GEP)
3336 return replaceInstUsesWith(I&: GEP, V);
3337 return &GEP;
3338 }
3339 }
3340
3341 // Eliminate unneeded casts for indices, and replace indices which displace
3342 // by multiples of a zero size type with zero.
3343 bool MadeChange = false;
3344
3345 // Index width may not be the same width as pointer width.
3346 // Data layout chooses the right type based on supported integer types.
3347 Type *NewScalarIndexTy =
3348 DL.getIndexType(PtrTy: GEP.getPointerOperandType()->getScalarType());
3349
3350 gep_type_iterator GTI = gep_type_begin(GEP);
3351 for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
3352 ++I, ++GTI) {
3353 // Skip indices into struct types.
3354 if (GTI.isStruct())
3355 continue;
3356
 // Vector indices keep their element count; only the scalar type is
 // replaced by the index type.
3357 Type *IndexTy = (*I)->getType();
3358 Type *NewIndexType =
3359 IndexTy->isVectorTy()
3360 ? VectorType::get(ElementType: NewScalarIndexTy,
3361 EC: cast<VectorType>(Val: IndexTy)->getElementCount())
3362 : NewScalarIndexTy;
3363
3364 // If the element type has zero size then any index over it is equivalent
3365 // to an index of zero, so replace it with zero if it is not zero already.
3366 Type *EltTy = GTI.getIndexedType();
3367 if (EltTy->isSized() && DL.getTypeAllocSize(Ty: EltTy).isZero())
3368 if (!isa<Constant>(Val: *I) || !match(V: I->get(), P: m_Zero())) {
3369 *I = Constant::getNullValue(Ty: NewIndexType);
3370 MadeChange = true;
3371 }
3372
3373 if (IndexTy != NewIndexType) {
3374 // If we are using a wider index than needed for this platform, shrink
3375 // it to what we need. If narrower, sign-extend it to what we need.
3376 // This explicit cast can make subsequent optimizations more obvious.
3377 if (IndexTy->getScalarSizeInBits() <
3378 NewIndexType->getScalarSizeInBits()) {
 // With both nuw and nusw on the GEP a non-negative zext is emitted
 // instead of a sext.
3379 if (GEP.hasNoUnsignedWrap() && GEP.hasNoUnsignedSignedWrap())
3380 *I = Builder.CreateZExt(V: *I, DestTy: NewIndexType, Name: "", /*IsNonNeg=*/true);
3381 else
3382 *I = Builder.CreateSExt(V: *I, DestTy: NewIndexType);
3383 } else {
3384 *I = Builder.CreateTrunc(V: *I, DestTy: NewIndexType, Name: "", IsNUW: GEP.hasNoUnsignedWrap(),
3385 IsNSW: GEP.hasNoUnsignedSignedWrap());
3386 }
3387 MadeChange = true;
3388 }
3389 }
 // The operands were rewritten in place above; hand the GEP back
 // (presumably so that it gets revisited -- confirm against the driver).
3390 if (MadeChange)
3391 return &GEP;
3392
3393 // Canonicalize constant GEPs to i8 type.
3394 if (!GEPEltType->isIntegerTy(Bitwidth: 8) && GEP.hasAllConstantIndices()) {
3395 APInt Offset(DL.getIndexTypeSizeInBits(Ty: GEPType), 0);
3396 if (GEP.accumulateConstantOffset(DL, Offset))
3397 return replaceInstUsesWith(
3398 I&: GEP, V: Builder.CreatePtrAdd(Ptr: PtrOp, Offset: Builder.getInt(AI: Offset), Name: "",
3399 NW: GEP.getNoWrapFlags()));
3400 }
3401
3402 if (shouldCanonicalizeGEPToPtrAdd(GEP)) {
3403 Value *Offset = EmitGEPOffset(GEP: cast<GEPOperator>(Val: &GEP));
3404 Value *NewGEP =
3405 Builder.CreatePtrAdd(Ptr: PtrOp, Offset, Name: "", NW: GEP.getNoWrapFlags());
3406 return replaceInstUsesWith(I&: GEP, V: NewGEP);
3407 }
3408
 // NOTE(review): Indices.back()/front() below require at least one index;
 // presumably an index-less GEP was already folded by simplifyGEPInst
 // above -- confirm.
3409 // Strip trailing zero indices.
3410 auto *LastIdx = dyn_cast<Constant>(Val: Indices.back());
3411 if (LastIdx && LastIdx->isNullValue() && !LastIdx->getType()->isVectorTy()) {
3412 return replaceInstUsesWith(
3413 I&: GEP, V: Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: PtrOp,
3414 IdxList: drop_end(RangeOrContainer&: Indices), Name: "", NW: GEP.getNoWrapFlags()));
3415 }
3416
3417 // Strip leading zero indices.
3418 auto *FirstIdx = dyn_cast<Constant>(Val: Indices.front());
3419 if (FirstIdx && FirstIdx->isNullValue() &&
3420 !FirstIdx->getType()->isVectorTy()) {
3421 gep_type_iterator GTI = gep_type_begin(GEP);
3422 ++GTI;
 // Only done when the second index steps over a sequential type whose
 // stride equals the alloc size of the type it indexes.
3423 if (!GTI.isStruct() && GTI.getSequentialElementStride(DL) ==
3424 DL.getTypeAllocSize(Ty: GTI.getIndexedType()))
3425 return replaceInstUsesWith(I&: GEP, V: Builder.CreateGEP(Ty: GTI.getIndexedType(),
3426 Ptr: GEP.getPointerOperand(),
3427 IdxList: drop_begin(RangeOrContainer&: Indices), Name: "",
3428 NW: GEP.getNoWrapFlags()));
3429 }
3430
3431 // Scalarize vector operands; prefer splat-of-gep as canonical form.
3432 // Note that this loses information about undef lanes; we run it after
3433 // demanded bits to partially mitigate that loss.
3434 if (GEPType->isVectorTy() && llvm::any_of(Range: GEP.operands(), P: [](Value *Op) {
3435 return Op->getType()->isVectorTy() && getSplatValue(V: Op);
3436 })) {
3437 SmallVector<Value *> NewOps;
3438 for (auto &Op : GEP.operands()) {
3439 if (Op->getType()->isVectorTy())
3440 if (Value *Scalar = getSplatValue(V: Op)) {
3441 NewOps.push_back(Elt: Scalar);
3442 continue;
3443 }
3444 NewOps.push_back(Elt: Op);
3445 }
3446
3447 Value *Res = Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: NewOps[0],
3448 IdxList: ArrayRef(NewOps).drop_front(), Name: GEP.getName(),
3449 NW: GEP.getNoWrapFlags());
 // If every vector operand was a splat the new GEP is scalar; splat it back
 // to the original element count.
3450 if (!Res->getType()->isVectorTy()) {
3451 ElementCount EC = cast<VectorType>(Val: GEPType)->getElementCount();
3452 Res = Builder.CreateVectorSplat(EC, V: Res);
3453 }
3454 return replaceInstUsesWith(I&: GEP, V: Res);
3455 }
3456
 // Split the GEP at the second index that is not an ignored leading zero:
 // the indices before it form a first GEP, and the remaining ones (prefixed
 // with a zero index) are applied on top of it.
3457 bool SeenNonZeroIndex = false;
3458 for (auto [IdxNum, Idx] : enumerate(First&: Indices)) {
3459 // Ignore one leading zero index.
3460 auto *C = dyn_cast<Constant>(Val: Idx);
3461 if (C && C->isNullValue() && IdxNum == 0)
3462 continue;
3463
3464 if (!SeenNonZeroIndex) {
3465 SeenNonZeroIndex = true;
3466 continue;
3467 }
3468
3469 // GEP has multiple non-zero indices: Split it.
3470 ArrayRef<Value *> FrontIndices = ArrayRef(Indices).take_front(N: IdxNum);
3471 Value *FrontGEP =
3472 Builder.CreateGEP(Ty: GEPEltType, Ptr: PtrOp, IdxList: FrontIndices,
3473 Name: GEP.getName() + ".split", NW: GEP.getNoWrapFlags());
3474
3475 SmallVector<Value *> BackIndices;
3476 BackIndices.push_back(Elt: Constant::getNullValue(Ty: NewScalarIndexTy));
3477 append_range(C&: BackIndices, R: drop_begin(RangeOrContainer&: Indices, N: IdxNum));
3478 return GetElementPtrInst::Create(
3479 PointeeType: GetElementPtrInst::getIndexedType(Ty: GEPEltType, IdxList: FrontIndices), Ptr: FrontGEP,
3480 IdxList: BackIndices, NW: GEP.getNoWrapFlags());
3481 }
3482
3483 // Canonicalize gep %T to gep [sizeof(%T) x i8]:
3484 auto IsCanonicalType = [](Type *Ty) {
3485 if (auto *AT = dyn_cast<ArrayType>(Val: Ty))
3486 Ty = AT->getElementType();
3487 return Ty->isIntegerTy(Bitwidth: 8);
3488 };
3489 if (Indices.size() == 1 && !IsCanonicalType(GEPEltType)) {
3490 TypeSize Scale = DL.getTypeAllocSize(Ty: GEPEltType);
3491 assert(!Scale.isScalable() && "Should have been handled earlier");
3492 Type *NewElemTy = Builder.getInt8Ty();
3493 if (Scale.getFixedValue() != 1)
3494 NewElemTy = ArrayType::get(ElementType: NewElemTy, NumElements: Scale.getFixedValue());
3495 GEP.setSourceElementType(NewElemTy);
3496 GEP.setResultElementType(NewElemTy);
3497 // Don't bother revisiting the GEP after this change.
3498 MadeIRChange = true;
3499 }
3500
3501 // Check to see if the inputs to the PHI node are getelementptr instructions.
3502 if (auto *PN = dyn_cast<PHINode>(Val: PtrOp)) {
3503 if (Value *NewPtrOp = foldGEPOfPhi(GEP, PN, Builder))
3504 return replaceOperand(I&: GEP, OpNum: 0, V: NewPtrOp);
3505 }
3506
3507 if (auto *Src = dyn_cast<GEPOperator>(Val: PtrOp))
3508 if (Instruction *I = visitGEPOfGEP(GEP, Src))
3509 return I;
3510
3511 if (GEP.getNumIndices() == 1) {
3512 unsigned AS = GEP.getPointerAddressSpace();
3513 if (GEP.getOperand(i_nocapture: 1)->getType()->getScalarSizeInBits() ==
3514 DL.getIndexSizeInBits(AS)) {
 // NOTE(review): GEPEltType was read at function entry; the in-place
 // element type rewrite above may have changed the GEP's source element
 // type since then (presumably to a type of identical alloc size).
3515 uint64_t TyAllocSize = DL.getTypeAllocSize(Ty: GEPEltType).getFixedValue();
3516
3517 if (TyAllocSize == 1) {
3518 // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
3519 // but only if the result pointer is only used as if it were an integer.
3520 // (The case where the underlying object is the same is handled by
3521 // InstSimplify.)
3522 Value *X = GEP.getPointerOperand();
3523 Value *Y;
3524 if (match(V: GEP.getOperand(i_nocapture: 1), P: m_Sub(L: m_PtrToIntOrAddr(Op: m_Value(V&: Y)),
3525 R: m_PtrToIntOrAddr(Op: m_Specific(V: X)))) &&
3526 GEPType == Y->getType()) {
 // Only ptrtoaddr and icmp users are redirected to Y; ptrtoint users
 // are redirected as well when address and pointer widths agree.
3527 bool HasNonAddressBits =
3528 DL.getAddressSizeInBits(AS) != DL.getPointerSizeInBits(AS);
3529 bool Changed = GEP.replaceUsesWithIf(New: Y, ShouldReplace: [&](Use &U) {
3530 return isa<PtrToAddrInst, ICmpInst>(Val: U.getUser()) ||
3531 (!HasNonAddressBits && isa<PtrToIntInst>(Val: U.getUser()));
3532 });
3533 return Changed ? &GEP : nullptr;
3534 }
3535 } else if (auto *ExactIns =
3536 dyn_cast<PossiblyExactOperator>(Val: GEP.getOperand(i_nocapture: 1))) {
3537 // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
3538 Value *V;
3539 if (ExactIns->isExact()) {
3540 if ((has_single_bit(Value: TyAllocSize) &&
3541 match(V: GEP.getOperand(i_nocapture: 1),
3542 P: m_Shr(L: m_Value(V),
3543 R: m_SpecificInt(V: countr_zero(Val: TyAllocSize))))) ||
3544 match(V: GEP.getOperand(i_nocapture: 1),
3545 P: m_IDiv(L: m_Value(V), R: m_SpecificInt(V: TyAllocSize)))) {
3546 return GetElementPtrInst::Create(PointeeType: Builder.getInt8Ty(),
3547 Ptr: GEP.getPointerOperand(), IdxList: V,
3548 NW: GEP.getNoWrapFlags());
3549 }
3550 }
3551 if (ExactIns->isExact() && ExactIns->hasOneUse()) {
3552 // Try to canonicalize non-i8 element type to i8 if the index is an
3553 // exact instruction. If the index is an exact instruction (div/shr)
3554 // with a constant RHS, we can fold the non-i8 element scale into the
3555 // div/shr (similar to the mul case, just inverted).
3556 const APInt *C;
3557 std::optional<APInt> NewC;
3558 if (has_single_bit(Value: TyAllocSize) &&
3559 match(V: ExactIns, P: m_Shr(L: m_Value(V), R: m_APInt(Res&: C))) &&
3560 C->uge(RHS: countr_zero(Val: TyAllocSize)))
3561 NewC = *C - countr_zero(Val: TyAllocSize);
3562 else if (match(V: ExactIns, P: m_UDiv(L: m_Value(V), R: m_APInt(Res&: C)))) {
3563 APInt Quot;
3564 uint64_t Rem;
3565 APInt::udivrem(LHS: *C, RHS: TyAllocSize, Quotient&: Quot, Remainder&: Rem);
3566 if (Rem == 0)
3567 NewC = Quot;
3568 } else if (match(V: ExactIns, P: m_SDiv(L: m_Value(V), R: m_APInt(Res&: C)))) {
3569 APInt Quot;
3570 int64_t Rem;
3571 APInt::sdivrem(LHS: *C, RHS: TyAllocSize, Quotient&: Quot, Remainder&: Rem);
3572 // For sdiv we need to make sure we aren't creating INT_MIN / -1.
3573 if (!Quot.isAllOnes() && Rem == 0)
3574 NewC = Quot;
3575 }
3576
 // Re-emit the same div/shr opcode with the reduced constant, keep it
 // exact, and index an i8 GEP with the result.
3577 if (NewC.has_value()) {
3578 Value *NewOp = Builder.CreateBinOp(
3579 Opc: static_cast<Instruction::BinaryOps>(ExactIns->getOpcode()), LHS: V,
3580 RHS: ConstantInt::get(Ty: V->getType(), V: *NewC));
3581 cast<BinaryOperator>(Val: NewOp)->setIsExact();
3582 return GetElementPtrInst::Create(PointeeType: Builder.getInt8Ty(),
3583 Ptr: GEP.getPointerOperand(), IdxList: NewOp,
3584 NW: GEP.getNoWrapFlags());
3585 }
3586 }
3587 }
3588 }
3589 }
3590 // We do not handle pointer-vector geps here.
3591 if (GEPType->isVectorTy())
3592 return nullptr;
3593
 // Try to infer inbounds: the base must be known dereferenceable (non-null,
 // not freeable) and the accumulated constant offset must be non-negative
 // and no larger than the dereferenceable size.
3594 if (!GEP.isInBounds()) {
3595 unsigned IdxWidth =
3596 DL.getIndexSizeInBits(AS: PtrOp->getType()->getPointerAddressSpace());
3597 APInt BasePtrOffset(IdxWidth, 0);
3598 Value *UnderlyingPtrOp =
3599 PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, Offset&: BasePtrOffset);
3600 bool CanBeNull, CanBeFreed;
3601 uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes(
3602 DL, CanBeNull, CanBeFreed);
3603 if (!CanBeNull && !CanBeFreed && DerefBytes != 0) {
3604 if (GEP.accumulateConstantOffset(DL, Offset&: BasePtrOffset) &&
3605 BasePtrOffset.isNonNegative()) {
3606 APInt AllocSize(IdxWidth, DerefBytes);
3607 if (BasePtrOffset.ule(RHS: AllocSize)) {
 // NOTE(review): the replacement is built with CreateInBounds and does
 // not pass GEP.getNoWrapFlags(); confirm that not carrying over other
 // flags is intended.
3608 return GetElementPtrInst::CreateInBounds(
3609 PointeeType: GEP.getSourceElementType(), Ptr: PtrOp, IdxList: Indices, NameStr: GEP.getName());
3610 }
3611 }
3612 }
3613 }
3614
3615 // nusw + nneg -> nuw
3616 if (GEP.hasNoUnsignedSignedWrap() && !GEP.hasNoUnsignedWrap() &&
3617 all_of(Range: GEP.indices(), P: [&](Value *Idx) {
3618 return isKnownNonNegative(V: Idx, SQ: SQ.getWithInstruction(I: &GEP));
3619 })) {
3620 GEP.setNoWrapFlags(GEP.getNoWrapFlags() | GEPNoWrapFlags::noUnsignedWrap());
3621 return &GEP;
3622 }
3623
3624 // These rewrites are trying to preserve inbounds/nuw attributes. So we want
3625 // to do this after having tried to derive "nuw" above.
3626 if (GEP.getNumIndices() == 1) {
3627 // Given (gep p, x+y) we want to determine the common nowrap flags for both
3628 // geps if transforming into (gep (gep p, x), y).
3629 auto GetPreservedNoWrapFlags = [&](bool AddIsNUW) {
3630 // We can preserve both "inbounds nuw", "nusw nuw" and "nuw" if we know
3631 // that x + y does not have unsigned wrap.
3632 if (GEP.hasNoUnsignedWrap() && AddIsNUW)
3633 return GEP.getNoWrapFlags();
3634 return GEPNoWrapFlags::none();
3635 };
3636
3637 // Try to replace ADD + GEP with GEP + GEP.
3638 Value *Idx1, *Idx2;
3639 if (match(V: GEP.getOperand(i_nocapture: 1),
3640 P: m_OneUse(SubPattern: m_AddLike(L: m_Value(V&: Idx1), R: m_Value(V&: Idx2))))) {
3641 // %idx = add i64 %idx1, %idx2
3642 // %gep = getelementptr i32, ptr %ptr, i64 %idx
3643 // as:
3644 // %newptr = getelementptr i32, ptr %ptr, i64 %idx1
3645 // %newgep = getelementptr i32, ptr %newptr, i64 %idx2
3646 bool NUW = match(V: GEP.getOperand(i_nocapture: 1), P: m_NUWAddLike(L: m_Value(), R: m_Value()));
3647 GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW);
3648 auto *NewPtr =
3649 Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: GEP.getPointerOperand(),
3650 IdxList: Idx1, Name: "", NW: NWFlags);
3651 return replaceInstUsesWith(I&: GEP,
3652 V: Builder.CreateGEP(Ty: GEP.getSourceElementType(),
3653 Ptr: NewPtr, IdxList: Idx2, Name: "", NW: NWFlags));
3654 }
3655 ConstantInt *C;
3656 if (match(V: GEP.getOperand(i_nocapture: 1), P: m_OneUse(SubPattern: m_SExtLike(Op: m_OneUse(SubPattern: m_NSWAddLike(
3657 L: m_Value(V&: Idx1), R: m_ConstantInt(CI&: C))))))) {
3658 // %add = add nsw i32 %idx1, idx2
3659 // %sidx = sext i32 %add to i64
3660 // %gep = getelementptr i32, ptr %ptr, i64 %sidx
3661 // as:
3662 // %newptr = getelementptr i32, ptr %ptr, i32 %idx1
3663 // %newgep = getelementptr i32, ptr %newptr, i32 idx2
3664 bool NUW = match(V: GEP.getOperand(i_nocapture: 1),
3665 P: m_NNegZExt(Op: m_NUWAddLike(L: m_Value(), R: m_Value())));
3666 GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW);
3667 auto *NewPtr = Builder.CreateGEP(
3668 Ty: GEP.getSourceElementType(), Ptr: GEP.getPointerOperand(),
3669 IdxList: Builder.CreateSExt(V: Idx1, DestTy: GEP.getOperand(i_nocapture: 1)->getType()), Name: "", NW: NWFlags);
3670 return replaceInstUsesWith(
3671 I&: GEP,
3672 V: Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: NewPtr,
3673 IdxList: Builder.CreateSExt(V: C, DestTy: GEP.getOperand(i_nocapture: 1)->getType()),
3674 Name: "", NW: NWFlags));
3675 }
3676 }
3677
3678 if (Instruction *R = foldSelectGEP(GEP, Builder))
3679 return R;
3680
3681 // srem -> (and/urem) for inbounds+nuw GEP
3682 if (Indices.size() == 1 && GEP.isInBounds() && GEP.hasNoUnsignedWrap()) {
3683 Value *X, *Y;
3684
3685 // Match: idx = srem X, Y -- where Y is a power-of-two value.
3686 if (match(V: Indices[0], P: m_OneUse(SubPattern: m_SRem(L: m_Value(V&: X), R: m_Value(V&: Y)))) &&
3687 isKnownToBeAPowerOfTwo(V: Y, /*OrZero=*/true, CxtI: &GEP)) {
3688 // If GEP is inbounds+nuw, the offset cannot be negative
3689 // -> srem by power-of-two can be treated as urem,
3690 // and urem by power-of-two folds to 'and' later.
3691 // OrZero=true is fine here because division by zero is UB.
3692 Instruction *OldIdxI = cast<Instruction>(Val: Indices[0]);
3693 Value *NewIdx = Builder.CreateURem(LHS: X, RHS: Y, Name: OldIdxI->getName());
3694
3695 return GetElementPtrInst::Create(PointeeType: GEPEltType, Ptr: PtrOp, IdxList: {NewIdx},
3696 NW: GEP.getNoWrapFlags());
3697 }
3698 }
3699
3700 return nullptr;
3701}
3702
3703static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
3704 Instruction *AI) {
3705 if (isa<ConstantPointerNull>(Val: V))
3706 return true;
3707 if (auto *LI = dyn_cast<LoadInst>(Val: V))
3708 return isa<GlobalVariable>(Val: LI->getPointerOperand());
3709 // Two distinct allocations will never be equal.
3710 return isAllocLikeFn(V, TLI: &TLI) && V != AI;
3711}
3712
3713/// Given a call CB which uses an address UsedV, return true if we can prove the
3714/// call's only possible effect is storing to V.
3715static bool isRemovableWrite(CallBase &CB, Value *UsedV,
3716 const TargetLibraryInfo &TLI) {
3717 if (!CB.use_empty())
3718 // TODO: add recursion if returned attribute is present
3719 return false;
3720
3721 if (CB.isTerminator())
3722 // TODO: remove implementation restriction
3723 return false;
3724
3725 if (!CB.willReturn() || !CB.doesNotThrow())
3726 return false;
3727
3728 // If the only possible side effect of the call is writing to the alloca,
3729 // and the result isn't used, we can safely remove any reads implied by the
3730 // call including those which might read the alloca itself.
3731 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: &CB, TLI);
3732 return Dest && Dest->Ptr == UsedV;
3733}
3734
/// Walk the users of the allocation \p AI, looking through casts, GEPs,
/// invariant-group intrinsics and reallocations of the same family, and
/// decide whether every user is one this function knows how to handle.
///
/// Each accepted user is appended to \p Users. The walk also tracks how the
/// allocated memory is accessed: it starts at NoModRef when \p KnowInit is
/// true and at Mod otherwise, and a user that would combine reads with writes
/// makes the walk give up.
///
/// \returns std::nullopt as soon as an unsupported user is found; otherwise
/// the accumulated access kind, which is never ModRef (see the final assert).
3735static std::optional<ModRefInfo>
3736isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakTrackingVH> &Users,
3737 const TargetLibraryInfo &TLI, bool KnowInit) {
3738 SmallVector<Instruction*, 4> Worklist;
3739 const std::optional<StringRef> Family = getAllocationFamily(I: AI, TLI: &TLI);
3740 Worklist.push_back(Elt: AI);
 // Without a known initial value the state starts at Mod, so any later read
 // of the memory is rejected below.
3741 ModRefInfo Access = KnowInit ? ModRefInfo::NoModRef : ModRefInfo::Mod;
3742
3743 do {
3744 Instruction *PI = Worklist.pop_back_val();
3745 for (User *U : PI->users()) {
3746 Instruction *I = cast<Instruction>(Val: U);
3747 switch (I->getOpcode()) {
3748 default:
3749 // Give up the moment we see something we can't handle.
3750 return std::nullopt;
3751
 // Pointer-to-pointer derivations: record them and inspect their users
 // too.
3752 case Instruction::AddrSpaceCast:
3753 case Instruction::BitCast:
3754 case Instruction::GetElementPtr:
3755 Users.emplace_back(Args&: I);
3756 Worklist.push_back(Elt: I);
3757 continue;
3758
3759 case Instruction::ICmp: {
3760 ICmpInst *ICI = cast<ICmpInst>(Val: I);
3761 // We can fold eq/ne comparisons with null to false/true, respectively.
3762 // We also fold comparisons in some conditions provided the alloc has
3763 // not escaped (see isNeverEqualToUnescapedAlloc).
3764 if (!ICI->isEquality())
3765 return std::nullopt;
3766 unsigned OtherIndex = (ICI->getOperand(i_nocapture: 0) == PI) ? 1 : 0;
3767 if (!isNeverEqualToUnescapedAlloc(V: ICI->getOperand(i_nocapture: OtherIndex), TLI, AI))
3768 return std::nullopt;
3769
3770 // Do not fold compares to aligned_alloc calls, as they may have to
3771 // return null in case the required alignment cannot be satisfied,
3772 // unless we can prove that both alignment and size are valid.
3773 auto AlignmentAndSizeKnownValid = [](CallBase *CB) {
3774 // Check if alignment and size of a call to aligned_alloc is valid,
3775 // that is alignment is a power-of-2 and the size is a multiple of the
3776 // alignment.
3777 const APInt *Alignment;
3778 const APInt *Size;
3779 return match(V: CB->getArgOperand(i: 0), P: m_APInt(Res&: Alignment)) &&
3780 match(V: CB->getArgOperand(i: 1), P: m_APInt(Res&: Size)) &&
3781 Alignment->isPowerOf2() && Size->urem(RHS: *Alignment).isZero();
3782 };
 // NOTE(review): CB->getCalledFunction() is dereferenced without a null
 // check; this assumes AI, when it is a call, has a direct callee --
 // confirm against the callers.
3783 auto *CB = dyn_cast<CallBase>(Val: AI);
3784 LibFunc TheLibFunc;
3785 if (CB && TLI.getLibFunc(FDecl: *CB->getCalledFunction(), F&: TheLibFunc) &&
3786 TLI.has(F: TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc &&
3787 !AlignmentAndSizeKnownValid(CB))
3788 return std::nullopt;
3789 Users.emplace_back(Args&: I);
3790 continue;
3791 }
3792
3793 case Instruction::Call:
3794 // Ignore no-op and store intrinsics.
3795 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
3796 switch (II->getIntrinsicID()) {
3797 default:
3798 return std::nullopt;
3799
3800 case Intrinsic::memmove:
3801 case Intrinsic::memcpy:
3802 case Intrinsic::memset: {
3803 MemIntrinsic *MI = cast<MemIntrinsic>(Val: II);
3804 if (MI->isVolatile())
3805 return std::nullopt;
3806 // Note: this could also be ModRef, but we can still interpret that
3807 // as just Mod in that case.
3808 ModRefInfo NewAccess =
3809 MI->getRawDest() == PI ? ModRefInfo::Mod : ModRefInfo::Ref;
 // Reject if an access kind other than NewAccess was already recorded,
 // i.e. this would mix reads and writes of the allocation.
3810 if ((Access & ~NewAccess) != ModRefInfo::NoModRef)
3811 return std::nullopt;
3812 Access |= NewAccess;
3813 [[fallthrough]];
3814 }
3815 case Intrinsic::assume:
3816 case Intrinsic::invariant_start:
3817 case Intrinsic::invariant_end:
3818 case Intrinsic::lifetime_start:
3819 case Intrinsic::lifetime_end:
3820 case Intrinsic::objectsize:
3821 Users.emplace_back(Args&: I);
3822 continue;
 // These intrinsics are recorded and their own users are inspected as
 // well.
3823 case Intrinsic::launder_invariant_group:
3824 case Intrinsic::strip_invariant_group:
3825 Users.emplace_back(Args&: I);
3826 Worklist.push_back(Elt: I);
3827 continue;
3828 }
3829 }
3830
 // A call that frees PI and belongs to the same allocation family.
3831 if (Family && getFreedOperand(CB: cast<CallBase>(Val: I), TLI: &TLI) == PI &&
3832 getAllocationFamily(I, TLI: &TLI) == Family) {
3833 Users.emplace_back(Args&: I);
3834 continue;
3835 }
3836
 // A call that reallocates PI within the same family: its users must be
 // inspected too.
3837 if (Family && getReallocatedOperand(CB: cast<CallBase>(Val: I)) == PI &&
3838 getAllocationFamily(I, TLI: &TLI) == Family) {
3839 Users.emplace_back(Args&: I);
3840 Worklist.push_back(Elt: I);
3841 continue;
3842 }
3843
 // A call accepted by isRemovableWrite counts as a write, so it is only
 // allowed while no read has been recorded.
3844 if (!isRefSet(MRI: Access) &&
3845 isRemovableWrite(CB&: *cast<CallBase>(Val: I), UsedV: PI, TLI)) {
3846 Access |= ModRefInfo::Mod;
3847 Users.emplace_back(Args&: I);
3848 continue;
3849 }
3850
3851 return std::nullopt;
3852
3853 case Instruction::Store: {
3854 StoreInst *SI = cast<StoreInst>(Val: I);
 // Only non-volatile stores *to* PI are accepted; a store that uses PI
 // as the stored value is rejected.
3855 if (SI->isVolatile() || SI->getPointerOperand() != PI)
3856 return std::nullopt;
3857 if (isRefSet(MRI: Access))
3858 return std::nullopt;
3859 Access |= ModRefInfo::Mod;
3860 Users.emplace_back(Args&: I);
3861 continue;
3862 }
3863
3864 case Instruction::Load: {
3865 LoadInst *LI = cast<LoadInst>(Val: I);
3866 if (LI->isVolatile() || LI->getPointerOperand() != PI)
3867 return std::nullopt;
 // A load is only accepted while no write has been recorded.
3868 if (isModSet(MRI: Access))
3869 return std::nullopt;
3870 Access |= ModRefInfo::Ref;
3871 Users.emplace_back(Args&: I);
3872 continue;
3873 }
3874 }
 // Every switch case above either returns or continues the user loop.
3875 llvm_unreachable("missing a return?");
3876 }
3877 } while (!Worklist.empty());
3878
3879 assert(Access != ModRefInfo::ModRef);
3880 return Access;
3881}
3882
3883Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
3884 assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI));
3885
3886 // If we have a malloc call which is only used in any amount of comparisons to
3887 // null and free calls, delete the calls and replace the comparisons with true
3888 // or false as appropriate.
3889
3890 // This is based on the principle that we can substitute our own allocation
3891 // function (which will never return null) rather than knowledge of the
3892 // specific function being called. In some sense this can change the permitted
3893 // outputs of a program (when we convert a malloc to an alloca, the fact that
3894 // the allocation is now on the stack is potentially visible, for example),
3895 // but we believe in a permissible manner.
3896 SmallVector<WeakTrackingVH, 64> Users;
3897
3898 // If we are removing an alloca with a dbg.declare, insert dbg.value calls
3899 // before each store.
3900 SmallVector<DbgVariableRecord *, 8> DVRs;
3901 std::unique_ptr<DIBuilder> DIB;
3902 if (isa<AllocaInst>(Val: MI)) {
3903 findDbgUsers(V: &MI, DbgVariableRecords&: DVRs);
3904 DIB.reset(p: new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
3905 }
3906
3907 // Determine what getInitialValueOfAllocation would return without actually
3908 // allocating the result.
3909 bool KnowInitUndef = false;
3910 bool KnowInitZero = false;
3911 Constant *Init =
3912 getInitialValueOfAllocation(V: &MI, TLI: &TLI, Ty: Type::getInt8Ty(C&: MI.getContext()));
3913 if (Init) {
3914 if (isa<UndefValue>(Val: Init))
3915 KnowInitUndef = true;
3916 else if (Init->isNullValue())
3917 KnowInitZero = true;
3918 }
3919 // The various sanitizers don't actually return undef memory, but rather
3920 // memory initialized with special forms of runtime poison
3921 auto &F = *MI.getFunction();
3922 if (F.hasFnAttribute(Kind: Attribute::SanitizeMemory) ||
3923 F.hasFnAttribute(Kind: Attribute::SanitizeAddress))
3924 KnowInitUndef = false;
3925
3926 auto Removable =
3927 isAllocSiteRemovable(AI: &MI, Users, TLI, KnowInit: KnowInitZero | KnowInitUndef);
3928 if (Removable) {
3929 for (WeakTrackingVH &User : Users) {
3930 // Lowering all @llvm.objectsize and MTI calls first because they may use
3931 // a bitcast/GEP of the alloca we are removing.
3932 if (!User)
3933 continue;
3934
3935 Instruction *I = cast<Instruction>(Val: &*User);
3936
3937 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
3938 if (II->getIntrinsicID() == Intrinsic::objectsize) {
3939 SmallVector<Instruction *> InsertedInstructions;
3940 Value *Result = lowerObjectSizeCall(
3941 ObjectSize: II, DL, TLI: &TLI, AA, /*MustSucceed=*/true, InsertedInstructions: &InsertedInstructions);
3942 for (Instruction *Inserted : InsertedInstructions)
3943 Worklist.add(I: Inserted);
3944 replaceInstUsesWith(I&: *I, V: Result);
3945 eraseInstFromFunction(I&: *I);
3946 User = nullptr; // Skip examining in the next loop.
3947 continue;
3948 }
3949 if (auto *MTI = dyn_cast<MemTransferInst>(Val: I)) {
3950 if (KnowInitZero && isRefSet(MRI: *Removable)) {
3951 IRBuilderBase::InsertPointGuard Guard(Builder);
3952 Builder.SetInsertPoint(MTI);
3953 auto *M = Builder.CreateMemSet(
3954 Ptr: MTI->getRawDest(),
3955 Val: ConstantInt::get(Ty: Type::getInt8Ty(C&: MI.getContext()), V: 0),
3956 Size: MTI->getLength(), Align: MTI->getDestAlign());
3957 M->copyMetadata(SrcInst: *MTI);
3958 }
3959 }
3960 }
3961 }
3962 for (WeakTrackingVH &User : Users) {
3963 if (!User)
3964 continue;
3965
3966 Instruction *I = cast<Instruction>(Val: &*User);
3967
3968 if (ICmpInst *C = dyn_cast<ICmpInst>(Val: I)) {
3969 replaceInstUsesWith(I&: *C,
3970 V: ConstantInt::get(Ty: Type::getInt1Ty(C&: C->getContext()),
3971 V: C->isFalseWhenEqual()));
3972 } else if (auto *SI = dyn_cast<StoreInst>(Val: I)) {
3973 for (auto *DVR : DVRs)
3974 if (DVR->isAddressOfVariable())
3975 ConvertDebugDeclareToDebugValue(DVR, SI, Builder&: *DIB);
3976 } else {
3977 // Casts, GEP, or anything else: we're about to delete this instruction,
3978 // so it can not have any valid uses.
3979 Constant *Replace;
3980 if (isa<LoadInst>(Val: I)) {
3981 assert(KnowInitZero || KnowInitUndef);
3982 Replace = KnowInitUndef ? UndefValue::get(T: I->getType())
3983 : Constant::getNullValue(Ty: I->getType());
3984 } else
3985 Replace = PoisonValue::get(T: I->getType());
3986 replaceInstUsesWith(I&: *I, V: Replace);
3987 }
3988 eraseInstFromFunction(I&: *I);
3989 }
3990
3991 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &MI)) {
3992 // Replace invoke with a NOP intrinsic to maintain the original CFG
3993 Module *M = II->getModule();
3994 Function *F = Intrinsic::getOrInsertDeclaration(M, id: Intrinsic::donothing);
3995 auto *NewII = InvokeInst::Create(
3996 Func: F, IfNormal: II->getNormalDest(), IfException: II->getUnwindDest(), Args: {}, NameStr: "", InsertBefore: II->getParent());
3997 NewII->setDebugLoc(II->getDebugLoc());
3998 }
3999
4000 // Remove debug intrinsics which describe the value contained within the
4001 // alloca. In addition to removing dbg.{declare,addr} which simply point to
4002 // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
4003 //
4004 // ```
4005 // define void @foo(i32 %0) {
4006 // %a = alloca i32 ; Deleted.
4007 // store i32 %0, i32* %a
4008 // dbg.value(i32 %0, "arg0") ; Not deleted.
4009 // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
4010 // call void @trivially_inlinable_no_op(i32* %a)
4011 // ret void
4012 // }
4013 // ```
4014 //
4015 // This may not be required if we stop describing the contents of allocas
4016 // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
4017 // the LowerDbgDeclare utility.
4018 //
4019 // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
4020 // "arg0" dbg.value may be stale after the call. However, failing to remove
4021 // the DW_OP_deref dbg.value causes large gaps in location coverage.
4022 //
4023 // FIXME: the Assignment Tracking project has now likely made this
4024 // redundant (and it's sometimes harmful).
4025 for (auto *DVR : DVRs)
4026 if (DVR->isAddressOfVariable() || DVR->getExpression()->startsWithDeref())
4027 DVR->eraseFromParent();
4028
4029 return eraseInstFromFunction(I&: MI);
4030 }
4031 return nullptr;
4032}
4033
4034/// Move the call to free before a NULL test.
4035///
4036/// Check if this free is accessed after its argument has been test
4037/// against NULL (property 0).
4038/// If yes, it is legal to move this call in its predecessor block.
4039///
4040/// The move is performed only if the block containing the call to free
4041/// will be removed, i.e.:
4042/// 1. it has only one predecessor P, and P has two successors
4043/// 2. it contains the call, noops, and an unconditional branch
4044/// 3. its successor is the same as its predecessor's successor
4045///
4046/// The profitability is out-of concern here and this function should
4047/// be called only if the caller knows this transformation would be
4048/// profitable (e.g., for code size).
4049static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
4050 const DataLayout &DL) {
4051 Value *Op = FI.getArgOperand(i: 0);
4052 BasicBlock *FreeInstrBB = FI.getParent();
4053 BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
4054
4055 // Validate part of constraint #1: Only one predecessor
4056 // FIXME: We can extend the number of predecessor, but in that case, we
4057 // would duplicate the call to free in each predecessor and it may
4058 // not be profitable even for code size.
4059 if (!PredBB)
4060 return nullptr;
4061
4062 // Validate constraint #2: Does this block contains only the call to
4063 // free, noops, and an unconditional branch?
4064 BasicBlock *SuccBB;
4065 Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
4066 if (!match(V: FreeInstrBBTerminator, P: m_UnconditionalBr(Succ&: SuccBB)))
4067 return nullptr;
4068
4069 // If there are only 2 instructions in the block, at this point,
4070 // this is the call to free and unconditional.
4071 // If there are more than 2 instructions, check that they are noops
4072 // i.e., they won't hurt the performance of the generated code.
4073 if (FreeInstrBB->size() != 2) {
4074 for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) {
4075 if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
4076 continue;
4077 auto *Cast = dyn_cast<CastInst>(Val: &Inst);
4078 if (!Cast || !Cast->isNoopCast(DL))
4079 return nullptr;
4080 }
4081 }
4082 // Validate the rest of constraint #1 by matching on the pred branch.
4083 Instruction *TI = PredBB->getTerminator();
4084 BasicBlock *TrueBB, *FalseBB;
4085 CmpPredicate Pred;
4086 if (!match(V: TI, P: m_Br(C: m_ICmp(Pred,
4087 L: m_CombineOr(L: m_Specific(V: Op),
4088 R: m_Specific(V: Op->stripPointerCasts())),
4089 R: m_Zero()),
4090 T&: TrueBB, F&: FalseBB)))
4091 return nullptr;
4092 if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
4093 return nullptr;
4094
4095 // Validate constraint #3: Ensure the null case just falls through.
4096 if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
4097 return nullptr;
4098 assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
4099 "Broken CFG: missing edge from predecessor to successor");
4100
4101 // At this point, we know that everything in FreeInstrBB can be moved
4102 // before TI.
4103 for (Instruction &Instr : llvm::make_early_inc_range(Range&: *FreeInstrBB)) {
4104 if (&Instr == FreeInstrBBTerminator)
4105 break;
4106 Instr.moveBeforePreserving(MovePos: TI->getIterator());
4107 }
4108 assert(FreeInstrBB->size() == 1 &&
4109 "Only the branch instruction should remain");
4110
4111 // Now that we've moved the call to free before the NULL check, we have to
4112 // remove any attributes on its parameter that imply it's non-null, because
4113 // those attributes might have only been valid because of the NULL check, and
4114 // we can get miscompiles if we keep them. This is conservative if non-null is
4115 // also implied by something other than the NULL check, but it's guaranteed to
4116 // be correct, and the conservativeness won't matter in practice, since the
4117 // attributes are irrelevant for the call to free itself and the pointer
4118 // shouldn't be used after the call.
4119 AttributeList Attrs = FI.getAttributes();
4120 Attrs = Attrs.removeParamAttribute(C&: FI.getContext(), ArgNo: 0, Kind: Attribute::NonNull);
4121 Attribute Dereferenceable = Attrs.getParamAttr(ArgNo: 0, Kind: Attribute::Dereferenceable);
4122 if (Dereferenceable.isValid()) {
4123 uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
4124 Attrs = Attrs.removeParamAttribute(C&: FI.getContext(), ArgNo: 0,
4125 Kind: Attribute::Dereferenceable);
4126 Attrs = Attrs.addDereferenceableOrNullParamAttr(C&: FI.getContext(), ArgNo: 0, Bytes);
4127 }
4128 FI.setAttributes(Attrs);
4129
4130 return &FI;
4131}
4132
4133Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
4134 // free undef -> unreachable.
4135 if (isa<UndefValue>(Val: Op)) {
4136 // Leave a marker since we can't modify the CFG here.
4137 CreateNonTerminatorUnreachable(InsertAt: &FI);
4138 return eraseInstFromFunction(I&: FI);
4139 }
4140
4141 // If we have 'free null' delete the instruction. This can happen in stl code
4142 // when lots of inlining happens.
4143 if (isa<ConstantPointerNull>(Val: Op))
4144 return eraseInstFromFunction(I&: FI);
4145
4146 // If we had free(realloc(...)) with no intervening uses, then eliminate the
4147 // realloc() entirely.
4148 CallInst *CI = dyn_cast<CallInst>(Val: Op);
4149 if (CI && CI->hasOneUse())
4150 if (Value *ReallocatedOp = getReallocatedOperand(CB: CI))
4151 return eraseInstFromFunction(I&: *replaceInstUsesWith(I&: *CI, V: ReallocatedOp));
4152
4153 // If we optimize for code size, try to move the call to free before the null
4154 // test so that simplify cfg can remove the empty block and dead code
4155 // elimination the branch. I.e., helps to turn something like:
4156 // if (foo) free(foo);
4157 // into
4158 // free(foo);
4159 //
4160 // Note that we can only do this for 'free' and not for any flavor of
4161 // 'operator delete'; there is no 'operator delete' symbol for which we are
4162 // permitted to invent a call, even if we're passing in a null pointer.
4163 if (MinimizeSize) {
4164 LibFunc Func;
4165 if (TLI.getLibFunc(CB: FI, F&: Func) && TLI.has(F: Func) && Func == LibFunc_free)
4166 if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
4167 return I;
4168 }
4169
4170 return nullptr;
4171}
4172
4173Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
4174 Value *RetVal = RI.getReturnValue();
4175 if (!RetVal)
4176 return nullptr;
4177
4178 Function *F = RI.getFunction();
4179 Type *RetTy = RetVal->getType();
4180 if (RetTy->isPointerTy()) {
4181 bool HasDereferenceable =
4182 F->getAttributes().getRetDereferenceableBytes() > 0;
4183 if (F->hasRetAttribute(Kind: Attribute::NonNull) ||
4184 (HasDereferenceable &&
4185 !NullPointerIsDefined(F, AS: RetTy->getPointerAddressSpace()))) {
4186 if (Value *V = simplifyNonNullOperand(V: RetVal, HasDereferenceable))
4187 return replaceOperand(I&: RI, OpNum: 0, V);
4188 }
4189 }
4190
4191 if (!AttributeFuncs::isNoFPClassCompatibleType(Ty: RetTy))
4192 return nullptr;
4193
4194 FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass();
4195 if (ReturnClass == fcNone)
4196 return nullptr;
4197
4198 KnownFPClass KnownClass;
4199 if (SimplifyDemandedFPClass(I: &RI, Op: 0, DemandedMask: ~ReturnClass, Known&: KnownClass,
4200 Q: SQ.getWithInstruction(I: &RI)))
4201 return &RI;
4202
4203 return nullptr;
4204}
4205
4206// WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
4207bool InstCombinerImpl::removeInstructionsBeforeUnreachable(Instruction &I) {
4208 // Try to remove the previous instruction if it must lead to unreachable.
4209 // This includes instructions like stores and "llvm.assume" that may not get
4210 // removed by simple dead code elimination.
4211 bool Changed = false;
4212 while (Instruction *Prev = I.getPrevNode()) {
4213 // While we theoretically can erase EH, that would result in a block that
4214 // used to start with an EH no longer starting with EH, which is invalid.
4215 // To make it valid, we'd need to fixup predecessors to no longer refer to
4216 // this block, but that changes CFG, which is not allowed in InstCombine.
4217 if (Prev->isEHPad())
4218 break; // Can not drop any more instructions. We're done here.
4219
4220 if (!isGuaranteedToTransferExecutionToSuccessor(I: Prev))
4221 break; // Can not drop any more instructions. We're done here.
4222 // Otherwise, this instruction can be freely erased,
4223 // even if it is not side-effect free.
4224
4225 // A value may still have uses before we process it here (for example, in
4226 // another unreachable block), so convert those to poison.
4227 replaceInstUsesWith(I&: *Prev, V: PoisonValue::get(T: Prev->getType()));
4228 eraseInstFromFunction(I&: *Prev);
4229 Changed = true;
4230 }
4231 return Changed;
4232}
4233
4234Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
4235 removeInstructionsBeforeUnreachable(I);
4236 return nullptr;
4237}
4238
4239Instruction *InstCombinerImpl::visitUncondBrInst(UncondBrInst &BI) {
4240 // If this store is the second-to-last instruction in the basic block
4241 // (excluding debug info) and if the block ends with
4242 // an unconditional branch, try to move the store to the successor block.
4243
4244 auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
4245 BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
4246 do {
4247 if (BBI != FirstInstr)
4248 --BBI;
4249 } while (BBI != FirstInstr && BBI->isDebugOrPseudoInst());
4250
4251 return dyn_cast<StoreInst>(Val&: BBI);
4252 };
4253
4254 if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
4255 if (mergeStoreIntoSuccessor(SI&: *SI))
4256 return &BI;
4257
4258 return nullptr;
4259}
4260
4261void InstCombinerImpl::addDeadEdge(BasicBlock *From, BasicBlock *To,
4262 SmallVectorImpl<BasicBlock *> &Worklist) {
4263 if (!DeadEdges.insert(V: {From, To}).second)
4264 return;
4265
4266 // Replace phi node operands in successor with poison.
4267 for (PHINode &PN : To->phis())
4268 for (Use &U : PN.incoming_values())
4269 if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(Val: U)) {
4270 replaceUse(U, NewValue: PoisonValue::get(T: PN.getType()));
4271 addToWorklist(I: &PN);
4272 MadeIRChange = true;
4273 }
4274
4275 Worklist.push_back(Elt: To);
4276}
4277
4278// Under the assumption that I is unreachable, remove it and following
4279// instructions. Changes are reported directly to MadeIRChange.
4280void InstCombinerImpl::handleUnreachableFrom(
4281 Instruction *I, SmallVectorImpl<BasicBlock *> &Worklist) {
4282 BasicBlock *BB = I->getParent();
4283 for (Instruction &Inst : make_early_inc_range(
4284 Range: make_range(x: std::next(x: BB->getTerminator()->getReverseIterator()),
4285 y: std::next(x: I->getReverseIterator())))) {
4286 if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
4287 replaceInstUsesWith(I&: Inst, V: PoisonValue::get(T: Inst.getType()));
4288 MadeIRChange = true;
4289 }
4290 if (Inst.isEHPad() || Inst.getType()->isTokenTy())
4291 continue;
4292 // RemoveDIs: erase debug-info on this instruction manually.
4293 Inst.dropDbgRecords();
4294 eraseInstFromFunction(I&: Inst);
4295 MadeIRChange = true;
4296 }
4297
4298 SmallVector<Value *> Changed;
4299 if (handleUnreachableTerminator(I: BB->getTerminator(), PoisonedValues&: Changed)) {
4300 MadeIRChange = true;
4301 for (Value *V : Changed)
4302 addToWorklist(I: cast<Instruction>(Val: V));
4303 }
4304
4305 // Handle potentially dead successors.
4306 for (BasicBlock *Succ : successors(BB))
4307 addDeadEdge(From: BB, To: Succ, Worklist);
4308}
4309
4310void InstCombinerImpl::handlePotentiallyDeadBlocks(
4311 SmallVectorImpl<BasicBlock *> &Worklist) {
4312 while (!Worklist.empty()) {
4313 BasicBlock *BB = Worklist.pop_back_val();
4314 if (!all_of(Range: predecessors(BB), P: [&](BasicBlock *Pred) {
4315 return DeadEdges.contains(V: {Pred, BB}) || DT.dominates(A: BB, B: Pred);
4316 }))
4317 continue;
4318
4319 handleUnreachableFrom(I: &BB->front(), Worklist);
4320 }
4321}
4322
4323void InstCombinerImpl::handlePotentiallyDeadSuccessors(BasicBlock *BB,
4324 BasicBlock *LiveSucc) {
4325 SmallVector<BasicBlock *> Worklist;
4326 for (BasicBlock *Succ : successors(BB)) {
4327 // The live successor isn't dead.
4328 if (Succ == LiveSucc)
4329 continue;
4330
4331 addDeadEdge(From: BB, To: Succ, Worklist);
4332 }
4333
4334 handlePotentiallyDeadBlocks(Worklist);
4335}
4336
4337Instruction *InstCombinerImpl::visitCondBrInst(CondBrInst &BI) {
4338 // Change br (not X), label True, label False to: br X, label False, True
4339 Value *Cond = BI.getCondition();
4340 Value *X;
4341 if (match(V: Cond, P: m_Not(V: m_Value(V&: X))) && !isa<Constant>(Val: X)) {
4342 // Swap Destinations and condition...
4343 BI.swapSuccessors();
4344 if (BPI)
4345 BPI->swapSuccEdgesProbabilities(Src: BI.getParent());
4346 return replaceOperand(I&: BI, OpNum: 0, V: X);
4347 }
4348
4349 // Canonicalize logical-and-with-invert as logical-or-with-invert.
4350 // This is done by inverting the condition and swapping successors:
4351 // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
4352 Value *Y;
4353 if (isa<SelectInst>(Val: Cond) &&
4354 match(V: Cond,
4355 P: m_OneUse(SubPattern: m_LogicalAnd(L: m_Value(V&: X), R: m_OneUse(SubPattern: m_Not(V: m_Value(V&: Y))))))) {
4356 Value *NotX = Builder.CreateNot(V: X, Name: "not." + X->getName());
4357 Value *Or = Builder.CreateLogicalOr(Cond1: NotX, Cond2: Y);
4358
4359 // Set weights for the new OR select instruction too.
4360 if (!ProfcheckDisableMetadataFixes) {
4361 if (auto *OrInst = dyn_cast<Instruction>(Val: Or)) {
4362 if (auto *CondInst = dyn_cast<Instruction>(Val: Cond)) {
4363 SmallVector<uint32_t> Weights;
4364 if (extractBranchWeights(I: *CondInst, Weights)) {
4365 assert(Weights.size() == 2 &&
4366 "Unexpected number of branch weights!");
4367 std::swap(a&: Weights[0], b&: Weights[1]);
4368 setBranchWeights(I&: *OrInst, Weights, /*IsExpected=*/false);
4369 }
4370 }
4371 }
4372 }
4373 BI.swapSuccessors();
4374 if (BPI)
4375 BPI->swapSuccEdgesProbabilities(Src: BI.getParent());
4376 return replaceOperand(I&: BI, OpNum: 0, V: Or);
4377 }
4378
4379 // If the condition is irrelevant, remove the use so that other
4380 // transforms on the condition become more effective.
4381 if (!isa<ConstantInt>(Val: Cond) && BI.getSuccessor(i: 0) == BI.getSuccessor(i: 1))
4382 return replaceOperand(I&: BI, OpNum: 0, V: ConstantInt::getFalse(Ty: Cond->getType()));
4383
4384 // Canonicalize, for example, fcmp_one -> fcmp_oeq.
4385 CmpPredicate Pred;
4386 if (match(V: Cond, P: m_OneUse(SubPattern: m_FCmp(Pred, L: m_Value(), R: m_Value()))) &&
4387 !isCanonicalPredicate(Pred)) {
4388 // Swap destinations and condition.
4389 auto *Cmp = cast<CmpInst>(Val: Cond);
4390 Cmp->setPredicate(CmpInst::getInversePredicate(pred: Pred));
4391 BI.swapSuccessors();
4392 if (BPI)
4393 BPI->swapSuccEdgesProbabilities(Src: BI.getParent());
4394 Worklist.push(I: Cmp);
4395 return &BI;
4396 }
4397
4398 if (isa<UndefValue>(Val: Cond)) {
4399 handlePotentiallyDeadSuccessors(BB: BI.getParent(), /*LiveSucc*/ nullptr);
4400 return nullptr;
4401 }
4402 if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) {
4403 handlePotentiallyDeadSuccessors(BB: BI.getParent(),
4404 LiveSucc: BI.getSuccessor(i: !CI->getZExtValue()));
4405 return nullptr;
4406 }
4407
4408 // Replace all dominated uses of the condition with true/false
4409 // Ignore constant expressions to avoid iterating over uses on other
4410 // functions.
4411 if (!isa<Constant>(Val: Cond) && BI.getSuccessor(i: 0) != BI.getSuccessor(i: 1)) {
4412 for (auto &U : make_early_inc_range(Range: Cond->uses())) {
4413 BasicBlockEdge Edge0(BI.getParent(), BI.getSuccessor(i: 0));
4414 if (DT.dominates(BBE: Edge0, U)) {
4415 replaceUse(U, NewValue: ConstantInt::getTrue(Ty: Cond->getType()));
4416 addToWorklist(I: cast<Instruction>(Val: U.getUser()));
4417 continue;
4418 }
4419 BasicBlockEdge Edge1(BI.getParent(), BI.getSuccessor(i: 1));
4420 if (DT.dominates(BBE: Edge1, U)) {
4421 replaceUse(U, NewValue: ConstantInt::getFalse(Ty: Cond->getType()));
4422 addToWorklist(I: cast<Instruction>(Val: U.getUser()));
4423 }
4424 }
4425 }
4426
4427 DC.registerBranch(BI: &BI);
4428 return nullptr;
4429}
4430
4431// Replaces (switch (select cond, X, C)/(select cond, C, X)) with (switch X) if
4432// we can prove that both (switch C) and (switch X) go to the default when cond
4433// is false/true.
4434static Value *simplifySwitchOnSelectUsingRanges(SwitchInst &SI,
4435 SelectInst *Select,
4436 bool IsTrueArm) {
4437 unsigned CstOpIdx = IsTrueArm ? 1 : 2;
4438 auto *C = dyn_cast<ConstantInt>(Val: Select->getOperand(i_nocapture: CstOpIdx));
4439 if (!C)
4440 return nullptr;
4441
4442 BasicBlock *CstBB = SI.findCaseValue(C)->getCaseSuccessor();
4443 if (CstBB != SI.getDefaultDest())
4444 return nullptr;
4445 Value *X = Select->getOperand(i_nocapture: 3 - CstOpIdx);
4446 CmpPredicate Pred;
4447 const APInt *RHSC;
4448 if (!match(V: Select->getCondition(),
4449 P: m_ICmp(Pred, L: m_Specific(V: X), R: m_APInt(Res&: RHSC))))
4450 return nullptr;
4451 if (IsTrueArm)
4452 Pred = ICmpInst::getInversePredicate(pred: Pred);
4453
4454 // See whether we can replace the select with X
4455 ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, Other: *RHSC);
4456 for (auto Case : SI.cases())
4457 if (!CR.contains(Val: Case.getCaseValue()->getValue()))
4458 return nullptr;
4459
4460 return X;
4461}
4462
4463Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
4464 Value *Cond = SI.getCondition();
4465 Value *Op0;
4466 const APInt *CondOpC;
4467 using InvertFn = std::function<APInt(const APInt &Case, const APInt &C)>;
4468
4469 auto MaybeInvertible = [&](Value *Cond) -> InvertFn {
4470 if (match(V: Cond, P: m_Add(L: m_Value(V&: Op0), R: m_APInt(Res&: CondOpC))))
4471 // Change 'switch (X+C) case Case:' into 'switch (X) case Case-C'.
4472 return [](const APInt &Case, const APInt &C) { return Case - C; };
4473
4474 if (match(V: Cond, P: m_Sub(L: m_APInt(Res&: CondOpC), R: m_Value(V&: Op0))))
4475 // Change 'switch (C-X) case Case:' into 'switch (X) case C-Case'.
4476 return [](const APInt &Case, const APInt &C) { return C - Case; };
4477
4478 if (match(V: Cond, P: m_Xor(L: m_Value(V&: Op0), R: m_APInt(Res&: CondOpC))) &&
4479 !CondOpC->isMinSignedValue() && !CondOpC->isMaxSignedValue())
4480 // Change 'switch (X^C) case Case:' into 'switch (X) case Case^C'.
4481 // Prevent creation of large case values by excluding extremes.
4482 return [](const APInt &Case, const APInt &C) { return Case ^ C; };
4483
4484 return nullptr;
4485 };
4486
4487 // Attempt to invert and simplify the switch condition, as long as the
4488 // condition is not used further, as it may not be profitable otherwise.
4489 if (auto InvertFn = MaybeInvertible(Cond); InvertFn && Cond->hasOneUse()) {
4490 for (auto &Case : SI.cases()) {
4491 const APInt &New = InvertFn(Case.getCaseValue()->getValue(), *CondOpC);
4492 Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: New));
4493 }
4494 return replaceOperand(I&: SI, OpNum: 0, V: Op0);
4495 }
4496
4497 uint64_t ShiftAmt;
4498 if (match(V: Cond, P: m_Shl(L: m_Value(V&: Op0), R: m_ConstantInt(V&: ShiftAmt))) &&
4499 ShiftAmt < Op0->getType()->getScalarSizeInBits() &&
4500 all_of(Range: SI.cases(), P: [&](const auto &Case) {
4501 return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt;
4502 })) {
4503 // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'.
4504 OverflowingBinaryOperator *Shl = cast<OverflowingBinaryOperator>(Val: Cond);
4505 if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() ||
4506 Shl->hasOneUse()) {
4507 Value *NewCond = Op0;
4508 if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) {
4509 // If the shift may wrap, we need to mask off the shifted bits.
4510 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
4511 NewCond = Builder.CreateAnd(
4512 LHS: Op0, RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - ShiftAmt));
4513 }
4514 for (auto Case : SI.cases()) {
4515 const APInt &CaseVal = Case.getCaseValue()->getValue();
4516 APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt)
4517 : CaseVal.lshr(shiftAmt: ShiftAmt);
4518 Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: ShiftedCase));
4519 }
4520 return replaceOperand(I&: SI, OpNum: 0, V: NewCond);
4521 }
4522 }
4523
4524 // Fold switch(zext/sext(X)) into switch(X) if possible.
4525 if (match(V: Cond, P: m_ZExtOrSExt(Op: m_Value(V&: Op0)))) {
4526 bool IsZExt = isa<ZExtInst>(Val: Cond);
4527 Type *SrcTy = Op0->getType();
4528 unsigned NewWidth = SrcTy->getScalarSizeInBits();
4529
4530 if (all_of(Range: SI.cases(), P: [&](const auto &Case) {
4531 const APInt &CaseVal = Case.getCaseValue()->getValue();
4532 return IsZExt ? CaseVal.isIntN(N: NewWidth)
4533 : CaseVal.isSignedIntN(N: NewWidth);
4534 })) {
4535 for (auto &Case : SI.cases()) {
4536 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth);
4537 Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase));
4538 }
4539 return replaceOperand(I&: SI, OpNum: 0, V: Op0);
4540 }
4541 }
4542
4543 // Fold switch(select cond, X, Y) into switch(X/Y) if possible
4544 if (auto *Select = dyn_cast<SelectInst>(Val: Cond)) {
4545 if (Value *V =
4546 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/true))
4547 return replaceOperand(I&: SI, OpNum: 0, V);
4548 if (Value *V =
4549 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/false))
4550 return replaceOperand(I&: SI, OpNum: 0, V);
4551 }
4552
4553 KnownBits Known = computeKnownBits(V: Cond, CxtI: &SI);
4554 unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
4555 unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
4556
4557 // Compute the number of leading bits we can ignore.
4558 // TODO: A better way to determine this would use ComputeNumSignBits().
4559 for (const auto &C : SI.cases()) {
4560 LeadingKnownZeros =
4561 std::min(a: LeadingKnownZeros, b: C.getCaseValue()->getValue().countl_zero());
4562 LeadingKnownOnes =
4563 std::min(a: LeadingKnownOnes, b: C.getCaseValue()->getValue().countl_one());
4564 }
4565
4566 unsigned NewWidth = Known.getBitWidth() - std::max(a: LeadingKnownZeros, b: LeadingKnownOnes);
4567
4568 // Shrink the condition operand if the new type is smaller than the old type.
4569 // But do not shrink to a non-standard type, because backend can't generate
4570 // good code for that yet.
4571 // TODO: We can make it aggressive again after fixing PR39569.
4572 if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
4573 shouldChangeType(FromWidth: Known.getBitWidth(), ToWidth: NewWidth)) {
4574 IntegerType *Ty = IntegerType::get(C&: SI.getContext(), NumBits: NewWidth);
4575 Builder.SetInsertPoint(&SI);
4576 Value *NewCond = Builder.CreateTrunc(V: Cond, DestTy: Ty, Name: "trunc");
4577
4578 for (auto Case : SI.cases()) {
4579 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth);
4580 Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase));
4581 }
4582 return replaceOperand(I&: SI, OpNum: 0, V: NewCond);
4583 }
4584
4585 if (isa<UndefValue>(Val: Cond)) {
4586 handlePotentiallyDeadSuccessors(BB: SI.getParent(), /*LiveSucc*/ nullptr);
4587 return nullptr;
4588 }
4589 if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) {
4590 handlePotentiallyDeadSuccessors(BB: SI.getParent(),
4591 LiveSucc: SI.findCaseValue(C: CI)->getCaseSuccessor());
4592 return nullptr;
4593 }
4594
4595 return nullptr;
4596}
4597
4598Instruction *
4599InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
4600 auto *WO = dyn_cast<WithOverflowInst>(Val: EV.getAggregateOperand());
4601 if (!WO)
4602 return nullptr;
4603
4604 Intrinsic::ID OvID = WO->getIntrinsicID();
4605 const APInt *C = nullptr;
4606 if (match(V: WO->getRHS(), P: m_APIntAllowPoison(Res&: C))) {
4607 if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
4608 OvID == Intrinsic::umul_with_overflow)) {
4609 // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
4610 if (C->isAllOnes())
4611 return BinaryOperator::CreateNeg(Op: WO->getLHS());
4612 // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
4613 if (C->isPowerOf2()) {
4614 return BinaryOperator::CreateShl(
4615 V1: WO->getLHS(),
4616 V2: ConstantInt::get(Ty: WO->getLHS()->getType(), V: C->logBase2()));
4617 }
4618 }
4619 }
4620
4621 // We're extracting from an overflow intrinsic. See if we're the only user.
4622 // That allows us to simplify multiple result intrinsics to simpler things
4623 // that just get one value.
4624 if (!WO->hasOneUse())
4625 return nullptr;
4626
4627 // Check if we're grabbing only the result of a 'with overflow' intrinsic
4628 // and replace it with a traditional binary instruction.
4629 if (*EV.idx_begin() == 0) {
4630 Instruction::BinaryOps BinOp = WO->getBinaryOp();
4631 Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
4632 // Replace the old instruction's uses with poison.
4633 replaceInstUsesWith(I&: *WO, V: PoisonValue::get(T: WO->getType()));
4634 eraseInstFromFunction(I&: *WO);
4635 return BinaryOperator::Create(Op: BinOp, S1: LHS, S2: RHS);
4636 }
4637
4638 assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
4639
4640 // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
4641 if (OvID == Intrinsic::usub_with_overflow)
4642 return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
4643
4644 // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
4645 // +1 is not possible because we assume signed values.
4646 if (OvID == Intrinsic::smul_with_overflow &&
4647 WO->getLHS()->getType()->isIntOrIntVectorTy(BitWidth: 1))
4648 return BinaryOperator::CreateAnd(V1: WO->getLHS(), V2: WO->getRHS());
4649
4650 // extractvalue (umul_with_overflow X, X), 1 -> X u> 2^(N/2)-1
4651 if (OvID == Intrinsic::umul_with_overflow && WO->getLHS() == WO->getRHS()) {
4652 unsigned BitWidth = WO->getLHS()->getType()->getScalarSizeInBits();
4653 // Only handle even bitwidths for performance reasons.
4654 if (BitWidth % 2 == 0)
4655 return new ICmpInst(
4656 ICmpInst::ICMP_UGT, WO->getLHS(),
4657 ConstantInt::get(Ty: WO->getLHS()->getType(),
4658 V: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth / 2)));
4659 }
4660
4661 // If only the overflow result is used, and the right hand side is a
4662 // constant (or constant splat), we can remove the intrinsic by directly
4663 // checking for overflow.
4664 if (C) {
4665 // Compute the no-wrap range for LHS given RHS=C, then construct an
4666 // equivalent icmp, potentially using an offset.
4667 ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
4668 BinOp: WO->getBinaryOp(), Other: *C, NoWrapKind: WO->getNoWrapKind());
4669
4670 CmpInst::Predicate Pred;
4671 APInt NewRHSC, Offset;
4672 NWR.getEquivalentICmp(Pred, RHS&: NewRHSC, Offset);
4673 auto *OpTy = WO->getRHS()->getType();
4674 auto *NewLHS = WO->getLHS();
4675 if (Offset != 0)
4676 NewLHS = Builder.CreateAdd(LHS: NewLHS, RHS: ConstantInt::get(Ty: OpTy, V: Offset));
4677 return new ICmpInst(ICmpInst::getInversePredicate(pred: Pred), NewLHS,
4678 ConstantInt::get(Ty: OpTy, V: NewRHSC));
4679 }
4680
4681 return nullptr;
4682}
4683
4684static Value *foldFrexpOfSelect(ExtractValueInst &EV, IntrinsicInst *FrexpCall,
4685 SelectInst *SelectInst,
4686 InstCombiner::BuilderTy &Builder) {
4687 // Helper to fold frexp of select to select of frexp.
4688
4689 if (!SelectInst->hasOneUse() || !FrexpCall->hasOneUse())
4690 return nullptr;
4691 Value *Cond = SelectInst->getCondition();
4692 Value *TrueVal = SelectInst->getTrueValue();
4693 Value *FalseVal = SelectInst->getFalseValue();
4694
4695 const APFloat *ConstVal = nullptr;
4696 Value *VarOp = nullptr;
4697 bool ConstIsTrue = false;
4698
4699 if (match(V: TrueVal, P: m_APFloat(Res&: ConstVal))) {
4700 VarOp = FalseVal;
4701 ConstIsTrue = true;
4702 } else if (match(V: FalseVal, P: m_APFloat(Res&: ConstVal))) {
4703 VarOp = TrueVal;
4704 ConstIsTrue = false;
4705 } else {
4706 return nullptr;
4707 }
4708
4709 Builder.SetInsertPoint(&EV);
4710
4711 CallInst *NewFrexp =
4712 Builder.CreateCall(Callee: FrexpCall->getCalledFunction(), Args: {VarOp}, Name: "frexp");
4713 NewFrexp->copyIRFlags(V: FrexpCall);
4714
4715 Value *NewEV = Builder.CreateExtractValue(Agg: NewFrexp, Idxs: 0, Name: "mantissa");
4716
4717 int Exp;
4718 APFloat Mantissa = frexp(X: *ConstVal, Exp, RM: APFloat::rmNearestTiesToEven);
4719
4720 Constant *ConstantMantissa = ConstantFP::get(Ty: TrueVal->getType(), V: Mantissa);
4721
4722 Value *NewSel = Builder.CreateSelectFMF(
4723 C: Cond, True: ConstIsTrue ? ConstantMantissa : NewEV,
4724 False: ConstIsTrue ? NewEV : ConstantMantissa, FMFSource: SelectInst, Name: "select.frexp");
4725 return NewSel;
4726}
4727Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
4728 Value *Agg = EV.getAggregateOperand();
4729
4730 if (!EV.hasIndices())
4731 return replaceInstUsesWith(I&: EV, V: Agg);
4732
4733 if (Value *V = simplifyExtractValueInst(Agg, Idxs: EV.getIndices(),
4734 Q: SQ.getWithInstruction(I: &EV)))
4735 return replaceInstUsesWith(I&: EV, V);
4736
4737 Value *Cond, *TrueVal, *FalseVal;
4738 if (match(V: &EV, P: m_ExtractValue<0>(V: m_Intrinsic<Intrinsic::frexp>(Op0: m_Select(
4739 C: m_Value(V&: Cond), L: m_Value(V&: TrueVal), R: m_Value(V&: FalseVal)))))) {
4740 auto *SelInst =
4741 cast<SelectInst>(Val: cast<IntrinsicInst>(Val: Agg)->getArgOperand(i: 0));
4742 if (Value *Result =
4743 foldFrexpOfSelect(EV, FrexpCall: cast<IntrinsicInst>(Val: Agg), SelectInst: SelInst, Builder))
4744 return replaceInstUsesWith(I&: EV, V: Result);
4745 }
4746 if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Val: Agg)) {
4747 // We're extracting from an insertvalue instruction, compare the indices
4748 const unsigned *exti, *exte, *insi, *inse;
4749 for (exti = EV.idx_begin(), insi = IV->idx_begin(),
4750 exte = EV.idx_end(), inse = IV->idx_end();
4751 exti != exte && insi != inse;
4752 ++exti, ++insi) {
4753 if (*insi != *exti)
4754 // The insert and extract both reference distinctly different elements.
4755 // This means the extract is not influenced by the insert, and we can
4756 // replace the aggregate operand of the extract with the aggregate
4757 // operand of the insert. i.e., replace
4758 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
4759 // %E = extractvalue { i32, { i32 } } %I, 0
4760 // with
4761 // %E = extractvalue { i32, { i32 } } %A, 0
4762 return ExtractValueInst::Create(Agg: IV->getAggregateOperand(),
4763 Idxs: EV.getIndices());
4764 }
4765 if (exti == exte && insi == inse)
4766 // Both iterators are at the end: Index lists are identical. Replace
4767 // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
4768 // %C = extractvalue { i32, { i32 } } %B, 1, 0
4769 // with "i32 42"
4770 return replaceInstUsesWith(I&: EV, V: IV->getInsertedValueOperand());
4771 if (exti == exte) {
4772 // The extract list is a prefix of the insert list. i.e. replace
4773 // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
4774 // %E = extractvalue { i32, { i32 } } %I, 1
4775 // with
4776 // %X = extractvalue { i32, { i32 } } %A, 1
4777 // %E = insertvalue { i32 } %X, i32 42, 0
4778 // by switching the order of the insert and extract (though the
4779 // insertvalue should be left in, since it may have other uses).
4780 Value *NewEV = Builder.CreateExtractValue(Agg: IV->getAggregateOperand(),
4781 Idxs: EV.getIndices());
4782 return InsertValueInst::Create(Agg: NewEV, Val: IV->getInsertedValueOperand(),
4783 Idxs: ArrayRef(insi, inse));
4784 }
4785 if (insi == inse)
4786 // The insert list is a prefix of the extract list
4787 // We can simply remove the common indices from the extract and make it
4788 // operate on the inserted value instead of the insertvalue result.
4789 // i.e., replace
4790 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
4791 // %E = extractvalue { i32, { i32 } } %I, 1, 0
4792 // with
4793 // %E extractvalue { i32 } { i32 42 }, 0
4794 return ExtractValueInst::Create(Agg: IV->getInsertedValueOperand(),
4795 Idxs: ArrayRef(exti, exte));
4796 }
4797
4798 if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
4799 return R;
4800
4801 if (LoadInst *L = dyn_cast<LoadInst>(Val: Agg)) {
4802 // Bail out if the aggregate contains scalable vector type
4803 if (auto *STy = dyn_cast<StructType>(Val: Agg->getType());
4804 STy && STy->isScalableTy())
4805 return nullptr;
4806
4807 // If the (non-volatile) load only has one use, we can rewrite this to a
4808 // load from a GEP. This reduces the size of the load. If a load is used
4809 // only by extractvalue instructions then this either must have been
4810 // optimized before, or it is a struct with padding, in which case we
4811 // don't want to do the transformation as it loses padding knowledge.
4812 if (L->isSimple() && L->hasOneUse()) {
4813 // extractvalue has integer indices, getelementptr has Value*s. Convert.
4814 SmallVector<Value*, 4> Indices;
4815 // Prefix an i32 0 since we need the first element.
4816 Indices.push_back(Elt: Builder.getInt32(C: 0));
4817 for (unsigned Idx : EV.indices())
4818 Indices.push_back(Elt: Builder.getInt32(C: Idx));
4819
4820 // We need to insert these at the location of the old load, not at that of
4821 // the extractvalue.
4822 Builder.SetInsertPoint(L);
4823 Value *GEP = Builder.CreateInBoundsGEP(Ty: L->getType(),
4824 Ptr: L->getPointerOperand(), IdxList: Indices);
4825 Instruction *NL = Builder.CreateLoad(Ty: EV.getType(), Ptr: GEP);
4826 // Whatever aliasing information we had for the orignal load must also
4827 // hold for the smaller load, so propagate the annotations.
4828 NL->setAAMetadata(L->getAAMetadata());
4829 // Returning the load directly will cause the main loop to insert it in
4830 // the wrong spot, so use replaceInstUsesWith().
4831 return replaceInstUsesWith(I&: EV, V: NL);
4832 }
4833 }
4834
4835 if (auto *PN = dyn_cast<PHINode>(Val: Agg))
4836 if (Instruction *Res = foldOpIntoPhi(I&: EV, PN))
4837 return Res;
4838
4839 // Canonicalize extract (select Cond, TV, FV)
4840 // -> select cond, (extract TV), (extract FV)
4841 if (auto *SI = dyn_cast<SelectInst>(Val: Agg))
4842 if (Instruction *R = FoldOpIntoSelect(Op&: EV, SI, /*FoldWithMultiUse=*/true))
4843 return R;
4844
4845 // We could simplify extracts from other values. Note that nested extracts may
4846 // already be simplified implicitly by the above: extract (extract (insert) )
4847 // will be translated into extract ( insert ( extract ) ) first and then just
4848 // the value inserted, if appropriate. Similarly for extracts from single-use
4849 // loads: extract (extract (load)) will be translated to extract (load (gep))
4850 // and if again single-use then via load (gep (gep)) to load (gep).
4851 // However, double extracts from e.g. function arguments or return values
4852 // aren't handled yet.
4853 return nullptr;
4854}
4855
4856/// Return 'true' if the given typeinfo will match anything.
4857static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
4858 switch (Personality) {
4859 case EHPersonality::GNU_C:
4860 case EHPersonality::GNU_C_SjLj:
4861 case EHPersonality::Rust:
4862 // The GCC C EH and Rust personality only exists to support cleanups, so
4863 // it's not clear what the semantics of catch clauses are.
4864 return false;
4865 case EHPersonality::Unknown:
4866 return false;
4867 case EHPersonality::GNU_Ada:
4868 // While __gnat_all_others_value will match any Ada exception, it doesn't
4869 // match foreign exceptions (or didn't, before gcc-4.7).
4870 return false;
4871 case EHPersonality::GNU_CXX:
4872 case EHPersonality::GNU_CXX_SjLj:
4873 case EHPersonality::GNU_ObjC:
4874 case EHPersonality::MSVC_X86SEH:
4875 case EHPersonality::MSVC_TableSEH:
4876 case EHPersonality::MSVC_CXX:
4877 case EHPersonality::CoreCLR:
4878 case EHPersonality::Wasm_CXX:
4879 case EHPersonality::XL_CXX:
4880 case EHPersonality::ZOS_CXX:
4881 return TypeInfo->isNullValue();
4882 }
4883 llvm_unreachable("invalid enum");
4884}
4885
4886static bool shorter_filter(const Value *LHS, const Value *RHS) {
4887 return
4888 cast<ArrayType>(Val: LHS->getType())->getNumElements()
4889 <
4890 cast<ArrayType>(Val: RHS->getType())->getNumElements();
4891}
4892
4893Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
4894 // The logic here should be correct for any real-world personality function.
4895 // However if that turns out not to be true, the offending logic can always
4896 // be conditioned on the personality function, like the catch-all logic is.
4897 EHPersonality Personality =
4898 classifyEHPersonality(Pers: LI.getParent()->getParent()->getPersonalityFn());
4899
4900 // Simplify the list of clauses, eg by removing repeated catch clauses
4901 // (these are often created by inlining).
4902 bool MakeNewInstruction = false; // If true, recreate using the following:
4903 SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
4904 bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
4905
4906 SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
4907 for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
4908 bool isLastClause = i + 1 == e;
4909 if (LI.isCatch(Idx: i)) {
4910 // A catch clause.
4911 Constant *CatchClause = LI.getClause(Idx: i);
4912 Constant *TypeInfo = CatchClause->stripPointerCasts();
4913
4914 // If we already saw this clause, there is no point in having a second
4915 // copy of it.
4916 if (AlreadyCaught.insert(Ptr: TypeInfo).second) {
4917 // This catch clause was not already seen.
4918 NewClauses.push_back(Elt: CatchClause);
4919 } else {
4920 // Repeated catch clause - drop the redundant copy.
4921 MakeNewInstruction = true;
4922 }
4923
4924 // If this is a catch-all then there is no point in keeping any following
4925 // clauses or marking the landingpad as having a cleanup.
4926 if (isCatchAll(Personality, TypeInfo)) {
4927 if (!isLastClause)
4928 MakeNewInstruction = true;
4929 CleanupFlag = false;
4930 break;
4931 }
4932 } else {
4933 // A filter clause. If any of the filter elements were already caught
4934 // then they can be dropped from the filter. It is tempting to try to
4935 // exploit the filter further by saying that any typeinfo that does not
4936 // occur in the filter can't be caught later (and thus can be dropped).
4937 // However this would be wrong, since typeinfos can match without being
4938 // equal (for example if one represents a C++ class, and the other some
4939 // class derived from it).
4940 assert(LI.isFilter(i) && "Unsupported landingpad clause!");
4941 Constant *FilterClause = LI.getClause(Idx: i);
4942 ArrayType *FilterType = cast<ArrayType>(Val: FilterClause->getType());
4943 unsigned NumTypeInfos = FilterType->getNumElements();
4944
4945 // An empty filter catches everything, so there is no point in keeping any
4946 // following clauses or marking the landingpad as having a cleanup. By
4947 // dealing with this case here the following code is made a bit simpler.
4948 if (!NumTypeInfos) {
4949 NewClauses.push_back(Elt: FilterClause);
4950 if (!isLastClause)
4951 MakeNewInstruction = true;
4952 CleanupFlag = false;
4953 break;
4954 }
4955
4956 bool MakeNewFilter = false; // If true, make a new filter.
4957 SmallVector<Constant *, 16> NewFilterElts; // New elements.
4958 if (isa<ConstantAggregateZero>(Val: FilterClause)) {
4959 // Not an empty filter - it contains at least one null typeinfo.
4960 assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
4961 Constant *TypeInfo =
4962 Constant::getNullValue(Ty: FilterType->getElementType());
4963 // If this typeinfo is a catch-all then the filter can never match.
4964 if (isCatchAll(Personality, TypeInfo)) {
4965 // Throw the filter away.
4966 MakeNewInstruction = true;
4967 continue;
4968 }
4969
4970 // There is no point in having multiple copies of this typeinfo, so
4971 // discard all but the first copy if there is more than one.
4972 NewFilterElts.push_back(Elt: TypeInfo);
4973 if (NumTypeInfos > 1)
4974 MakeNewFilter = true;
4975 } else {
4976 ConstantArray *Filter = cast<ConstantArray>(Val: FilterClause);
4977 SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
4978 NewFilterElts.reserve(N: NumTypeInfos);
4979
4980 // Remove any filter elements that were already caught or that already
4981 // occurred in the filter. While there, see if any of the elements are
4982 // catch-alls. If so, the filter can be discarded.
4983 bool SawCatchAll = false;
4984 for (unsigned j = 0; j != NumTypeInfos; ++j) {
4985 Constant *Elt = Filter->getOperand(i_nocapture: j);
4986 Constant *TypeInfo = Elt->stripPointerCasts();
4987 if (isCatchAll(Personality, TypeInfo)) {
4988 // This element is a catch-all. Bail out, noting this fact.
4989 SawCatchAll = true;
4990 break;
4991 }
4992
4993 // Even if we've seen a type in a catch clause, we don't want to
4994 // remove it from the filter. An unexpected type handler may be
4995 // set up for a call site which throws an exception of the same
4996 // type caught. In order for the exception thrown by the unexpected
4997 // handler to propagate correctly, the filter must be correctly
4998 // described for the call site.
4999 //
5000 // Example:
5001 //
5002 // void unexpected() { throw 1;}
5003 // void foo() throw (int) {
5004 // std::set_unexpected(unexpected);
5005 // try {
5006 // throw 2.0;
5007 // } catch (int i) {}
5008 // }
5009
5010 // There is no point in having multiple copies of the same typeinfo in
5011 // a filter, so only add it if we didn't already.
5012 if (SeenInFilter.insert(Ptr: TypeInfo).second)
5013 NewFilterElts.push_back(Elt: cast<Constant>(Val: Elt));
5014 }
5015 // A filter containing a catch-all cannot match anything by definition.
5016 if (SawCatchAll) {
5017 // Throw the filter away.
5018 MakeNewInstruction = true;
5019 continue;
5020 }
5021
5022 // If we dropped something from the filter, make a new one.
5023 if (NewFilterElts.size() < NumTypeInfos)
5024 MakeNewFilter = true;
5025 }
5026 if (MakeNewFilter) {
5027 FilterType = ArrayType::get(ElementType: FilterType->getElementType(),
5028 NumElements: NewFilterElts.size());
5029 FilterClause = ConstantArray::get(T: FilterType, V: NewFilterElts);
5030 MakeNewInstruction = true;
5031 }
5032
5033 NewClauses.push_back(Elt: FilterClause);
5034
5035 // If the new filter is empty then it will catch everything so there is
5036 // no point in keeping any following clauses or marking the landingpad
5037 // as having a cleanup. The case of the original filter being empty was
5038 // already handled above.
5039 if (MakeNewFilter && !NewFilterElts.size()) {
5040 assert(MakeNewInstruction && "New filter but not a new instruction!");
5041 CleanupFlag = false;
5042 break;
5043 }
5044 }
5045 }
5046
5047 // If several filters occur in a row then reorder them so that the shortest
5048 // filters come first (those with the smallest number of elements). This is
5049 // advantageous because shorter filters are more likely to match, speeding up
5050 // unwinding, but mostly because it increases the effectiveness of the other
5051 // filter optimizations below.
5052 for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
5053 unsigned j;
5054 // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
5055 for (j = i; j != e; ++j)
5056 if (!isa<ArrayType>(Val: NewClauses[j]->getType()))
5057 break;
5058
5059 // Check whether the filters are already sorted by length. We need to know
5060 // if sorting them is actually going to do anything so that we only make a
5061 // new landingpad instruction if it does.
5062 for (unsigned k = i; k + 1 < j; ++k)
5063 if (shorter_filter(LHS: NewClauses[k+1], RHS: NewClauses[k])) {
5064 // Not sorted, so sort the filters now. Doing an unstable sort would be
5065 // correct too but reordering filters pointlessly might confuse users.
5066 std::stable_sort(first: NewClauses.begin() + i, last: NewClauses.begin() + j,
5067 comp: shorter_filter);
5068 MakeNewInstruction = true;
5069 break;
5070 }
5071
5072 // Look for the next batch of filters.
5073 i = j + 1;
5074 }
5075
5076 // If typeinfos matched if and only if equal, then the elements of a filter L
5077 // that occurs later than a filter F could be replaced by the intersection of
5078 // the elements of F and L. In reality two typeinfos can match without being
5079 // equal (for example if one represents a C++ class, and the other some class
5080 // derived from it) so it would be wrong to perform this transform in general.
5081 // However the transform is correct and useful if F is a subset of L. In that
5082 // case L can be replaced by F, and thus removed altogether since repeating a
5083 // filter is pointless. So here we look at all pairs of filters F and L where
5084 // L follows F in the list of clauses, and remove L if every element of F is
5085 // an element of L. This can occur when inlining C++ functions with exception
5086 // specifications.
5087 for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
5088 // Examine each filter in turn.
5089 Value *Filter = NewClauses[i];
5090 ArrayType *FTy = dyn_cast<ArrayType>(Val: Filter->getType());
5091 if (!FTy)
5092 // Not a filter - skip it.
5093 continue;
5094 unsigned FElts = FTy->getNumElements();
5095 // Examine each filter following this one. Doing this backwards means that
5096 // we don't have to worry about filters disappearing under us when removed.
5097 for (unsigned j = NewClauses.size() - 1; j != i; --j) {
5098 Value *LFilter = NewClauses[j];
5099 ArrayType *LTy = dyn_cast<ArrayType>(Val: LFilter->getType());
5100 if (!LTy)
5101 // Not a filter - skip it.
5102 continue;
5103 // If Filter is a subset of LFilter, i.e. every element of Filter is also
5104 // an element of LFilter, then discard LFilter.
5105 SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
5106 // If Filter is empty then it is a subset of LFilter.
5107 if (!FElts) {
5108 // Discard LFilter.
5109 NewClauses.erase(CI: J);
5110 MakeNewInstruction = true;
5111 // Move on to the next filter.
5112 continue;
5113 }
5114 unsigned LElts = LTy->getNumElements();
5115 // If Filter is longer than LFilter then it cannot be a subset of it.
5116 if (FElts > LElts)
5117 // Move on to the next filter.
5118 continue;
5119 // At this point we know that LFilter has at least one element.
5120 if (isa<ConstantAggregateZero>(Val: LFilter)) { // LFilter only contains zeros.
5121 // Filter is a subset of LFilter iff Filter contains only zeros (as we
5122 // already know that Filter is not longer than LFilter).
5123 if (isa<ConstantAggregateZero>(Val: Filter)) {
5124 assert(FElts <= LElts && "Should have handled this case earlier!");
5125 // Discard LFilter.
5126 NewClauses.erase(CI: J);
5127 MakeNewInstruction = true;
5128 }
5129 // Move on to the next filter.
5130 continue;
5131 }
5132 ConstantArray *LArray = cast<ConstantArray>(Val: LFilter);
5133 if (isa<ConstantAggregateZero>(Val: Filter)) { // Filter only contains zeros.
5134 // Since Filter is non-empty and contains only zeros, it is a subset of
5135 // LFilter iff LFilter contains a zero.
5136 assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
5137 for (unsigned l = 0; l != LElts; ++l)
5138 if (LArray->getOperand(i_nocapture: l)->isNullValue()) {
5139 // LFilter contains a zero - discard it.
5140 NewClauses.erase(CI: J);
5141 MakeNewInstruction = true;
5142 break;
5143 }
5144 // Move on to the next filter.
5145 continue;
5146 }
5147 // At this point we know that both filters are ConstantArrays. Loop over
5148 // operands to see whether every element of Filter is also an element of
5149 // LFilter. Since filters tend to be short this is probably faster than
5150 // using a method that scales nicely.
5151 ConstantArray *FArray = cast<ConstantArray>(Val: Filter);
5152 bool AllFound = true;
5153 for (unsigned f = 0; f != FElts; ++f) {
5154 Value *FTypeInfo = FArray->getOperand(i_nocapture: f)->stripPointerCasts();
5155 AllFound = false;
5156 for (unsigned l = 0; l != LElts; ++l) {
5157 Value *LTypeInfo = LArray->getOperand(i_nocapture: l)->stripPointerCasts();
5158 if (LTypeInfo == FTypeInfo) {
5159 AllFound = true;
5160 break;
5161 }
5162 }
5163 if (!AllFound)
5164 break;
5165 }
5166 if (AllFound) {
5167 // Discard LFilter.
5168 NewClauses.erase(CI: J);
5169 MakeNewInstruction = true;
5170 }
5171 // Move on to the next filter.
5172 }
5173 }
5174
5175 // If we changed any of the clauses, replace the old landingpad instruction
5176 // with a new one.
5177 if (MakeNewInstruction) {
5178 LandingPadInst *NLI = LandingPadInst::Create(RetTy: LI.getType(),
5179 NumReservedClauses: NewClauses.size());
5180 for (Constant *C : NewClauses)
5181 NLI->addClause(ClauseVal: C);
5182 // A landing pad with no clauses must have the cleanup flag set. It is
5183 // theoretically possible, though highly unlikely, that we eliminated all
5184 // clauses. If so, force the cleanup flag to true.
5185 if (NewClauses.empty())
5186 CleanupFlag = true;
5187 NLI->setCleanup(CleanupFlag);
5188 return NLI;
5189 }
5190
5191 // Even if none of the clauses changed, we may nonetheless have understood
5192 // that the cleanup flag is pointless. Clear it if so.
5193 if (LI.isCleanup() != CleanupFlag) {
5194 assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
5195 LI.setCleanup(CleanupFlag);
5196 return &LI;
5197 }
5198
5199 return nullptr;
5200}
5201
5202Value *
5203InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
5204 // Try to push freeze through instructions that propagate but don't produce
5205 // poison as far as possible. If an operand of freeze follows three
5206 // conditions 1) one-use, 2) does not produce poison, and 3) has all but one
5207 // guaranteed-non-poison operands then push the freeze through to the one
5208 // operand that is not guaranteed non-poison. The actual transform is as
5209 // follows.
5210 // Op1 = ... ; Op1 can be posion
5211 // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and only have
5212 // ; single guaranteed-non-poison operands
5213 // ... = Freeze(Op0)
5214 // =>
5215 // Op1 = ...
5216 // Op1.fr = Freeze(Op1)
5217 // ... = Inst(Op1.fr, NonPoisonOps...)
5218 auto *OrigOp = OrigFI.getOperand(i_nocapture: 0);
5219 auto *OrigOpInst = dyn_cast<Instruction>(Val: OrigOp);
5220
5221 // While we could change the other users of OrigOp to use freeze(OrigOp), that
5222 // potentially reduces their optimization potential, so let's only do this iff
5223 // the OrigOp is only used by the freeze.
5224 if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(Val: OrigOp))
5225 return nullptr;
5226
5227 // We can't push the freeze through an instruction which can itself create
5228 // poison. If the only source of new poison is flags, we can simply
5229 // strip them (since we know the only use is the freeze and nothing can
5230 // benefit from them.)
5231 if (canCreateUndefOrPoison(Op: cast<Operator>(Val: OrigOp),
5232 /*ConsiderFlagsAndMetadata*/ false))
5233 return nullptr;
5234
5235 // If operand is guaranteed not to be poison, there is no need to add freeze
5236 // to the operand. So we first find the operand that is not guaranteed to be
5237 // poison.
5238 Value *MaybePoisonOperand = nullptr;
5239 for (Value *V : OrigOpInst->operands()) {
5240 if (isa<MetadataAsValue>(Val: V) || isGuaranteedNotToBeUndefOrPoison(V) ||
5241 // Treat identical operands as a single operand.
5242 (MaybePoisonOperand && MaybePoisonOperand == V))
5243 continue;
5244 if (!MaybePoisonOperand)
5245 MaybePoisonOperand = V;
5246 else
5247 return nullptr;
5248 }
5249
5250 OrigOpInst->dropPoisonGeneratingAnnotations();
5251
5252 // If all operands are guaranteed to be non-poison, we can drop freeze.
5253 if (!MaybePoisonOperand)
5254 return OrigOp;
5255
5256 Builder.SetInsertPoint(OrigOpInst);
5257 Value *FrozenMaybePoisonOperand = Builder.CreateFreeze(
5258 V: MaybePoisonOperand, Name: MaybePoisonOperand->getName() + ".fr");
5259
5260 OrigOpInst->replaceUsesOfWith(From: MaybePoisonOperand, To: FrozenMaybePoisonOperand);
5261 return OrigOp;
5262}
5263
5264Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
5265 PHINode *PN) {
5266 // Detect whether this is a recurrence with a start value and some number of
5267 // backedge values. We'll check whether we can push the freeze through the
5268 // backedge values (possibly dropping poison flags along the way) until we
5269 // reach the phi again. In that case, we can move the freeze to the start
5270 // value.
5271 Use *StartU = nullptr;
5272 SmallVector<Value *> Worklist;
5273 for (Use &U : PN->incoming_values()) {
5274 if (DT.dominates(A: PN->getParent(), B: PN->getIncomingBlock(U))) {
5275 // Add backedge value to worklist.
5276 Worklist.push_back(Elt: U.get());
5277 continue;
5278 }
5279
5280 // Don't bother handling multiple start values.
5281 if (StartU)
5282 return nullptr;
5283 StartU = &U;
5284 }
5285
5286 if (!StartU || Worklist.empty())
5287 return nullptr; // Not a recurrence.
5288
5289 Value *StartV = StartU->get();
5290 BasicBlock *StartBB = PN->getIncomingBlock(U: *StartU);
5291 bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(V: StartV);
5292 // We can't insert freeze if the start value is the result of the
5293 // terminator (e.g. an invoke).
5294 if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
5295 return nullptr;
5296
5297 SmallPtrSet<Value *, 32> Visited;
5298 SmallVector<Instruction *> DropFlags;
5299 while (!Worklist.empty()) {
5300 Value *V = Worklist.pop_back_val();
5301 if (!Visited.insert(Ptr: V).second)
5302 continue;
5303
5304 if (Visited.size() > 32)
5305 return nullptr; // Limit the total number of values we inspect.
5306
5307 // Assume that PN is non-poison, because it will be after the transform.
5308 if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
5309 continue;
5310
5311 Instruction *I = dyn_cast<Instruction>(Val: V);
5312 if (!I || canCreateUndefOrPoison(Op: cast<Operator>(Val: I),
5313 /*ConsiderFlagsAndMetadata*/ false))
5314 return nullptr;
5315
5316 DropFlags.push_back(Elt: I);
5317 append_range(C&: Worklist, R: I->operands());
5318 }
5319
5320 for (Instruction *I : DropFlags)
5321 I->dropPoisonGeneratingAnnotations();
5322
5323 if (StartNeedsFreeze) {
5324 Builder.SetInsertPoint(StartBB->getTerminator());
5325 Value *FrozenStartV = Builder.CreateFreeze(V: StartV,
5326 Name: StartV->getName() + ".fr");
5327 replaceUse(U&: *StartU, NewValue: FrozenStartV);
5328 }
5329 return replaceInstUsesWith(I&: FI, V: PN);
5330}
5331
5332bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) {
5333 Value *Op = FI.getOperand(i_nocapture: 0);
5334
5335 if (isa<Constant>(Val: Op) || Op->hasOneUse())
5336 return false;
5337
5338 // Move the freeze directly after the definition of its operand, so that
5339 // it dominates the maximum number of uses. Note that it may not dominate
5340 // *all* uses if the operand is an invoke/callbr and the use is in a phi on
5341 // the normal/default destination. This is why the domination check in the
5342 // replacement below is still necessary.
5343 BasicBlock::iterator MoveBefore;
5344 if (isa<Argument>(Val: Op)) {
5345 MoveBefore =
5346 FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
5347 } else {
5348 auto MoveBeforeOpt = cast<Instruction>(Val: Op)->getInsertionPointAfterDef();
5349 if (!MoveBeforeOpt)
5350 return false;
5351 MoveBefore = *MoveBeforeOpt;
5352 }
5353
5354 // Re-point iterator to come after any debug-info records.
5355 MoveBefore.setHeadBit(false);
5356
5357 bool Changed = false;
5358 if (&FI != &*MoveBefore) {
5359 FI.moveBefore(BB&: *MoveBefore->getParent(), I: MoveBefore);
5360 Changed = true;
5361 }
5362
5363 Changed |= Op->replaceUsesWithIf(
5364 New: &FI, ShouldReplace: [&](Use &U) -> bool { return DT.dominates(Def: &FI, U); });
5365
5366 return Changed;
5367}
5368
5369// Check if any direct or bitcast user of this value is a shuffle instruction.
5370static bool isUsedWithinShuffleVector(Value *V) {
5371 for (auto *U : V->users()) {
5372 if (isa<ShuffleVectorInst>(Val: U))
5373 return true;
5374 else if (match(V: U, P: m_BitCast(Op: m_Specific(V))) && isUsedWithinShuffleVector(V: U))
5375 return true;
5376 }
5377 return false;
5378}
5379
5380Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
5381 Value *Op0 = I.getOperand(i_nocapture: 0);
5382
5383 if (Value *V = simplifyFreezeInst(Op: Op0, Q: SQ.getWithInstruction(I: &I)))
5384 return replaceInstUsesWith(I, V);
5385
5386 // freeze (phi const, x) --> phi const, (freeze x)
5387 if (auto *PN = dyn_cast<PHINode>(Val: Op0)) {
5388 if (Instruction *NV = foldOpIntoPhi(I, PN))
5389 return NV;
5390 if (Instruction *NV = foldFreezeIntoRecurrence(FI&: I, PN))
5391 return NV;
5392 }
5393
5394 if (Value *NI = pushFreezeToPreventPoisonFromPropagating(OrigFI&: I))
5395 return replaceInstUsesWith(I, V: NI);
5396
5397 // If I is freeze(undef), check its uses and fold it to a fixed constant.
5398 // - or: pick -1
5399 // - select's condition: if the true value is constant, choose it by making
5400 // the condition true.
5401 // - phi: pick the common constant across operands
5402 // - default: pick 0
5403 //
5404 // Note that this transform is intentionally done here rather than
5405 // via an analysis in InstSimplify or at individual user sites. That is
5406 // because we must produce the same value for all uses of the freeze -
5407 // it's the reason "freeze" exists!
5408 //
5409 // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
5410 // duplicating logic for binops at least.
5411 auto getUndefReplacement = [&](Type *Ty) {
5412 auto pickCommonConstantFromPHI = [](PHINode &PN) -> Value * {
5413 // phi(freeze(undef), C, C). Choose C for freeze so the PHI can be
5414 // removed.
5415 Constant *BestValue = nullptr;
5416 for (Value *V : PN.incoming_values()) {
5417 if (match(V, P: m_Freeze(Op: m_Undef())))
5418 continue;
5419
5420 Constant *C = dyn_cast<Constant>(Val: V);
5421 if (!C)
5422 return nullptr;
5423
5424 if (!isGuaranteedNotToBeUndefOrPoison(V: C))
5425 return nullptr;
5426
5427 if (BestValue && BestValue != C)
5428 return nullptr;
5429
5430 BestValue = C;
5431 }
5432 return BestValue;
5433 };
5434
5435 Value *NullValue = Constant::getNullValue(Ty);
5436 Value *BestValue = nullptr;
5437 for (auto *U : I.users()) {
5438 Value *V = NullValue;
5439 if (match(V: U, P: m_Or(L: m_Value(), R: m_Value())))
5440 V = ConstantInt::getAllOnesValue(Ty);
5441 else if (match(V: U, P: m_Select(C: m_Specific(V: &I), L: m_Constant(), R: m_Value())))
5442 V = ConstantInt::getTrue(Ty);
5443 else if (match(V: U, P: m_c_Select(L: m_Specific(V: &I), R: m_Value(V)))) {
5444 if (V == &I || !isGuaranteedNotToBeUndefOrPoison(V, AC: &AC, CtxI: &I, DT: &DT))
5445 V = NullValue;
5446 } else if (auto *PHI = dyn_cast<PHINode>(Val: U)) {
5447 if (Value *MaybeV = pickCommonConstantFromPHI(*PHI))
5448 V = MaybeV;
5449 }
5450
5451 if (!BestValue)
5452 BestValue = V;
5453 else if (BestValue != V)
5454 BestValue = NullValue;
5455 }
5456 assert(BestValue && "Must have at least one use");
5457 assert(BestValue != &I && "Cannot replace with itself");
5458 return BestValue;
5459 };
5460
5461 if (match(V: Op0, P: m_Undef())) {
5462 // Don't fold freeze(undef/poison) if it's used as a vector operand in
5463 // a shuffle. This may improve codegen for shuffles that allow
5464 // unspecified inputs.
5465 if (isUsedWithinShuffleVector(V: &I))
5466 return nullptr;
5467 return replaceInstUsesWith(I, V: getUndefReplacement(I.getType()));
5468 }
5469
5470 auto getFreezeVectorReplacement = [](Constant *C) -> Constant * {
5471 Type *Ty = C->getType();
5472 auto *VTy = dyn_cast<FixedVectorType>(Val: Ty);
5473 if (!VTy)
5474 return nullptr;
5475 unsigned NumElts = VTy->getNumElements();
5476 Constant *BestValue = Constant::getNullValue(Ty: VTy->getScalarType());
5477 for (unsigned i = 0; i != NumElts; ++i) {
5478 Constant *EltC = C->getAggregateElement(Elt: i);
5479 if (EltC && !match(V: EltC, P: m_Undef())) {
5480 BestValue = EltC;
5481 break;
5482 }
5483 }
5484 return Constant::replaceUndefsWith(C, Replacement: BestValue);
5485 };
5486
5487 Constant *C;
5488 if (match(V: Op0, P: m_Constant(C)) && C->containsUndefOrPoisonElement() &&
5489 !C->containsConstantExpression()) {
5490 if (Constant *Repl = getFreezeVectorReplacement(C))
5491 return replaceInstUsesWith(I, V: Repl);
5492 }
5493
5494 // Replace uses of Op with freeze(Op).
5495 if (freezeOtherUses(FI&: I))
5496 return &I;
5497
5498 return nullptr;
5499}
5500
5501/// Check for case where the call writes to an otherwise dead alloca. This
5502/// shows up for unused out-params in idiomatic C/C++ code. Note that this
5503/// helper *only* analyzes the write; doesn't check any other legality aspect.
5504static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
5505 auto *CB = dyn_cast<CallBase>(Val: I);
5506 if (!CB)
5507 // TODO: handle e.g. store to alloca here - only worth doing if we extend
5508 // to allow reload along used path as described below. Otherwise, this
5509 // is simply a store to a dead allocation which will be removed.
5510 return false;
5511 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: CB, TLI);
5512 if (!Dest)
5513 return false;
5514 auto *AI = dyn_cast<AllocaInst>(Val: getUnderlyingObject(V: Dest->Ptr));
5515 if (!AI)
5516 // TODO: allow malloc?
5517 return false;
5518 // TODO: allow memory access dominated by move point? Note that since AI
5519 // could have a reference to itself captured by the call, we would need to
5520 // account for cycles in doing so.
5521 SmallVector<const User *> AllocaUsers;
5522 SmallPtrSet<const User *, 4> Visited;
5523 auto pushUsers = [&](const Instruction &I) {
5524 for (const User *U : I.users()) {
5525 if (Visited.insert(Ptr: U).second)
5526 AllocaUsers.push_back(Elt: U);
5527 }
5528 };
5529 pushUsers(*AI);
5530 while (!AllocaUsers.empty()) {
5531 auto *UserI = cast<Instruction>(Val: AllocaUsers.pop_back_val());
5532 if (isa<GetElementPtrInst>(Val: UserI) || isa<AddrSpaceCastInst>(Val: UserI)) {
5533 pushUsers(*UserI);
5534 continue;
5535 }
5536 if (UserI == CB)
5537 continue;
5538 // TODO: support lifetime.start/end here
5539 return false;
5540 }
5541 return true;
5542}
5543
5544/// Try to move the specified instruction from its current block into the
5545/// beginning of DestBlock, which can only happen if it's safe to move the
5546/// instruction past all of the instructions between it and the end of its
5547/// block.
5548bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
5549 BasicBlock *DestBlock) {
5550 BasicBlock *SrcBlock = I->getParent();
5551
5552 // Cannot move control-flow-involving, volatile loads, vaarg, etc.
5553 if (isa<PHINode>(Val: I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
5554 I->isTerminator())
5555 return false;
5556
5557 // Do not sink static or dynamic alloca instructions. Static allocas must
5558 // remain in the entry block, and dynamic allocas must not be sunk in between
5559 // a stacksave / stackrestore pair, which would incorrectly shorten its
5560 // lifetime.
5561 if (isa<AllocaInst>(Val: I))
5562 return false;
5563
5564 // Do not sink into catchswitch blocks.
5565 if (isa<CatchSwitchInst>(Val: DestBlock->getTerminator()))
5566 return false;
5567
5568 // Do not sink convergent call instructions.
5569 if (auto *CI = dyn_cast<CallInst>(Val: I)) {
5570 if (CI->isConvergent())
5571 return false;
5572 }
5573
5574 // Unless we can prove that the memory write isn't visibile except on the
5575 // path we're sinking to, we must bail.
5576 if (I->mayWriteToMemory()) {
5577 if (!SoleWriteToDeadLocal(I, TLI))
5578 return false;
5579 }
5580
5581 // We can only sink load instructions if there is nothing between the load and
5582 // the end of block that could change the value.
5583 if (I->mayReadFromMemory() &&
5584 !I->hasMetadata(KindID: LLVMContext::MD_invariant_load)) {
5585 // We don't want to do any sophisticated alias analysis, so we only check
5586 // the instructions after I in I's parent block if we try to sink to its
5587 // successor block.
5588 if (DestBlock->getUniquePredecessor() != I->getParent())
5589 return false;
5590 for (BasicBlock::iterator Scan = std::next(x: I->getIterator()),
5591 E = I->getParent()->end();
5592 Scan != E; ++Scan)
5593 if (Scan->mayWriteToMemory())
5594 return false;
5595 }
5596
5597 I->dropDroppableUses(ShouldDrop: [&](const Use *U) {
5598 auto *I = dyn_cast<Instruction>(Val: U->getUser());
5599 if (I && I->getParent() != DestBlock) {
5600 Worklist.add(I);
5601 return true;
5602 }
5603 return false;
5604 });
5605 /// FIXME: We could remove droppable uses that are not dominated by
5606 /// the new position.
5607
5608 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
5609 I->moveBefore(BB&: *DestBlock, I: InsertPos);
5610 ++NumSunkInst;
5611
5612 // Also sink all related debug uses from the source basic block. Otherwise we
5613 // get debug use before the def. Attempt to salvage debug uses first, to
5614 // maximise the range variables have location for. If we cannot salvage, then
5615 // mark the location undef: we know it was supposed to receive a new location
5616 // here, but that computation has been sunk.
5617 SmallVector<DbgVariableRecord *, 2> DbgVariableRecords;
5618 findDbgUsers(V: I, DbgVariableRecords);
5619 if (!DbgVariableRecords.empty())
5620 tryToSinkInstructionDbgVariableRecords(I, InsertPos, SrcBlock, DestBlock,
5621 DPUsers&: DbgVariableRecords);
5622
5623 // PS: there are numerous flaws with this behaviour, not least that right now
5624 // assignments can be re-ordered past other assignments to the same variable
5625 // if they use different Values. Creating more undef assignements can never be
5626 // undone. And salvaging all users outside of this block can un-necessarily
5627 // alter the lifetime of the live-value that the variable refers to.
5628 // Some of these things can be resolved by tolerating debug use-before-defs in
5629 // LLVM-IR, however it depends on the instruction-referencing CodeGen backend
5630 // being used for more architectures.
5631
5632 return true;
5633}
5634
5635void InstCombinerImpl::tryToSinkInstructionDbgVariableRecords(
5636 Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
5637 BasicBlock *DestBlock,
5638 SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords) {
5639 // For all debug values in the destination block, the sunk instruction
5640 // will still be available, so they do not need to be dropped.
5641
5642 // Fetch all DbgVariableRecords not already in the destination.
5643 SmallVector<DbgVariableRecord *, 2> DbgVariableRecordsToSalvage;
5644 for (auto &DVR : DbgVariableRecords)
5645 if (DVR->getParent() != DestBlock)
5646 DbgVariableRecordsToSalvage.push_back(Elt: DVR);
5647
5648 // Fetch a second collection, of DbgVariableRecords in the source block that
5649 // we're going to sink.
5650 SmallVector<DbgVariableRecord *> DbgVariableRecordsToSink;
5651 for (DbgVariableRecord *DVR : DbgVariableRecordsToSalvage)
5652 if (DVR->getParent() == SrcBlock)
5653 DbgVariableRecordsToSink.push_back(Elt: DVR);
5654
5655 // Sort DbgVariableRecords according to their position in the block. This is a
5656 // partial order: DbgVariableRecords attached to different instructions will
5657 // be ordered by the instruction order, but DbgVariableRecords attached to the
5658 // same instruction won't have an order.
5659 auto Order = [](DbgVariableRecord *A, DbgVariableRecord *B) -> bool {
5660 return B->getInstruction()->comesBefore(Other: A->getInstruction());
5661 };
5662 llvm::stable_sort(Range&: DbgVariableRecordsToSink, C: Order);
5663
5664 // If there are two assignments to the same variable attached to the same
5665 // instruction, the ordering between the two assignments is important. Scan
5666 // for this (rare) case and establish which is the last assignment.
5667 using InstVarPair = std::pair<const Instruction *, DebugVariable>;
5668 SmallDenseMap<InstVarPair, DbgVariableRecord *> FilterOutMap;
5669 if (DbgVariableRecordsToSink.size() > 1) {
5670 SmallDenseMap<InstVarPair, unsigned> CountMap;
5671 // Count how many assignments to each variable there is per instruction.
5672 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
5673 DebugVariable DbgUserVariable =
5674 DebugVariable(DVR->getVariable(), DVR->getExpression(),
5675 DVR->getDebugLoc()->getInlinedAt());
5676 CountMap[std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable)] += 1;
5677 }
5678
5679 // If there are any instructions with two assignments, add them to the
5680 // FilterOutMap to record that they need extra filtering.
5681 SmallPtrSet<const Instruction *, 4> DupSet;
5682 for (auto It : CountMap) {
5683 if (It.second > 1) {
5684 FilterOutMap[It.first] = nullptr;
5685 DupSet.insert(Ptr: It.first.first);
5686 }
5687 }
5688
5689 // For all instruction/variable pairs needing extra filtering, find the
5690 // latest assignment.
5691 for (const Instruction *Inst : DupSet) {
5692 for (DbgVariableRecord &DVR :
5693 llvm::reverse(C: filterDbgVars(R: Inst->getDbgRecordRange()))) {
5694 DebugVariable DbgUserVariable =
5695 DebugVariable(DVR.getVariable(), DVR.getExpression(),
5696 DVR.getDebugLoc()->getInlinedAt());
5697 auto FilterIt =
5698 FilterOutMap.find(Val: std::make_pair(x&: Inst, y&: DbgUserVariable));
5699 if (FilterIt == FilterOutMap.end())
5700 continue;
5701 if (FilterIt->second != nullptr)
5702 continue;
5703 FilterIt->second = &DVR;
5704 }
5705 }
5706 }
5707
5708 // Perform cloning of the DbgVariableRecords that we plan on sinking, filter
5709 // out any duplicate assignments identified above.
5710 SmallVector<DbgVariableRecord *, 2> DVRClones;
5711 SmallSet<DebugVariable, 4> SunkVariables;
5712 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
5713 if (DVR->Type == DbgVariableRecord::LocationType::Declare)
5714 continue;
5715
5716 DebugVariable DbgUserVariable =
5717 DebugVariable(DVR->getVariable(), DVR->getExpression(),
5718 DVR->getDebugLoc()->getInlinedAt());
5719
5720 // For any variable where there were multiple assignments in the same place,
5721 // ignore all but the last assignment.
5722 if (!FilterOutMap.empty()) {
5723 InstVarPair IVP = std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable);
5724 auto It = FilterOutMap.find(Val: IVP);
5725
5726 // Filter out.
5727 if (It != FilterOutMap.end() && It->second != DVR)
5728 continue;
5729 }
5730
5731 if (!SunkVariables.insert(V: DbgUserVariable).second)
5732 continue;
5733
5734 if (DVR->isDbgAssign())
5735 continue;
5736
5737 DVRClones.emplace_back(Args: DVR->clone());
5738 LLVM_DEBUG(dbgs() << "CLONE: " << *DVRClones.back() << '\n');
5739 }
5740
5741 // Perform salvaging without the clones, then sink the clones.
5742 if (DVRClones.empty())
5743 return;
5744
5745 salvageDebugInfoForDbgValues(I&: *I, DPInsns: DbgVariableRecordsToSalvage);
5746
5747 // The clones are in reverse order of original appearance. Assert that the
5748 // head bit is set on the iterator as we _should_ have received it via
5749 // getFirstInsertionPt. Inserting like this will reverse the clone order as
5750 // we'll repeatedly insert at the head, such as:
5751 // DVR-3 (third insertion goes here)
5752 // DVR-2 (second insertion goes here)
5753 // DVR-1 (first insertion goes here)
5754 // Any-Prior-DVRs
5755 // InsertPtInst
5756 assert(InsertPos.getHeadBit());
5757 for (DbgVariableRecord *DVRClone : DVRClones) {
5758 InsertPos->getParent()->insertDbgRecordBefore(DR: DVRClone, Here: InsertPos);
5759 LLVM_DEBUG(dbgs() << "SINK: " << *DVRClone << '\n');
5760 }
5761}
5762
5763bool InstCombinerImpl::run() {
5764 while (!Worklist.isEmpty()) {
5765 // Walk deferred instructions in reverse order, and push them to the
5766 // worklist, which means they'll end up popped from the worklist in-order.
5767 while (Instruction *I = Worklist.popDeferred()) {
5768 // Check to see if we can DCE the instruction. We do this already here to
5769 // reduce the number of uses and thus allow other folds to trigger.
5770 // Note that eraseInstFromFunction() may push additional instructions on
5771 // the deferred worklist, so this will DCE whole instruction chains.
5772 if (isInstructionTriviallyDead(I, TLI: &TLI)) {
5773 eraseInstFromFunction(I&: *I);
5774 ++NumDeadInst;
5775 continue;
5776 }
5777
5778 Worklist.push(I);
5779 }
5780
5781 Instruction *I = Worklist.removeOne();
5782 if (I == nullptr) continue; // skip null values.
5783
5784 // Check to see if we can DCE the instruction.
5785 if (isInstructionTriviallyDead(I, TLI: &TLI)) {
5786 eraseInstFromFunction(I&: *I);
5787 ++NumDeadInst;
5788 continue;
5789 }
5790
5791 if (!DebugCounter::shouldExecute(Counter&: VisitCounter))
5792 continue;
5793
5794 // See if we can trivially sink this instruction to its user if we can
5795 // prove that the successor is not executed more frequently than our block.
5796 // Return the UserBlock if successful.
5797 auto getOptionalSinkBlockForInst =
5798 [this](Instruction *I) -> std::optional<BasicBlock *> {
5799 if (!EnableCodeSinking)
5800 return std::nullopt;
5801
5802 BasicBlock *BB = I->getParent();
5803 BasicBlock *UserParent = nullptr;
5804 unsigned NumUsers = 0;
5805
5806 for (Use &U : I->uses()) {
5807 User *User = U.getUser();
5808 if (User->isDroppable()) {
5809 // Do not sink if there are dereferenceable assumes that would be
5810 // removed.
5811 auto II = dyn_cast<IntrinsicInst>(Val: User);
5812 if (II->getIntrinsicID() != Intrinsic::assume ||
5813 !II->getOperandBundle(Name: "dereferenceable"))
5814 continue;
5815 }
5816
5817 if (NumUsers > MaxSinkNumUsers)
5818 return std::nullopt;
5819
5820 Instruction *UserInst = cast<Instruction>(Val: User);
5821 // Special handling for Phi nodes - get the block the use occurs in.
5822 BasicBlock *UserBB = UserInst->getParent();
5823 if (PHINode *PN = dyn_cast<PHINode>(Val: UserInst))
5824 UserBB = PN->getIncomingBlock(U);
5825 // Bail out if we have uses in different blocks. We don't do any
5826 // sophisticated analysis (i.e finding NearestCommonDominator of these
5827 // use blocks).
5828 if (UserParent && UserParent != UserBB)
5829 return std::nullopt;
5830 UserParent = UserBB;
5831
5832 // Make sure these checks are done only once, naturally we do the checks
5833 // the first time we get the userparent, this will save compile time.
5834 if (NumUsers == 0) {
5835 // Try sinking to another block. If that block is unreachable, then do
5836 // not bother. SimplifyCFG should handle it.
5837 if (UserParent == BB || !DT.isReachableFromEntry(A: UserParent))
5838 return std::nullopt;
5839
5840 auto *Term = UserParent->getTerminator();
5841 // See if the user is one of our successors that has only one
5842 // predecessor, so that we don't have to split the critical edge.
5843 // Another option where we can sink is a block that ends with a
5844 // terminator that does not pass control to other block (such as
5845 // return or unreachable or resume). In this case:
5846 // - I dominates the User (by SSA form);
5847 // - the User will be executed at most once.
5848 // So sinking I down to User is always profitable or neutral.
5849 if (UserParent->getUniquePredecessor() != BB && !succ_empty(I: Term))
5850 return std::nullopt;
5851
5852 assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
5853 }
5854
5855 NumUsers++;
5856 }
5857
5858 // No user or only has droppable users.
5859 if (!UserParent)
5860 return std::nullopt;
5861
5862 return UserParent;
5863 };
5864
5865 auto OptBB = getOptionalSinkBlockForInst(I);
5866 if (OptBB) {
5867 auto *UserParent = *OptBB;
5868 // Okay, the CFG is simple enough, try to sink this instruction.
5869 if (tryToSinkInstruction(I, DestBlock: UserParent)) {
5870 LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
5871 MadeIRChange = true;
5872 // We'll add uses of the sunk instruction below, but since
5873 // sinking can expose opportunities for it's *operands* add
5874 // them to the worklist
5875 for (Use &U : I->operands())
5876 if (Instruction *OpI = dyn_cast<Instruction>(Val: U.get()))
5877 Worklist.push(I: OpI);
5878 }
5879 }
5880
5881 // Now that we have an instruction, try combining it to simplify it.
5882 Builder.SetInsertPoint(I);
5883 Builder.CollectMetadataToCopy(
5884 Src: I, MetadataKinds: {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
5885
5886#ifndef NDEBUG
5887 std::string OrigI;
5888#endif
5889 LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS););
5890 LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
5891
5892 if (Instruction *Result = visit(I&: *I)) {
5893 ++NumCombined;
5894 // Should we replace the old instruction with a new one?
5895 if (Result != I) {
5896 LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
5897 << " New = " << *Result << '\n');
5898
5899 // We copy the old instruction's DebugLoc to the new instruction, unless
5900 // InstCombine already assigned a DebugLoc to it, in which case we
5901 // should trust the more specifically selected DebugLoc.
5902 Result->setDebugLoc(Result->getDebugLoc().orElse(Other: I->getDebugLoc()));
5903 // We also copy annotation metadata to the new instruction.
5904 Result->copyMetadata(SrcInst: *I, WL: LLVMContext::MD_annotation);
5905 // Everything uses the new instruction now.
5906 I->replaceAllUsesWith(V: Result);
5907
5908 // Move the name to the new instruction first.
5909 Result->takeName(V: I);
5910
5911 // Insert the new instruction into the basic block...
5912 BasicBlock *InstParent = I->getParent();
5913 BasicBlock::iterator InsertPos = I->getIterator();
5914
5915 // Are we replace a PHI with something that isn't a PHI, or vice versa?
5916 if (isa<PHINode>(Val: Result) != isa<PHINode>(Val: I)) {
5917 // We need to fix up the insertion point.
5918 if (isa<PHINode>(Val: I)) // PHI -> Non-PHI
5919 InsertPos = InstParent->getFirstInsertionPt();
5920 else // Non-PHI -> PHI
5921 InsertPos = InstParent->getFirstNonPHIIt();
5922 }
5923
5924 Result->insertInto(ParentBB: InstParent, It: InsertPos);
5925
5926 // Push the new instruction and any users onto the worklist.
5927 Worklist.pushUsersToWorkList(I&: *Result);
5928 Worklist.push(I: Result);
5929
5930 eraseInstFromFunction(I&: *I);
5931 } else {
5932 LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
5933 << " New = " << *I << '\n');
5934
5935 // If the instruction was modified, it's possible that it is now dead.
5936 // if so, remove it.
5937 if (isInstructionTriviallyDead(I, TLI: &TLI)) {
5938 eraseInstFromFunction(I&: *I);
5939 } else {
5940 Worklist.pushUsersToWorkList(I&: *I);
5941 Worklist.push(I);
5942 }
5943 }
5944 MadeIRChange = true;
5945 }
5946 }
5947
5948 Worklist.zap();
5949 return MadeIRChange;
5950}
5951
5952// Track the scopes used by !alias.scope and !noalias. In a function, a
5953// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
5954// by both sets. If not, the declaration of the scope can be safely omitted.
5955// The MDNode of the scope can be omitted as well for the instructions that are
5956// part of this function. We do not do that at this point, as this might become
5957// too time consuming to do.
5958class AliasScopeTracker {
5959 SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
5960 SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
5961
5962public:
5963 void analyse(Instruction *I) {
5964 // This seems to be faster than checking 'mayReadOrWriteMemory()'.
5965 if (!I->hasMetadataOtherThanDebugLoc())
5966 return;
5967
5968 auto Track = [](Metadata *ScopeList, auto &Container) {
5969 const auto *MDScopeList = dyn_cast_or_null<MDNode>(Val: ScopeList);
5970 if (!MDScopeList || !Container.insert(MDScopeList).second)
5971 return;
5972 for (const auto &MDOperand : MDScopeList->operands())
5973 if (auto *MDScope = dyn_cast<MDNode>(Val: MDOperand))
5974 Container.insert(MDScope);
5975 };
5976
5977 Track(I->getMetadata(KindID: LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
5978 Track(I->getMetadata(KindID: LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
5979 }
5980
5981 bool isNoAliasScopeDeclDead(Instruction *Inst) {
5982 NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: Inst);
5983 if (!Decl)
5984 return false;
5985
5986 assert(Decl->use_empty() &&
5987 "llvm.experimental.noalias.scope.decl in use ?");
5988 const MDNode *MDSL = Decl->getScopeList();
5989 assert(MDSL->getNumOperands() == 1 &&
5990 "llvm.experimental.noalias.scope should refer to a single scope");
5991 auto &MDOperand = MDSL->getOperand(I: 0);
5992 if (auto *MD = dyn_cast<MDNode>(Val: MDOperand))
5993 return !UsedAliasScopesAndLists.contains(Ptr: MD) ||
5994 !UsedNoAliasScopesAndLists.contains(Ptr: MD);
5995
5996 // Not an MDNode ? throw away.
5997 return true;
5998 }
5999};
6000
6001/// Populate the IC worklist from a function, by walking it in reverse
6002/// post-order and adding all reachable code to the worklist.
6003///
6004/// This has a couple of tricks to make the code faster and more powerful. In
6005/// particular, we constant fold and DCE instructions as we go, to avoid adding
6006/// them to the worklist (this significantly speeds up instcombine on code where
6007/// many instructions are dead or constant). Additionally, if we find a branch
6008/// whose condition is a known constant, we only visit the reachable successors.
6009bool InstCombinerImpl::prepareWorklist(Function &F) {
6010 bool MadeIRChange = false;
6011 SmallPtrSet<BasicBlock *, 32> LiveBlocks;
6012 SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
6013 DenseMap<Constant *, Constant *> FoldedConstants;
6014 AliasScopeTracker SeenAliasScopes;
6015
6016 auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) {
6017 for (BasicBlock *Succ : successors(BB))
6018 if (Succ != LiveSucc && DeadEdges.insert(V: {BB, Succ}).second)
6019 for (PHINode &PN : Succ->phis())
6020 for (Use &U : PN.incoming_values())
6021 if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(Val: U)) {
6022 U.set(PoisonValue::get(T: PN.getType()));
6023 MadeIRChange = true;
6024 }
6025 };
6026
6027 for (BasicBlock *BB : RPOT) {
6028 if (!BB->isEntryBlock() && all_of(Range: predecessors(BB), P: [&](BasicBlock *Pred) {
6029 return DeadEdges.contains(V: {Pred, BB}) || DT.dominates(A: BB, B: Pred);
6030 })) {
6031 HandleOnlyLiveSuccessor(BB, nullptr);
6032 continue;
6033 }
6034 LiveBlocks.insert(Ptr: BB);
6035
6036 for (Instruction &Inst : llvm::make_early_inc_range(Range&: *BB)) {
6037 // ConstantProp instruction if trivially constant.
6038 if (!Inst.use_empty() &&
6039 (Inst.getNumOperands() == 0 || isa<Constant>(Val: Inst.getOperand(i: 0))))
6040 if (Constant *C = ConstantFoldInstruction(I: &Inst, DL, TLI: &TLI)) {
6041 LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
6042 << '\n');
6043 Inst.replaceAllUsesWith(V: C);
6044 ++NumConstProp;
6045 if (isInstructionTriviallyDead(I: &Inst, TLI: &TLI))
6046 Inst.eraseFromParent();
6047 MadeIRChange = true;
6048 continue;
6049 }
6050
6051 // See if we can constant fold its operands.
6052 for (Use &U : Inst.operands()) {
6053 if (!isa<ConstantVector>(Val: U) && !isa<ConstantExpr>(Val: U))
6054 continue;
6055
6056 auto *C = cast<Constant>(Val&: U);
6057 Constant *&FoldRes = FoldedConstants[C];
6058 if (!FoldRes)
6059 FoldRes = ConstantFoldConstant(C, DL, TLI: &TLI);
6060
6061 if (FoldRes != C) {
6062 LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
6063 << "\n Old = " << *C
6064 << "\n New = " << *FoldRes << '\n');
6065 U = FoldRes;
6066 MadeIRChange = true;
6067 }
6068 }
6069
6070 // Skip processing debug and pseudo intrinsics in InstCombine. Processing
6071 // these call instructions consumes non-trivial amount of time and
6072 // provides no value for the optimization.
6073 if (!Inst.isDebugOrPseudoInst()) {
6074 InstrsForInstructionWorklist.push_back(Elt: &Inst);
6075 SeenAliasScopes.analyse(I: &Inst);
6076 }
6077 }
6078
6079 // If this is a branch or switch on a constant, mark only the single
6080 // live successor. Otherwise assume all successors are live.
6081 Instruction *TI = BB->getTerminator();
6082 if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: TI)) {
6083 if (isa<UndefValue>(Val: BI->getCondition())) {
6084 // Branch on undef is UB.
6085 HandleOnlyLiveSuccessor(BB, nullptr);
6086 continue;
6087 }
6088 if (auto *Cond = dyn_cast<ConstantInt>(Val: BI->getCondition())) {
6089 bool CondVal = Cond->getZExtValue();
6090 HandleOnlyLiveSuccessor(BB, BI->getSuccessor(i: !CondVal));
6091 continue;
6092 }
6093 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
6094 if (isa<UndefValue>(Val: SI->getCondition())) {
6095 // Switch on undef is UB.
6096 HandleOnlyLiveSuccessor(BB, nullptr);
6097 continue;
6098 }
6099 if (auto *Cond = dyn_cast<ConstantInt>(Val: SI->getCondition())) {
6100 HandleOnlyLiveSuccessor(BB,
6101 SI->findCaseValue(C: Cond)->getCaseSuccessor());
6102 continue;
6103 }
6104 }
6105 }
6106
6107 // Remove instructions inside unreachable blocks. This prevents the
6108 // instcombine code from having to deal with some bad special cases, and
6109 // reduces use counts of instructions.
6110 for (BasicBlock &BB : F) {
6111 if (LiveBlocks.count(Ptr: &BB))
6112 continue;
6113
6114 unsigned NumDeadInstInBB;
6115 NumDeadInstInBB = removeAllNonTerminatorAndEHPadInstructions(BB: &BB);
6116
6117 MadeIRChange |= NumDeadInstInBB != 0;
6118 NumDeadInst += NumDeadInstInBB;
6119 }
6120
6121 // Once we've found all of the instructions to add to instcombine's worklist,
6122 // add them in reverse order. This way instcombine will visit from the top
6123 // of the function down. This jives well with the way that it adds all uses
6124 // of instructions to the worklist after doing a transformation, thus avoiding
6125 // some N^2 behavior in pathological cases.
6126 Worklist.reserve(Size: InstrsForInstructionWorklist.size());
6127 for (Instruction *Inst : reverse(C&: InstrsForInstructionWorklist)) {
6128 // DCE instruction if trivially dead. As we iterate in reverse program
6129 // order here, we will clean up whole chains of dead instructions.
6130 if (isInstructionTriviallyDead(I: Inst, TLI: &TLI) ||
6131 SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
6132 ++NumDeadInst;
6133 LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
6134 salvageDebugInfo(I&: *Inst);
6135 Inst->eraseFromParent();
6136 MadeIRChange = true;
6137 continue;
6138 }
6139
6140 Worklist.push(I: Inst);
6141 }
6142
6143 return MadeIRChange;
6144}
6145
6146void InstCombiner::computeBackEdges() {
6147 // Collect backedges.
6148 SmallVector<bool> Visited(F.getMaxBlockNumber());
6149 for (BasicBlock *BB : RPOT) {
6150 Visited[BB->getNumber()] = true;
6151 for (BasicBlock *Succ : successors(BB))
6152 if (Visited[Succ->getNumber()])
6153 BackEdges.insert(V: {BB, Succ});
6154 }
6155 ComputedBackEdges = true;
6156}
6157
6158static bool combineInstructionsOverFunction(
6159 Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
6160 AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
6161 DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
6162 BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI,
6163 const InstCombineOptions &Opts) {
6164 auto &DL = F.getDataLayout();
6165 bool VerifyFixpoint = Opts.VerifyFixpoint &&
6166 !F.hasFnAttribute(Kind: "instcombine-no-verify-fixpoint");
6167
6168 /// Builder - This is an IRBuilder that automatically inserts new
6169 /// instructions into the worklist when they are created.
6170 IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder(
6171 F.getContext(), TargetFolder(DL),
6172 IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
6173 Worklist.add(I);
6174 if (auto *Assume = dyn_cast<AssumeInst>(Val: I))
6175 AC.registerAssumption(CI: Assume);
6176 }));
6177
6178 ReversePostOrderTraversal<BasicBlock *> RPOT(&F.front());
6179
6180 // Lower dbg.declare intrinsics otherwise their value may be clobbered
6181 // by instcombiner.
6182 bool MadeIRChange = false;
6183 if (ShouldLowerDbgDeclare)
6184 MadeIRChange = LowerDbgDeclare(F);
6185
6186 // Iterate while there is work to do.
6187 unsigned Iteration = 0;
6188 while (true) {
6189 if (Iteration >= Opts.MaxIterations && !VerifyFixpoint) {
6190 LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations
6191 << " on " << F.getName()
6192 << " reached; stopping without verifying fixpoint\n");
6193 break;
6194 }
6195
6196 ++Iteration;
6197 ++NumWorklistIterations;
6198 LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
6199 << F.getName() << "\n");
6200
6201 InstCombinerImpl IC(Worklist, Builder, F, AA, AC, TLI, TTI, DT, ORE, BFI,
6202 BPI, PSI, DL, RPOT);
6203 IC.MaxArraySizeForCombine = MaxArraySize;
6204 bool MadeChangeInThisIteration = IC.prepareWorklist(F);
6205 MadeChangeInThisIteration |= IC.run();
6206 if (!MadeChangeInThisIteration)
6207 break;
6208
6209 MadeIRChange = true;
6210 if (Iteration > Opts.MaxIterations) {
6211 reportFatalUsageError(
6212 reason: "Instruction Combining on " + Twine(F.getName()) +
6213 " did not reach a fixpoint after " + Twine(Opts.MaxIterations) +
6214 " iterations. " +
6215 "Use 'instcombine<no-verify-fixpoint>' or function attribute "
6216 "'instcombine-no-verify-fixpoint' to suppress this error.");
6217 }
6218 }
6219
6220 if (Iteration == 1)
6221 ++NumOneIteration;
6222 else if (Iteration == 2)
6223 ++NumTwoIterations;
6224 else if (Iteration == 3)
6225 ++NumThreeIterations;
6226 else
6227 ++NumFourOrMoreIterations;
6228
6229 return MadeIRChange;
6230}
6231
/// Construct the pass, storing \p Opts in the Options member.
InstCombinePass::InstCombinePass(InstCombineOptions Opts) : Options(Opts) {}
6233
6234void InstCombinePass::printPipeline(
6235 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
6236 static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline(
6237 OS, MapClassName2PassName);
6238 OS << '<';
6239 OS << "max-iterations=" << Options.MaxIterations << ";";
6240 OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint";
6241 OS << '>';
6242}
6243
// Definition of the pass's static ID member. Its address (&ID) is what
// InstCombinePass::run passes to the last-run tracker's shouldSkip()/update().
char InstCombinePass::ID = 0;
6245
6246PreservedAnalyses InstCombinePass::run(Function &F,
6247 FunctionAnalysisManager &AM) {
6248 auto &LRT = AM.getResult<LastRunTrackingAnalysis>(IR&: F);
6249 // No changes since last InstCombine pass, exit early.
6250 if (LRT.shouldSkip(ID: &ID))
6251 return PreservedAnalyses::all();
6252
6253 auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F);
6254 auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
6255 auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F);
6256 auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
6257 auto &TTI = AM.getResult<TargetIRAnalysis>(IR&: F);
6258
6259 auto *AA = &AM.getResult<AAManager>(IR&: F);
6260 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F);
6261 ProfileSummaryInfo *PSI =
6262 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(IR&: *F.getParent());
6263 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
6264 &AM.getResult<BlockFrequencyAnalysis>(IR&: F) : nullptr;
6265 auto *BPI = AM.getCachedResult<BranchProbabilityAnalysis>(IR&: F);
6266
6267 if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
6268 BFI, BPI, PSI, Opts: Options)) {
6269 // No changes, all analyses are preserved.
6270 LRT.update(ID: &ID, /*Changed=*/false);
6271 return PreservedAnalyses::all();
6272 }
6273
6274 // Mark all the analyses that instcombine updates as preserved.
6275 PreservedAnalyses PA;
6276 LRT.update(ID: &ID, /*Changed=*/true);
6277 PA.preserve<LastRunTrackingAnalysis>();
6278 PA.preserveSet<CFGAnalyses>();
6279 return PA;
6280}
6281
/// Declare to the legacy pass manager which analyses this pass requires and
/// which ones it keeps valid.
///
/// \param AU  Usage record that is filled in.
void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
  // Declare the CFG as preserved.
  AU.setPreservesCFG();

  // Analyses fetched unconditionally by runOnFunction().
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<TargetLibraryInfoWrapperPass>();
  AU.addRequired<TargetTransformInfoWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();

  // Analyses declared as still valid after this pass has run.
  AU.addPreserved<DominatorTreeWrapperPass>();
  AU.addPreserved<AAResultsWrapperPass>();
  AU.addPreserved<BasicAAWrapperPass>();
  AU.addPreserved<GlobalsAAWrapperPass>();

  // Profile data: the summary is always required; block frequency info is
  // registered through the lazy BFI helper and is only queried by
  // runOnFunction() when a profile summary exists.
  AU.addRequired<ProfileSummaryInfoWrapperPass>();
  LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
}
6297
6298bool InstructionCombiningPass::runOnFunction(Function &F) {
6299 if (skipFunction(F))
6300 return false;
6301
6302 // Required analyses.
6303 auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
6304 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
6305 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
6306 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
6307 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
6308 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
6309
6310 // Optional analyses.
6311 ProfileSummaryInfo *PSI =
6312 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
6313 BlockFrequencyInfo *BFI =
6314 (PSI && PSI->hasProfileSummary()) ?
6315 &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
6316 nullptr;
6317 BranchProbabilityInfo *BPI = nullptr;
6318 if (auto *WrapperPass =
6319 getAnalysisIfAvailable<BranchProbabilityInfoWrapperPass>())
6320 BPI = &WrapperPass->getBPI();
6321
6322 return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
6323 BFI, BPI, PSI, Opts: InstCombineOptions());
6324}
6325
// Definition of the legacy pass's static ID member; it is handed to the
// FunctionPass base class by the InstructionCombiningPass constructor.
char InstructionCombiningPass::ID = 0;
6327
/// Construct the legacy InstCombine pass, passing the static ID to the
/// FunctionPass base class.
InstructionCombiningPass::InstructionCombiningPass() : FunctionPass(ID) {}
6329
// Legacy pass registration for InstructionCombiningPass. Per the
// INITIALIZE_PASS_* macro parameters, "instcombine" is the pass argument
// string, "Combine redundant instructions" is its description, and the two
// trailing `false` values are the CFG-only and is-analysis flags.
// Each INITIALIZE_PASS_DEPENDENCY line names a pass this one depends on; the
// list covers the analyses requested in getAnalysisUsage() plus
// GlobalsAAWrapperPass, which getAnalysisUsage() only marks as preserved.
INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
                      "Combine redundant instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
                    "Combine redundant instructions", false, false)
6343
6344// Initialization Routines.
/// Initialize the InstCombine library by registering the legacy
/// InstructionCombiningPass with \p Registry.
void llvm::initializeInstCombine(PassRegistry &Registry) {
  initializeInstructionCombiningPassPass(Registry);
}
6348
/// Factory for the legacy InstCombine pass.
///
/// \returns a newly heap-allocated InstructionCombiningPass; this function
///          does not retain or free it.
FunctionPass *llvm::createInstructionCombiningPass() {
  return new InstructionCombiningPass();
}
6352