1//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// InstructionCombining - Combine instructions to form fewer, simple
10// instructions. This pass does not modify the CFG. This pass is where
11// algebraic simplification happens.
12//
13// This pass combines things like:
14// %Y = add i32 %X, 1
15// %Z = add i32 %Y, 1
16// into:
17// %Z = add i32 %X, 2
18//
19// This is a simple worklist driven algorithm.
20//
21// This pass guarantees that the following canonicalizations are performed on
22// the program:
23// 1. If a binary operator has a constant operand, it is moved to the RHS
24// 2. Bitwise operators with constant operands are always grouped so that
25// shifts are performed first, then or's, then and's, then xor's.
26// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27// 4. All cmp instructions on boolean values are replaced with logical ops
28// 5. add X, X is represented as (X*2) => (X << 1)
29// 6. Multiplies with a power-of-two constant argument are transformed into
30// shifts.
31// ... etc.
32//
33//===----------------------------------------------------------------------===//
34
35#include "InstCombineInternal.h"
36#include "llvm/ADT/APFloat.h"
37#include "llvm/ADT/APInt.h"
38#include "llvm/ADT/ArrayRef.h"
39#include "llvm/ADT/DenseMap.h"
40#include "llvm/ADT/SmallPtrSet.h"
41#include "llvm/ADT/SmallVector.h"
42#include "llvm/ADT/Statistic.h"
43#include "llvm/Analysis/AliasAnalysis.h"
44#include "llvm/Analysis/AssumptionCache.h"
45#include "llvm/Analysis/BasicAliasAnalysis.h"
46#include "llvm/Analysis/BlockFrequencyInfo.h"
47#include "llvm/Analysis/CFG.h"
48#include "llvm/Analysis/ConstantFolding.h"
49#include "llvm/Analysis/GlobalsModRef.h"
50#include "llvm/Analysis/InstructionSimplify.h"
51#include "llvm/Analysis/LastRunTrackingAnalysis.h"
52#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
53#include "llvm/Analysis/MemoryBuiltins.h"
54#include "llvm/Analysis/OptimizationRemarkEmitter.h"
55#include "llvm/Analysis/ProfileSummaryInfo.h"
56#include "llvm/Analysis/TargetFolder.h"
57#include "llvm/Analysis/TargetLibraryInfo.h"
58#include "llvm/Analysis/TargetTransformInfo.h"
59#include "llvm/Analysis/Utils/Local.h"
60#include "llvm/Analysis/ValueTracking.h"
61#include "llvm/Analysis/VectorUtils.h"
62#include "llvm/IR/BasicBlock.h"
63#include "llvm/IR/CFG.h"
64#include "llvm/IR/Constant.h"
65#include "llvm/IR/Constants.h"
66#include "llvm/IR/DIBuilder.h"
67#include "llvm/IR/DataLayout.h"
68#include "llvm/IR/DebugInfo.h"
69#include "llvm/IR/DerivedTypes.h"
70#include "llvm/IR/Dominators.h"
71#include "llvm/IR/EHPersonalities.h"
72#include "llvm/IR/Function.h"
73#include "llvm/IR/GetElementPtrTypeIterator.h"
74#include "llvm/IR/IRBuilder.h"
75#include "llvm/IR/InstrTypes.h"
76#include "llvm/IR/Instruction.h"
77#include "llvm/IR/Instructions.h"
78#include "llvm/IR/IntrinsicInst.h"
79#include "llvm/IR/Intrinsics.h"
80#include "llvm/IR/Metadata.h"
81#include "llvm/IR/Operator.h"
82#include "llvm/IR/PassManager.h"
83#include "llvm/IR/PatternMatch.h"
84#include "llvm/IR/Type.h"
85#include "llvm/IR/Use.h"
86#include "llvm/IR/User.h"
87#include "llvm/IR/Value.h"
88#include "llvm/IR/ValueHandle.h"
89#include "llvm/InitializePasses.h"
90#include "llvm/Support/Casting.h"
91#include "llvm/Support/CommandLine.h"
92#include "llvm/Support/Compiler.h"
93#include "llvm/Support/Debug.h"
94#include "llvm/Support/DebugCounter.h"
95#include "llvm/Support/ErrorHandling.h"
96#include "llvm/Support/KnownBits.h"
97#include "llvm/Support/KnownFPClass.h"
98#include "llvm/Support/raw_ostream.h"
99#include "llvm/Transforms/InstCombine/InstCombine.h"
100#include "llvm/Transforms/Utils/BasicBlockUtils.h"
101#include "llvm/Transforms/Utils/Local.h"
102#include <algorithm>
103#include <cassert>
104#include <cstdint>
105#include <memory>
106#include <optional>
107#include <string>
108#include <utility>
109
110#define DEBUG_TYPE "instcombine"
111#include "llvm/Transforms/Utils/InstructionWorklist.h"
112#include <optional>
113
114using namespace llvm;
115using namespace llvm::PatternMatch;
116
// Statistics about how many worklist iterations were performed, and how many
// functions needed one, two, three, or four-or-more iterations.
117STATISTIC(NumWorklistIterations,
118 "Number of instruction combining iterations performed");
119STATISTIC(NumOneIteration, "Number of functions with one iteration");
120STATISTIC(NumTwoIterations, "Number of functions with two iterations");
121STATISTIC(NumThreeIterations, "Number of functions with three iterations");
122STATISTIC(NumFourOrMoreIterations,
123 "Number of functions with four or more iterations");
124
// Statistics counting the individual kinds of transformation. In the visible
// part of this file, NumReassoc is bumped by SimplifyAssociativeOrCommutative
// and NumFactor by tryFactorization.
125STATISTIC(NumCombined , "Number of insts combined");
126STATISTIC(NumConstProp, "Number of constant folds");
127STATISTIC(NumDeadInst , "Number of dead inst eliminated");
128STATISTIC(NumSunkInst , "Number of instructions sunk");
129STATISTIC(NumExpand, "Number of expansions");
130STATISTIC(NumFactor , "Number of factorizations");
131STATISTIC(NumReassoc , "Number of reassociations");
// Debug counter named "instcombine-visit"; per its description it controls
// which instructions are visited.
132DEBUG_COUNTER(VisitCounter, "instcombine-visit",
133 "Controls which instructions are visited");
134
// -instcombine-code-sinking: enables code sinking. Defaults to true.
135static cl::opt<bool> EnableCodeSinking("instcombine-code-sinking",
136 cl::desc("Enable code sinking"),
137 cl::init(Val: true));
138
// -instcombine-max-sink-users: maximum number of undroppable users for
// instruction sinking. Defaults to 32.
139static cl::opt<unsigned> MaxSinkNumUsers(
140 "instcombine-max-sink-users", cl::init(Val: 32),
141 cl::desc("Maximum number of undroppable users for instruction sinking"));
142
// -instcombine-maxarray-size: maximum array size considered when doing a
// combine. Defaults to 1024.
143static cl::opt<unsigned>
144MaxArraySize("instcombine-maxarray-size", cl::init(Val: 1024),
145 cl::desc("Maximum array size considered when doing a combine"));
146
// Declaration only: this boolean command-line option is declared `extern`
// here, and its definition is not in the visible part of this file.
147namespace llvm {
148extern cl::opt<bool> ProfcheckDisableMetadataFixes;
149} // end namespace llvm
150
151// FIXME: Remove this flag when it is no longer necessary to convert
152// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
153// increases variable availability at the cost of accuracy. Variables that
154// cannot be promoted by mem2reg or SROA will be described as living in memory
155// for their entire lifetime. However, passes like DSE and instcombine can
156// delete stores to the alloca, leading to misleading and inaccurate debug
157// information. This flag can be removed when those passes are fixed.
//
// The option is hidden (cl::Hidden) and named "instcombine-lower-dbg-declare".
// NOTE(review): it is declared as cl::opt<unsigned> but initialized with
// `true`; presumably it is only ever tested as a boolean -- confirm before
// changing the type, since that could alter how the flag is parsed.
158static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
159 cl::Hidden, cl::init(Val: true));
160
161std::optional<Instruction *>
162InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) {
163 // Handle target specific intrinsics
164 if (II.getCalledFunction()->isTargetIntrinsic()) {
165 return TTIForTargetIntrinsicsOnly.instCombineIntrinsic(IC&: *this, II);
166 }
167 return std::nullopt;
168}
169
170std::optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
171 IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
172 bool &KnownBitsComputed) {
173 // Handle target specific intrinsics
174 if (II.getCalledFunction()->isTargetIntrinsic()) {
175 return TTIForTargetIntrinsicsOnly.simplifyDemandedUseBitsIntrinsic(
176 IC&: *this, II, DemandedMask, Known, KnownBitsComputed);
177 }
178 return std::nullopt;
179}
180
181std::optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
182 IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts,
183 APInt &PoisonElts2, APInt &PoisonElts3,
184 std::function<void(Instruction *, unsigned, APInt, APInt &)>
185 SimplifyAndSetOp) {
186 // Handle target specific intrinsics
187 if (II.getCalledFunction()->isTargetIntrinsic()) {
188 return TTIForTargetIntrinsicsOnly.simplifyDemandedVectorEltsIntrinsic(
189 IC&: *this, II, DemandedElts, UndefElts&: PoisonElts, UndefElts2&: PoisonElts2, UndefElts3&: PoisonElts3,
190 SimplifyAndSetOp);
191 }
192 return std::nullopt;
193}
194
195bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
196 // Approved exception for TTI use: This queries a legality property of the
197 // target, not an profitability heuristic. Ideally this should be part of
198 // DataLayout instead.
199 return TTIForTargetIntrinsicsOnly.isValidAddrSpaceCast(FromAS, ToAS);
200}
201
202Value *InstCombinerImpl::EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP) {
203 if (!RewriteGEP)
204 return llvm::emitGEPOffset(Builder: &Builder, DL, GEP);
205
206 IRBuilderBase::InsertPointGuard Guard(Builder);
207 auto *Inst = dyn_cast<Instruction>(Val: GEP);
208 if (Inst)
209 Builder.SetInsertPoint(Inst);
210
211 Value *Offset = EmitGEPOffset(GEP);
212 // Rewrite non-trivial GEPs to avoid duplicating the offset arithmetic.
213 if (Inst && !GEP->hasAllConstantIndices() &&
214 !GEP->getSourceElementType()->isIntegerTy(Bitwidth: 8)) {
215 replaceInstUsesWith(
216 I&: *Inst, V: Builder.CreateGEP(Ty: Builder.getInt8Ty(), Ptr: GEP->getPointerOperand(),
217 IdxList: Offset, Name: "", NW: GEP->getNoWrapFlags()));
218 eraseInstFromFunction(I&: *Inst);
219 }
220 return Offset;
221}
222
223Value *InstCombinerImpl::EmitGEPOffsets(ArrayRef<GEPOperator *> GEPs,
224 GEPNoWrapFlags NW, Type *IdxTy,
225 bool RewriteGEPs) {
226 auto Add = [&](Value *Sum, Value *Offset) -> Value * {
227 if (Sum)
228 return Builder.CreateAdd(LHS: Sum, RHS: Offset, Name: "", HasNUW: NW.hasNoUnsignedWrap(),
229 HasNSW: NW.isInBounds());
230 else
231 return Offset;
232 };
233
234 Value *Sum = nullptr;
235 Value *OneUseSum = nullptr;
236 Value *OneUseBase = nullptr;
237 GEPNoWrapFlags OneUseFlags = GEPNoWrapFlags::all();
238 for (GEPOperator *GEP : reverse(C&: GEPs)) {
239 Value *Offset;
240 {
241 // Expand the offset at the point of the previous GEP to enable rewriting.
242 // However, use the original insertion point for calculating Sum.
243 IRBuilderBase::InsertPointGuard Guard(Builder);
244 auto *Inst = dyn_cast<Instruction>(Val: GEP);
245 if (RewriteGEPs && Inst)
246 Builder.SetInsertPoint(Inst);
247
248 Offset = llvm::emitGEPOffset(Builder: &Builder, DL, GEP);
249 if (Offset->getType() != IdxTy)
250 Offset = Builder.CreateVectorSplat(
251 EC: cast<VectorType>(Val: IdxTy)->getElementCount(), V: Offset);
252 if (GEP->hasOneUse()) {
253 // Offsets of one-use GEPs will be merged into the next multi-use GEP.
254 OneUseSum = Add(OneUseSum, Offset);
255 OneUseFlags = OneUseFlags.intersectForOffsetAdd(Other: GEP->getNoWrapFlags());
256 if (!OneUseBase)
257 OneUseBase = GEP->getPointerOperand();
258 continue;
259 }
260
261 if (OneUseSum)
262 Offset = Add(OneUseSum, Offset);
263
264 // Rewrite the GEP to reuse the computed offset. This also includes
265 // offsets from preceding one-use GEPs of matched type.
266 if (RewriteGEPs && Inst &&
267 Offset->getType()->isVectorTy() == GEP->getType()->isVectorTy() &&
268 !(GEP->getSourceElementType()->isIntegerTy(Bitwidth: 8) &&
269 GEP->getOperand(i_nocapture: 1) == Offset)) {
270 replaceInstUsesWith(
271 I&: *Inst,
272 V: Builder.CreatePtrAdd(
273 Ptr: OneUseBase ? OneUseBase : GEP->getPointerOperand(), Offset, Name: "",
274 NW: OneUseFlags.intersectForOffsetAdd(Other: GEP->getNoWrapFlags())));
275 eraseInstFromFunction(I&: *Inst);
276 }
277 }
278
279 Sum = Add(Sum, Offset);
280 OneUseSum = OneUseBase = nullptr;
281 OneUseFlags = GEPNoWrapFlags::all();
282 }
283 if (OneUseSum)
284 Sum = Add(Sum, OneUseSum);
285 if (!Sum)
286 return Constant::getNullValue(Ty: IdxTy);
287 return Sum;
288}
289
290/// Legal integers and common types are considered desirable. This is used to
291/// avoid creating instructions with types that may not be supported well by the
292/// the backend.
293/// NOTE: This treats i8, i16 and i32 specially because they are common
294/// types in frontend languages.
295bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
296 switch (BitWidth) {
297 case 8:
298 case 16:
299 case 32:
300 return true;
301 default:
302 return DL.isLegalInteger(Width: BitWidth);
303 }
304}
305
306/// Return true if it is desirable to convert an integer computation from a
307/// given bit width to a new bit width.
308/// We don't want to convert from a legal or desirable type (like i8) to an
309/// illegal type or from a smaller to a larger illegal type. A width of '1'
310/// is always treated as a desirable type because i1 is a fundamental type in
311/// IR, and there are many specialized optimizations for i1 types.
312/// Common/desirable widths are equally treated as legal to convert to, in
313/// order to open up more combining opportunities.
314bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
315 unsigned ToWidth) const {
316 bool FromLegal = FromWidth == 1 || DL.isLegalInteger(Width: FromWidth);
317 bool ToLegal = ToWidth == 1 || DL.isLegalInteger(Width: ToWidth);
318
319 // Convert to desirable widths even if they are not legal types.
320 // Only shrink types, to prevent infinite loops.
321 if (ToWidth < FromWidth && isDesirableIntType(BitWidth: ToWidth))
322 return true;
323
324 // If this is a legal or desiable integer from type, and the result would be
325 // an illegal type, don't do the transformation.
326 if ((FromLegal || isDesirableIntType(BitWidth: FromWidth)) && !ToLegal)
327 return false;
328
329 // Otherwise, if both are illegal, do not increase the size of the result. We
330 // do allow things like i160 -> i64, but not i64 -> i160.
331 if (!FromLegal && !ToLegal && ToWidth > FromWidth)
332 return false;
333
334 return true;
335}
336
337/// Return true if it is desirable to convert a computation from 'From' to 'To'.
338/// We don't want to convert from a legal to an illegal type or from a smaller
339/// to a larger illegal type. i1 is always treated as a legal type because it is
340/// a fundamental type in IR, and there are many specialized optimizations for
341/// i1 types.
342bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
343 // TODO: This could be extended to allow vectors. Datalayout changes might be
344 // needed to properly support that.
345 if (!From->isIntegerTy() || !To->isIntegerTy())
346 return false;
347
348 unsigned FromWidth = From->getPrimitiveSizeInBits();
349 unsigned ToWidth = To->getPrimitiveSizeInBits();
350 return shouldChangeType(FromWidth, ToWidth);
351}
352
353// Return true, if No Signed Wrap should be maintained for I.
354// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
355// where both B and C should be ConstantInts, results in a constant that does
356// not overflow. This function only handles the Add/Sub/Mul opcodes. For
357// all other opcodes, the function conservatively returns false.
358static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
359 auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I);
360 if (!OBO || !OBO->hasNoSignedWrap())
361 return false;
362
363 const APInt *BVal, *CVal;
364 if (!match(V: B, P: m_APInt(Res&: BVal)) || !match(V: C, P: m_APInt(Res&: CVal)))
365 return false;
366
367 // We reason about Add/Sub/Mul Only.
368 bool Overflow = false;
369 switch (I.getOpcode()) {
370 case Instruction::Add:
371 (void)BVal->sadd_ov(RHS: *CVal, Overflow);
372 break;
373 case Instruction::Sub:
374 (void)BVal->ssub_ov(RHS: *CVal, Overflow);
375 break;
376 case Instruction::Mul:
377 (void)BVal->smul_ov(RHS: *CVal, Overflow);
378 break;
379 default:
380 // Conservatively return false for other opcodes.
381 return false;
382 }
383 return !Overflow;
384}
385
386static bool hasNoUnsignedWrap(BinaryOperator &I) {
387 auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I);
388 return OBO && OBO->hasNoUnsignedWrap();
389}
390
391static bool hasNoSignedWrap(BinaryOperator &I) {
392 auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I);
393 return OBO && OBO->hasNoSignedWrap();
394}
395
396/// Conservatively clears subclassOptionalData after a reassociation or
397/// commutation. We preserve fast-math flags when applicable as they can be
398/// preserved.
399static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
400 FPMathOperator *FPMO = dyn_cast<FPMathOperator>(Val: &I);
401 if (!FPMO) {
402 I.clearSubclassOptionalData();
403 return;
404 }
405
406 FastMathFlags FMF = I.getFastMathFlags();
407 I.clearSubclassOptionalData();
408 I.setFastMathFlags(FMF);
409}
410
411/// Combine constant operands of associative operations either before or after a
412/// cast to eliminate one of the associative operations:
413/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
414/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
415static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
416 InstCombinerImpl &IC) {
417 auto *Cast = dyn_cast<CastInst>(Val: BinOp1->getOperand(i_nocapture: 0));
418 if (!Cast || !Cast->hasOneUse())
419 return false;
420
421 // TODO: Enhance logic for other casts and remove this check.
422 auto CastOpcode = Cast->getOpcode();
423 if (CastOpcode != Instruction::ZExt)
424 return false;
425
426 // TODO: Enhance logic for other BinOps and remove this check.
427 if (!BinOp1->isBitwiseLogicOp())
428 return false;
429
430 auto AssocOpcode = BinOp1->getOpcode();
431 auto *BinOp2 = dyn_cast<BinaryOperator>(Val: Cast->getOperand(i_nocapture: 0));
432 if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
433 return false;
434
435 Constant *C1, *C2;
436 if (!match(V: BinOp1->getOperand(i_nocapture: 1), P: m_Constant(C&: C1)) ||
437 !match(V: BinOp2->getOperand(i_nocapture: 1), P: m_Constant(C&: C2)))
438 return false;
439
440 // TODO: This assumes a zext cast.
441 // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
442 // to the destination type might lose bits.
443
444 // Fold the constants together in the destination type:
445 // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
446 const DataLayout &DL = IC.getDataLayout();
447 Type *DestTy = C1->getType();
448 Constant *CastC2 = ConstantFoldCastOperand(Opcode: CastOpcode, C: C2, DestTy, DL);
449 if (!CastC2)
450 return false;
451 Constant *FoldedC = ConstantFoldBinaryOpOperands(Opcode: AssocOpcode, LHS: C1, RHS: CastC2, DL);
452 if (!FoldedC)
453 return false;
454
455 IC.replaceOperand(I&: *Cast, OpNum: 0, V: BinOp2->getOperand(i_nocapture: 0));
456 IC.replaceOperand(I&: *BinOp1, OpNum: 1, V: FoldedC);
457 BinOp1->dropPoisonGeneratingFlags();
458 Cast->dropPoisonGeneratingFlags();
459 return true;
460}
461
462// Simplifies IntToPtr/PtrToInt RoundTrip Cast.
463// inttoptr ( ptrtoint (x) ) --> x
464Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
465 auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
466 if (IntToPtr && DL.getTypeSizeInBits(Ty: IntToPtr->getDestTy()) ==
467 DL.getTypeSizeInBits(Ty: IntToPtr->getSrcTy())) {
468 auto *PtrToInt = dyn_cast<PtrToIntInst>(Val: IntToPtr->getOperand(i_nocapture: 0));
469 Type *CastTy = IntToPtr->getDestTy();
470 if (PtrToInt &&
471 CastTy->getPointerAddressSpace() ==
472 PtrToInt->getSrcTy()->getPointerAddressSpace() &&
473 DL.getTypeSizeInBits(Ty: PtrToInt->getSrcTy()) ==
474 DL.getTypeSizeInBits(Ty: PtrToInt->getDestTy()))
475 return PtrToInt->getOperand(i_nocapture: 0);
476 }
477 return nullptr;
478}
479
480/// This performs a few simplifications for operators that are associative or
481/// commutative:
482///
483/// Commutative operators:
484///
485/// 1. Order operands such that they are listed from right (least complex) to
486/// left (most complex). This puts constants before unary operators before
487/// binary operators.
488///
489/// Associative operators:
490///
491/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
492/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
493///
494/// Associative and commutative operators:
495///
496/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
497/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
498/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
499/// if C1 and C2 are constants.
///
/// The rules are applied to \p I in place, and the whole rule set is re-run
/// after every rule that fires, until a pass over the rules changes nothing.
///
/// \returns the Changed flag, which is set whenever one of the rules rewrites
/// \p I.
500bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
501 Instruction::BinaryOps Opcode = I.getOpcode();
502 bool Changed = false;
503
    // Each `continue` below restarts this loop so that all rules are tried
    // again on the updated instruction; the only exit is the `return` at the
    // bottom of the loop body.
504 do {
505 // Order operands such that they are listed from right (least complex) to
506 // left (most complex). This puts constants before unary operators before
507 // binary operators.
    // NOTE(review): this assigns (rather than ORs into) Changed; presumably
    // swapOperands() returns false on success so that Changed becomes true --
    // confirm against the swapOperands() declaration.
508 if (I.isCommutative() && getComplexity(V: I.getOperand(i_nocapture: 0)) <
509 getComplexity(V: I.getOperand(i_nocapture: 1)))
510 Changed = !I.swapOperands();
511
    // For commutative operators, additionally replace both operands with the
    // pair returned by matchSymmetricPair() when it finds one.
512 if (I.isCommutative()) {
513 if (auto Pair = matchSymmetricPair(LHS: I.getOperand(i_nocapture: 0), RHS: I.getOperand(i_nocapture: 1))) {
514 replaceOperand(I, OpNum: 0, V: Pair->first);
515 replaceOperand(I, OpNum: 1, V: Pair->second);
516 Changed = true;
517 }
518 }
519
    // Op0/Op1 are null when the corresponding operand is not itself a binary
    // operator; they are recomputed on every iteration because the operands
    // of I may have been replaced.
520 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: I.getOperand(i_nocapture: 0));
521 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: I.getOperand(i_nocapture: 1));
522
523 if (I.isAssociative()) {
524 // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
525 if (Op0 && Op0->getOpcode() == Opcode) {
526 Value *A = Op0->getOperand(i_nocapture: 0);
527 Value *B = Op0->getOperand(i_nocapture: 1);
528 Value *C = I.getOperand(i_nocapture: 1);
529
530 // Does "B op C" simplify?
531 if (Value *V = simplifyBinOp(Opcode, LHS: B, RHS: C, Q: SQ.getWithInstruction(I: &I))) {
532 // It simplifies to V. Form "A op V".
533 replaceOperand(I, OpNum: 0, V: A);
534 replaceOperand(I, OpNum: 1, V);
    // nuw survives only if both the outer and the inner operation had it; nsw
    // additionally requires that "B op C" on constants does not overflow (see
    // maintainNoSignedWrap). Both are computed before the flags are cleared.
535 bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(I&: *Op0);
536 bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(I&: *Op0);
537
538 // Conservatively clear all optional flags since they may not be
539 // preserved by the reassociation. Reset nsw/nuw based on the above
540 // analysis.
541 ClearSubclassDataAfterReassociation(I);
542
543 // Note: this is only valid because SimplifyBinOp doesn't look at
544 // the operands to Op0.
545 if (IsNUW)
546 I.setHasNoUnsignedWrap(true);
547
548 if (IsNSW)
549 I.setHasNoSignedWrap(true);
550
551 Changed = true;
552 ++NumReassoc;
553 continue;
554 }
555 }
556
557 // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
558 if (Op1 && Op1->getOpcode() == Opcode) {
559 Value *A = I.getOperand(i_nocapture: 0);
560 Value *B = Op1->getOperand(i_nocapture: 0);
561 Value *C = Op1->getOperand(i_nocapture: 1);
562
563 // Does "A op B" simplify?
564 if (Value *V = simplifyBinOp(Opcode, LHS: A, RHS: B, Q: SQ.getWithInstruction(I: &I))) {
565 // It simplifies to V. Form "V op C".
566 replaceOperand(I, OpNum: 0, V);
567 replaceOperand(I, OpNum: 1, V: C);
568 // Conservatively clear the optional flags, since they may not be
569 // preserved by the reassociation.
570 ClearSubclassDataAfterReassociation(I);
571 Changed = true;
572 ++NumReassoc;
573 continue;
574 }
575 }
576 }
577
578 if (I.isAssociative() && I.isCommutative()) {
    // First try to merge constants across a cast (see simplifyAssocCastAssoc).
579 if (simplifyAssocCastAssoc(BinOp1: &I, IC&: *this)) {
580 Changed = true;
581 ++NumReassoc;
582 continue;
583 }
584
585 // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
586 if (Op0 && Op0->getOpcode() == Opcode) {
587 Value *A = Op0->getOperand(i_nocapture: 0);
588 Value *B = Op0->getOperand(i_nocapture: 1);
589 Value *C = I.getOperand(i_nocapture: 1);
590
591 // Does "C op A" simplify?
592 if (Value *V = simplifyBinOp(Opcode, LHS: C, RHS: A, Q: SQ.getWithInstruction(I: &I))) {
593 // It simplifies to V. Form "V op B".
594 replaceOperand(I, OpNum: 0, V);
595 replaceOperand(I, OpNum: 1, V: B);
596 // Conservatively clear the optional flags, since they may not be
597 // preserved by the reassociation.
598 ClearSubclassDataAfterReassociation(I);
599 Changed = true;
600 ++NumReassoc;
601 continue;
602 }
603 }
604
605 // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
606 if (Op1 && Op1->getOpcode() == Opcode) {
607 Value *A = I.getOperand(i_nocapture: 0);
608 Value *B = Op1->getOperand(i_nocapture: 0);
609 Value *C = Op1->getOperand(i_nocapture: 1);
610
611 // Does "C op A" simplify?
612 if (Value *V = simplifyBinOp(Opcode, LHS: C, RHS: A, Q: SQ.getWithInstruction(I: &I))) {
613 // It simplifies to V. Form "B op V".
614 replaceOperand(I, OpNum: 0, V: B);
615 replaceOperand(I, OpNum: 1, V);
616 // Conservatively clear the optional flags, since they may not be
617 // preserved by the reassociation.
618 ClearSubclassDataAfterReassociation(I);
619 Changed = true;
620 ++NumReassoc;
621 continue;
622 }
623 }
624
625 // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
626 // if C1 and C2 are constants.
    // Both inner operations must have a single use and a constant right-hand
    // operand, and "C1 op C2" must constant-fold (CRes).
627 Value *A, *B;
628 Constant *C1, *C2, *CRes;
629 if (Op0 && Op1 &&
630 Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
631 match(V: Op0, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: A), R: m_Constant(C&: C1)))) &&
632 match(V: Op1, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: B), R: m_Constant(C&: C2)))) &&
633 (CRes = ConstantFoldBinaryOpOperands(Opcode, LHS: C1, RHS: C2, DL))) {
    // nuw is kept only if all three original operations had it, and the new
    // inner "A op B" is created with nuw only when the opcode is Add.
634 bool IsNUW = hasNoUnsignedWrap(I) &&
635 hasNoUnsignedWrap(I&: *Op0) &&
636 hasNoUnsignedWrap(I&: *Op1);
637 BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
638 BinaryOperator::CreateNUW(Opc: Opcode, V1: A, V2: B) :
639 BinaryOperator::Create(Op: Opcode, S1: A, S2: B);
640
    // For floating-point operations the new inner op receives the
    // intersection of the fast-math flags of all three original operations.
641 if (isa<FPMathOperator>(Val: NewBO)) {
642 FastMathFlags Flags = I.getFastMathFlags() &
643 Op0->getFastMathFlags() &
644 Op1->getFastMathFlags();
645 NewBO->setFastMathFlags(Flags);
646 }
647 InsertNewInstWith(New: NewBO, Old: I.getIterator());
648 NewBO->takeName(V: Op1);
649 replaceOperand(I, OpNum: 0, V: NewBO);
650 replaceOperand(I, OpNum: 1, V: CRes);
651 // Conservatively clear the optional flags, since they may not be
652 // preserved by the reassociation.
653 ClearSubclassDataAfterReassociation(I);
654 if (IsNUW)
655 I.setHasNoUnsignedWrap(true);
656
    // Note: unlike the transforms above, this one does not bump NumReassoc.
657 Changed = true;
658 continue;
659 }
660 }
661
662 // No further simplifications.
663 return Changed;
664 } while (true);
665}
666
667/// Return whether "X LOp (Y ROp Z)" is always equal to
668/// "(X LOp Y) ROp (X LOp Z)".
669static bool leftDistributesOverRight(Instruction::BinaryOps LOp,
670 Instruction::BinaryOps ROp) {
671 // X & (Y | Z) <--> (X & Y) | (X & Z)
672 // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
673 if (LOp == Instruction::And)
674 return ROp == Instruction::Or || ROp == Instruction::Xor;
675
676 // X | (Y & Z) <--> (X | Y) & (X | Z)
677 if (LOp == Instruction::Or)
678 return ROp == Instruction::And;
679
680 // X * (Y + Z) <--> (X * Y) + (X * Z)
681 // X * (Y - Z) <--> (X * Y) - (X * Z)
682 if (LOp == Instruction::Mul)
683 return ROp == Instruction::Add || ROp == Instruction::Sub;
684
685 return false;
686}
687
688/// Return whether "(X LOp Y) ROp Z" is always equal to
689/// "(X ROp Z) LOp (Y ROp Z)".
690static bool rightDistributesOverLeft(Instruction::BinaryOps LOp,
691 Instruction::BinaryOps ROp) {
692 if (Instruction::isCommutative(Opcode: ROp))
693 return leftDistributesOverRight(LOp: ROp, ROp: LOp);
694
695 // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
696 return Instruction::isBitwiseLogicOp(Opcode: LOp) && Instruction::isShift(Opcode: ROp);
697
698 // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
699 // but this requires knowing that the addition does not overflow and other
700 // such subtleties.
701}
702
703/// This function returns identity value for given opcode, which can be used to
704/// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
705static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
706 if (isa<Constant>(Val: V))
707 return nullptr;
708
709 return ConstantExpr::getBinOpIdentity(Opcode, Ty: V->getType());
710}
711
712/// This function predicates factorization using distributive laws. By default,
713/// it just returns the 'Op' inputs. But for special-cases like
714/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
715/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
716/// allow more factorization opportunities.
717static Instruction::BinaryOps
718getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
719 Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) {
720 assert(Op && "Expected a binary operator");
721 LHS = Op->getOperand(i_nocapture: 0);
722 RHS = Op->getOperand(i_nocapture: 1);
723 if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
724 Constant *C;
725 if (match(V: Op, P: m_Shl(L: m_Value(), R: m_ImmConstant(C)))) {
726 // X << C --> X * (1 << C)
727 RHS = ConstantFoldBinaryInstruction(
728 Opcode: Instruction::Shl, V1: ConstantInt::get(Ty: Op->getType(), V: 1), V2: C);
729 assert(RHS && "Constant folding of immediate constants failed");
730 return Instruction::Mul;
731 }
732 // TODO: We can add other conversions e.g. shr => div etc.
733 }
734 if (Instruction::isBitwiseLogicOp(Opcode: TopOpcode)) {
735 if (OtherOp && OtherOp->getOpcode() == Instruction::AShr &&
736 match(V: Op, P: m_LShr(L: m_NonNegative(), R: m_Value()))) {
737 // lshr nneg C, X --> ashr nneg C, X
738 return Instruction::AShr;
739 }
740 }
741 return Op->getOpcode();
742}
743
744/// This tries to simplify binary operations by factorizing out common terms
745/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
746static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
747 InstCombiner::BuilderTy &Builder,
748 Instruction::BinaryOps InnerOpcode, Value *A,
749 Value *B, Value *C, Value *D) {
750 assert(A && B && C && D && "All values must be provided");
751
752 Value *V = nullptr;
753 Value *RetVal = nullptr;
754 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
755 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
756
757 // Does "X op' Y" always equal "Y op' X"?
758 bool InnerCommutative = Instruction::isCommutative(Opcode: InnerOpcode);
759
760 // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
761 if (leftDistributesOverRight(LOp: InnerOpcode, ROp: TopLevelOpcode)) {
762 // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
763 // commutative case, "(A op' B) op (C op' A)"?
764 if (A == C || (InnerCommutative && A == D)) {
765 if (A != C)
766 std::swap(a&: C, b&: D);
767 // Consider forming "A op' (B op D)".
768 // If "B op D" simplifies then it can be formed with no cost.
769 V = simplifyBinOp(Opcode: TopLevelOpcode, LHS: B, RHS: D, Q: SQ.getWithInstruction(I: &I));
770
771 // If "B op D" doesn't simplify then only go on if one of the existing
772 // operations "A op' B" and "C op' D" will be zapped as no longer used.
773 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
774 V = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: B, RHS: D, Name: RHS->getName());
775 if (V)
776 RetVal = Builder.CreateBinOp(Opc: InnerOpcode, LHS: A, RHS: V);
777 }
778 }
779
780 // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
781 if (!RetVal && rightDistributesOverLeft(LOp: TopLevelOpcode, ROp: InnerOpcode)) {
782 // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
783 // commutative case, "(A op' B) op (B op' D)"?
784 if (B == D || (InnerCommutative && B == C)) {
785 if (B != D)
786 std::swap(a&: C, b&: D);
787 // Consider forming "(A op C) op' B".
788 // If "A op C" simplifies then it can be formed with no cost.
789 V = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQ.getWithInstruction(I: &I));
790
791 // If "A op C" doesn't simplify then only go on if one of the existing
792 // operations "A op' B" and "C op' D" will be zapped as no longer used.
793 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
794 V = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C, Name: LHS->getName());
795 if (V)
796 RetVal = Builder.CreateBinOp(Opc: InnerOpcode, LHS: V, RHS: B);
797 }
798 }
799
800 if (!RetVal)
801 return nullptr;
802
803 ++NumFactor;
804 RetVal->takeName(V: &I);
805
806 // Try to add no-overflow flags to the final value.
807 if (isa<BinaryOperator>(Val: RetVal)) {
808 bool HasNSW = false;
809 bool HasNUW = false;
810 if (isa<OverflowingBinaryOperator>(Val: &I)) {
811 HasNSW = I.hasNoSignedWrap();
812 HasNUW = I.hasNoUnsignedWrap();
813 }
814 if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(Val: LHS)) {
815 HasNSW &= LOBO->hasNoSignedWrap();
816 HasNUW &= LOBO->hasNoUnsignedWrap();
817 }
818
819 if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(Val: RHS)) {
820 HasNSW &= ROBO->hasNoSignedWrap();
821 HasNUW &= ROBO->hasNoUnsignedWrap();
822 }
823
824 if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
825 // We can propagate 'nsw' if we know that
826 // %Y = mul nsw i16 %X, C
827 // %Z = add nsw i16 %Y, %X
828 // =>
829 // %Z = mul nsw i16 %X, C+1
830 //
831 // iff C+1 isn't INT_MIN
832 const APInt *CInt;
833 if (match(V, P: m_APInt(Res&: CInt)) && !CInt->isMinSignedValue())
834 cast<Instruction>(Val: RetVal)->setHasNoSignedWrap(HasNSW);
835
836 // nuw can be propagated with any constant or nuw value.
837 cast<Instruction>(Val: RetVal)->setHasNoUnsignedWrap(HasNUW);
838 }
839 }
840 return RetVal;
841}
842
843// If `I` has one Const operand and the other matches `(ctpop (not x))`,
844// replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`.
845// This is only useful if the new subtract can fold, so we only handle the
846// following cases:
847// 1) (add/sub/disjoint_or C, (ctpop (not x))
848// -> (add/sub/disjoint_or C', (ctpop x))
849// 2) (cmp pred C, (ctpop (not x))
850// -> (cmp pred C', (ctpop x))
//
// Returns the result of replaceInstUsesWith(*I, ...) when the fold applies and
// nullptr when `I` does not have one of the handled shapes.
851Instruction *InstCombinerImpl::tryFoldInstWithCtpopWithNot(Instruction *I) {
852 unsigned Opc = I->getOpcode();
 // Operand index that must hold the constant; the ctpop call is looked up at
 // operand `1 - ConstIdx`.
853 unsigned ConstIdx = 1;
854 switch (Opc) {
855 default:
856 return nullptr;
857 // (ctpop (not x)) <-> (sub nuw nsw BitWidth(x) - (ctpop x))
858 // We can fold the BitWidth(x) with add/sub/icmp as long as the other operand
859 // is constant.
860 case Instruction::Sub:
 // For `sub` the constant is expected on the left: `C - ctpop(...)`.
861 ConstIdx = 0;
862 break;
863 case Instruction::ICmp:
864 // Signed predicates aren't correct in some edge cases (e.g. for i2 types).
865 // Also, since (ctpop x) is known to be in [0, BitWidth(x)], almost all signed
866 // comparisons against it are simplified to unsigned.
867 if (cast<ICmpInst>(Val: I)->isSigned())
868 return nullptr;
869 break;
870 case Instruction::Or:
 // Only a disjoint `or` is accepted; it then shares the `add` handling.
871 if (!match(V: I, P: m_DisjointOr(L: m_Value(), R: m_Value())))
872 return nullptr;
873 [[fallthrough]];
874 case Instruction::Add:
875 break;
876 }
877
878 Value *Op;
879 // Find ctpop.
880 if (!match(V: I->getOperand(i: 1 - ConstIdx),
881 P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ctpop>(Op0: m_Value(V&: Op)))))
882 return nullptr;
883
884 Constant *C;
885 // Check other operand is ImmConstant.
886 if (!match(V: I->getOperand(i: ConstIdx), P: m_ImmConstant(C)))
887 return nullptr;
888
889 Type *Ty = Op->getType();
890 Constant *BitWidthC = ConstantInt::get(Ty, V: Ty->getScalarSizeInBits());
891 // Need extra check for icmp. Note if this check is true, it generally means
892 // the icmp will simplify to true/false.
 // For non-equality predicates, bail out unless `C u> BitWidth` constant-folds
 // to a null value (i.e. C is known to be u<= BitWidth).
893 if (Opc == Instruction::ICmp && !cast<ICmpInst>(Val: I)->isEquality()) {
894 Constant *Cmp =
895 ConstantFoldCompareInstOperands(Predicate: ICmpInst::ICMP_UGT, LHS: C, RHS: BitWidthC, DL);
896 if (!Cmp || !Cmp->isNullValue())
897 return nullptr;
898 }
899
900 // Check we can invert `(not x)` for free.
 // `Consumes` must also be set by isFreeToInvert.
 // NOTE(review): presumably this means an existing `not` is absorbed rather
 // than a new inversion being hidden elsewhere - confirm against
 // isFreeToInvert.
901 bool Consumes = false;
902 if (!isFreeToInvert(V: Op, WillInvertAllUses: Op->hasOneUse(), DoesConsume&: Consumes) || !Consumes)
903 return nullptr;
904 Value *NotOp = getFreelyInverted(V: Op, WillInvertAllUses: Op->hasOneUse(), Builder: &Builder);
905 assert(NotOp != nullptr &&
906 "Desync between isFreeToInvert and getFreelyInverted");
907
908 Value *CtpopOfNotOp = Builder.CreateIntrinsic(RetTy: Ty, ID: Intrinsic::ctpop, Args: NotOp);
909
910 Value *R = nullptr;
911
912 // Do the transformation here to avoid potentially introducing an infinite
913 // loop.
 // Each case below substitutes ctpop(Op) == BitWidth - ctpop(NotOp) and folds
 // BitWidth into the constant operand.
914 switch (Opc) {
915 case Instruction::Sub:
 // C - ctpop(Op) == ctpop(NotOp) + (C - BitWidth)
916 R = Builder.CreateAdd(LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: C, C2: BitWidthC));
917 break;
918 case Instruction::Or:
919 case Instruction::Add:
 // ctpop(Op) + C == (C + BitWidth) - ctpop(NotOp)
920 R = Builder.CreateSub(LHS: ConstantExpr::getAdd(C1: C, C2: BitWidthC), RHS: CtpopOfNotOp);
921 break;
922 case Instruction::ICmp:
 // ctpop(Op) pred C == ctpop(NotOp) swapped-pred (BitWidth - C)
923 R = Builder.CreateICmp(P: cast<ICmpInst>(Val: I)->getSwappedPredicate(),
924 LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: BitWidthC, C2: C));
925 break;
926 default:
927 llvm_unreachable("Unhandled Opcode");
928 }
929 assert(R != nullptr);
930 return replaceInstUsesWith(I&: *I, V: R);
931}
932
933// (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
934// IFF
935// 1) the logic_shifts match
936// 2) one of the following holds:
937// - BinOp1 is `and`, or
938// - the binops distribute over the shift (anything but `add` + `lshr`) and
// either BinOp2 is `and` or
// (logic_shift (inv_logic_shift C1, C), C) == C1
939//
940// -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
941//
942// (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
943// IFF
944// 1) the logic_shifts match
945// 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`).
946//
947// -> (BinOp (logic_shift (BinOp X, Y)), Mask)
948//
949// (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt))
950// IFF
951// 1) Binop1 is bitwise logical operator `and`, `or` or `xor`
952// 2) Binop2 is `not`
953//
954// -> (arithmetic_shift Binop1((not X), Y), Amt)
//
// The bare shift is first looked for in operand 0 of `I`, then in operand 1.
// Returns the instruction built by BinaryOperator::Create for the first shape
// that matches, or nullptr if none does.
955
956Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) {
957 const DataLayout &DL = I.getDataLayout();
 // Opcodes accepted for both the outer binop (`I`) and the inner binop.
958 auto IsValidBinOpc = [](unsigned Opc) {
959 switch (Opc) {
960 default:
961 return false;
962 case Instruction::And:
963 case Instruction::Or:
964 case Instruction::Xor:
965 case Instruction::Add:
966 // Skip Sub as we only match constant masks which will canonicalize to use
967 // add.
968 return true;
969 }
970 };
971
972 // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra
973 // constraints.
974 auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
975 unsigned ShOpc) {
976 assert(ShOpc != Instruction::AShr);
977 return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
978 ShOpc == Instruction::Shl;
979 };
980
 // Maps `lshr` to `shl` and anything else (only `shl` is expected, `ashr` is
 // asserted away) to `lshr`.
981 auto GetInvShift = [](unsigned ShOpc) {
982 assert(ShOpc != Instruction::AShr);
983 return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
984 };
985
 // Decides whether the constant mask can be moved inside the shift for the
 // given outer binop (BinOpc1), inner binop (BinOpc2) and shift opcode.
986 auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2,
987 unsigned ShOpc, Constant *CMask,
988 Constant *CShift) {
989 // If the BinOp1 is `and` we don't need to check the mask.
990 if (BinOpc1 == Instruction::And)
991 return true;
992
993 // For all other possible transfers we need complete distributable
994 // binop/shift (anything but `add` + `lshr`).
995 if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc))
996 return false;
997
998 // If BinOp2 is `and`, any mask works (this only really helps for non-splat
999 // vecs, otherwise the mask will be simplified and the following check will
1000 // handle it).
1001 if (BinOpc2 == Instruction::And)
1002 return true;
1003
1004 // Otherwise, need mask that meets the below requirement.
1005 // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
1006 Constant *MaskInvShift =
1007 ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL);
1008 return ConstantFoldBinaryOpOperands(Opcode: ShOpc, LHS: MaskInvShift, RHS: CShift, DL) ==
1009 CMask;
1010 };
1011
 // Tries all three folds with operand `ShOpnum` of `I` as the bare one-use
 // shift `(shift Y, Shift)` and the other operand as a commutative binop of a
 // one-use shift `(shift X, Shift)` (captured as ShiftedX) and `Mask`.
1012 auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
1013 Constant *CMask, *CShift;
1014 Value *X, *Y, *ShiftedX, *Mask, *Shift;
1015 if (!match(V: I.getOperand(i_nocapture: ShOpnum),
1016 P: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: Y), R: m_Value(V&: Shift)))))
1017 return nullptr;
1018 if (!match(V: I.getOperand(i_nocapture: 1 - ShOpnum),
1019 P: m_c_BinOp(L: m_CombineAnd(
1020 L: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: X), R: m_Specific(V: Shift))),
1021 R: m_Value(V&: ShiftedX)),
1022 R: m_Value(V&: Mask))))
1023 return nullptr;
1024 // Make sure we are matching instruction shifts and not ConstantExpr
1025 auto *IY = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: ShOpnum));
1026 auto *IX = dyn_cast<Instruction>(Val: ShiftedX);
1027 if (!IY || !IX)
1028 return nullptr;
1029
1030 // LHS and RHS need same shift opcode
1031 unsigned ShOpc = IY->getOpcode();
1032 if (ShOpc != IX->getOpcode())
1033 return nullptr;
1034
1035 // Make sure binop is real instruction and not ConstantExpr
1036 auto *BO2 = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: 1 - ShOpnum));
1037 if (!BO2)
1038 return nullptr;
1039
1040 unsigned BinOpc = BO2->getOpcode();
1041 // Make sure we have valid binops.
1042 if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
1043 return nullptr;
1044
 // Arithmetic shifts are only handled when the inner binop is a `not`
 // (xor with all-ones) and the outer binop is a bitwise logic op. This must
 // stay ahead of the helpers below, which assert on `ashr`.
1045 if (ShOpc == Instruction::AShr) {
1046 if (Instruction::isBitwiseLogicOp(Opcode: I.getOpcode()) &&
1047 BinOpc == Instruction::Xor && match(V: Mask, P: m_AllOnes())) {
1048 Value *NotX = Builder.CreateNot(V: X);
1049 Value *NewBinOp = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NotX);
1050 return BinaryOperator::Create(
1051 Op: static_cast<Instruction::BinaryOps>(ShOpc), S1: NewBinOp, S2: Shift);
1052 }
1053
1054 return nullptr;
1055 }
1056
1057 // If BinOp1 == BinOp2 and it's bitwise or shl with add, then just
1058 // distribute to drop the shift irrelevant of constants.
1059 if (BinOpc == I.getOpcode() &&
1060 IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) {
1061 Value *NewBinOp2 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: X, RHS: Y);
1062 Value *NewBinOp1 = Builder.CreateBinOp(
1063 Opc: static_cast<Instruction::BinaryOps>(ShOpc), LHS: NewBinOp2, RHS: Shift);
1064 return BinaryOperator::Create(Op: I.getOpcode(), S1: NewBinOp1, S2: Mask);
1065 }
1066
1067 // Otherwise we can only distribute by constant shifting the mask, so
1068 // ensure we have constants.
1069 if (!match(V: Shift, P: m_ImmConstant(C&: CShift)))
1070 return nullptr;
1071 if (!match(V: Mask, P: m_ImmConstant(C&: CMask)))
1072 return nullptr;
1073
1074 // Check if we can distribute the binops.
1075 if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
1076 return nullptr;
1077
 // Move the mask inside the shift by applying the inverse shift to it, then
 // rebuild as (shift (BinOp1 Y, (BinOp2 X, NewCMask)), CShift).
1078 Constant *NewCMask =
1079 ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL);
1080 Value *NewBinOp2 = Builder.CreateBinOp(
1081 Opc: static_cast<Instruction::BinaryOps>(BinOpc), LHS: X, RHS: NewCMask);
1082 Value *NewBinOp1 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NewBinOp2);
1083 return BinaryOperator::Create(Op: static_cast<Instruction::BinaryOps>(ShOpc),
1084 S1: NewBinOp1, S2: CShift);
1085 };
1086
 // Try the bare shift as operand 0 first, then as operand 1.
1087 if (Instruction *R = MatchBinOp(0))
1088 return R;
1089 return MatchBinOp(1);
1090}
1091
1092// (Binop (zext C), (select C, T, F))
1093// -> (select C, (binop 1, T), (binop 0, F))
1094//
1095// (Binop (sext C), (select C, T, F))
1096// -> (select C, (binop -1, T), (binop 0, F))
1097//
1098// Attempt to simplify binary operations into a select with folded args, when
1099// one operand of the binop is a select instruction and the other operand is a
1100// zext/sext extension, whose value is the select condition.
//
// The cast operand may also be conditioned on the negation of the select
// condition, in which case the two constants are swapped between the arms.
// Returns the instruction built by SelectInst::Create, or nullptr if `I` does
// not match.
1101Instruction *
1102InstCombinerImpl::foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I) {
1103 // TODO: this simplification may be extended to any speculatable instruction,
1104 // not just binops, and would possibly be handled better in FoldOpIntoSelect.
1105 Instruction::BinaryOps Opc = I.getOpcode();
1106 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
1107 Value *A, *CondVal, *TrueVal, *FalseVal;
1108 Value *CastOp;
1109 Constant *CastTrueVal, *CastFalseVal;
1110
 // Binds A/CastTrueVal/CastFalseVal from the cast-like operand and
 // CondVal/TrueVal/FalseVal from the select operand.
 // NOTE(review): presumably m_SelectLike also matches zext/sext of an i1 `A`
 // (yielding 1/-1 and 0 as the constants), as the header describes - confirm
 // against its definition.
1111 auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) {
1112 return match(V: CastOp, P: m_SelectLike(C: m_Value(V&: A), TrueC: m_Constant(C&: CastTrueVal),
1113 FalseC: m_Constant(C&: CastFalseVal))) &&
1114 match(V: SelectOp, P: m_Select(C: m_Value(V&: CondVal), L: m_Value(V&: TrueVal),
1115 R: m_Value(V&: FalseVal)));
1116 };
1117
1118 // Make sure one side of the binop is a select instruction, and the other is a
1119 // zero/sign extension operating on a i1.
1120 if (MatchSelectAndCast(LHS, RHS))
1121 CastOp = LHS;
1122 else if (MatchSelectAndCast(RHS, LHS))
1123 CastOp = RHS;
1124 else
1125 return nullptr;
1126
 // The original select (the operand that is not CastOp), passed as MDFrom to
 // the new select; left null when ProfcheckDisableMetadataFixes is set.
1127 SelectInst *SI = ProfcheckDisableMetadataFixes
1128 ? nullptr
1129 : cast<SelectInst>(Val: CastOp == LHS ? RHS : LHS);
1130
 // Builds `binop(V, const)` with the operands in the same order as in `I`.
 // NOTE: despite its name, `IsTrueArm == true` selects CastFalseVal and
 // `IsTrueArm == false` selects CastTrueVal; see the call sites below.
1131 auto NewFoldedConst = [&](bool IsTrueArm, Value *V) {
1132 bool IsCastOpRHS = (CastOp == RHS);
1133 Value *CastVal = IsTrueArm ? CastFalseVal : CastTrueVal;
1134
1135 return IsCastOpRHS ? Builder.CreateBinOp(Opc, LHS: V, RHS: CastVal)
1136 : Builder.CreateBinOp(Opc, LHS: CastVal, RHS: V);
1137 };
1138
1139 // If the value used in the zext/sext is the select condition, or the negated
1140 // of the select condition, the binop can be simplified.
 // In both cases the true-arm binop is built in its own statement, i.e. before
 // the false-arm binop that is built inside the SelectInst::Create call.
1141 if (CondVal == A) {
 // Same condition: true arm pairs with CastTrueVal, false arm with
 // CastFalseVal.
1142 Value *NewTrueVal = NewFoldedConst(false, TrueVal);
1143 return SelectInst::Create(C: CondVal, S1: NewTrueVal,
1144 S2: NewFoldedConst(true, FalseVal), NameStr: "", InsertBefore: nullptr, MDFrom: SI);
1145 }
1146 if (match(V: A, P: m_Not(V: m_Specific(V: CondVal)))) {
 // Negated condition: the constants are swapped between the arms.
1147 Value *NewTrueVal = NewFoldedConst(true, TrueVal);
1148 return SelectInst::Create(C: CondVal, S1: NewTrueVal,
1149 S2: NewFoldedConst(false, FalseVal), NameStr: "", InsertBefore: nullptr, MDFrom: SI);
1150 }
1151
1152 return nullptr;
1153}
1154
1155Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) {
1156 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
1157 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS);
1158 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS);
1159 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1160 Value *A, *B, *C, *D;
1161 Instruction::BinaryOps LHSOpcode, RHSOpcode;
1162
1163 if (Op0)
1164 LHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op0, LHS&: A, RHS&: B, OtherOp: Op1);
1165 if (Op1)
1166 RHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op1, LHS&: C, RHS&: D, OtherOp: Op0);
1167
1168 // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
1169 // a common term.
1170 if (Op0 && Op1 && LHSOpcode == RHSOpcode)
1171 if (Value *V = tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C, D))
1172 return V;
1173
1174 // The instruction has the form "(A op' B) op (C)". Try to factorize common
1175 // term.
1176 if (Op0)
1177 if (Value *Ident = getIdentityValue(Opcode: LHSOpcode, V: RHS))
1178 if (Value *V =
1179 tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C: RHS, D: Ident))
1180 return V;
1181
1182 // The instruction has the form "(B) op (C op' D)". Try to factorize common
1183 // term.
1184 if (Op1)
1185 if (Value *Ident = getIdentityValue(Opcode: RHSOpcode, V: LHS))
1186 if (Value *V =
1187 tryFactorization(I, SQ, Builder, InnerOpcode: RHSOpcode, A: LHS, B: Ident, C, D))
1188 return V;
1189
1190 return nullptr;
1191}
1192
1193/// This tries to simplify binary operations which some other binary operation
1194/// distributes over either by factorizing out common terms
1195/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
1196/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
1197/// Returns the simplified value, or null if it didn't simplify.
///
/// Factorization (tryFactorizationFolds) is attempted first, then expansion of
/// a left-hand binary operator, then expansion of a right-hand binary
/// operator. If none of those fire, the result of
/// SimplifySelectsFeedingBinaryOp is returned.
1198Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) {
1199 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
1200 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS);
1201 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS);
1202 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1203
1204 // Factorization.
1205 if (Value *R = tryFactorizationFolds(I))
1206 return R;
1207
1208 // Expansion.
1209 if (Op0 && rightDistributesOverLeft(LOp: Op0->getOpcode(), ROp: TopLevelOpcode)) {
1210 // The instruction has the form "(A op' B) op C". See if expanding it out
1211 // to "(A op C) op' (B op C)" results in simplifications.
1212 Value *A = Op0->getOperand(i_nocapture: 0), *B = Op0->getOperand(i_nocapture: 1), *C = RHS;
1213 Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
1214
1215 // Disable the use of undef because it's not safe to distribute undef.
1216 auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef();
1217 Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive);
1218 Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: B, RHS: C, Q: SQDistributive);
1219
 // NOTE: in the three cases below the local `C` is reused to hold the newly
 // created result; it is read as an operand before being overwritten.
1220 // Do "A op C" and "B op C" both simplify?
1221 if (L && R) {
1222 // They do! Return "L op' R".
1223 ++NumExpand;
1224 C = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R);
1225 C->takeName(V: &I);
1226 return C;
1227 }
1228
1229 // Does "A op C" simplify to the identity value for the inner opcode?
1230 if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) {
1231 // It does! Return "B op C".
1232 ++NumExpand;
1233 C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: B, RHS: C);
1234 C->takeName(V: &I);
1235 return C;
1236 }
1237
1238 // Does "B op C" simplify to the identity value for the inner opcode?
1239 if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) {
1240 // It does! Return "A op C".
1241 ++NumExpand;
1242 C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C);
1243 C->takeName(V: &I);
1244 return C;
1245 }
1246 }
1247
1248 if (Op1 && leftDistributesOverRight(LOp: TopLevelOpcode, ROp: Op1->getOpcode())) {
1249 // The instruction has the form "A op (B op' C)". See if expanding it out
1250 // to "(A op B) op' (A op C)" results in simplifications.
1251 Value *A = LHS, *B = Op1->getOperand(i_nocapture: 0), *C = Op1->getOperand(i_nocapture: 1);
1252 Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
1253
1254 // Disable the use of undef because it's not safe to distribute undef.
1255 auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef();
1256 Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: B, Q: SQDistributive);
1257 Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive);
1258
 // NOTE: mirror image of the block above; here the local `A` is reused to
 // hold the newly created result.
1259 // Do "A op B" and "A op C" both simplify?
1260 if (L && R) {
1261 // They do! Return "L op' R".
1262 ++NumExpand;
1263 A = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R);
1264 A->takeName(V: &I);
1265 return A;
1266 }
1267
1268 // Does "A op B" simplify to the identity value for the inner opcode?
1269 if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) {
1270 // It does! Return "A op C".
1271 ++NumExpand;
1272 A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C);
1273 A->takeName(V: &I);
1274 return A;
1275 }
1276
1277 // Does "A op C" simplify to the identity value for the inner opcode?
1278 if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) {
1279 // It does! Return "A op B".
1280 ++NumExpand;
1281 A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: B);
1282 A->takeName(V: &I);
1283 return A;
1284 }
1285 }
1286
 // Neither factorization nor expansion applied; fall back to the select fold.
1287 return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
1288}
1289
1290static std::optional<std::pair<Value *, Value *>>
1291matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS) {
1292 if (LHS->getParent() != RHS->getParent())
1293 return std::nullopt;
1294
1295 if (LHS->getNumIncomingValues() < 2)
1296 return std::nullopt;
1297
1298 if (!equal(LRange: LHS->blocks(), RRange: RHS->blocks()))
1299 return std::nullopt;
1300
1301 Value *L0 = LHS->getIncomingValue(i: 0);
1302 Value *R0 = RHS->getIncomingValue(i: 0);
1303
1304 for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) {
1305 Value *L1 = LHS->getIncomingValue(i: I);
1306 Value *R1 = RHS->getIncomingValue(i: I);
1307
1308 if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1))
1309 continue;
1310
1311 return std::nullopt;
1312 }
1313
1314 return std::optional(std::pair(L0, R0));
1315}
1316
1317std::optional<std::pair<Value *, Value *>>
1318InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) {
1319 Instruction *LHSInst = dyn_cast<Instruction>(Val: LHS);
1320 Instruction *RHSInst = dyn_cast<Instruction>(Val: RHS);
1321 if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode())
1322 return std::nullopt;
1323 switch (LHSInst->getOpcode()) {
1324 case Instruction::PHI:
1325 return matchSymmetricPhiNodesPair(LHS: cast<PHINode>(Val: LHS), RHS: cast<PHINode>(Val: RHS));
1326 case Instruction::Select: {
1327 Value *Cond = LHSInst->getOperand(i: 0);
1328 Value *TrueVal = LHSInst->getOperand(i: 1);
1329 Value *FalseVal = LHSInst->getOperand(i: 2);
1330 if (Cond == RHSInst->getOperand(i: 0) && TrueVal == RHSInst->getOperand(i: 2) &&
1331 FalseVal == RHSInst->getOperand(i: 1))
1332 return std::pair(TrueVal, FalseVal);
1333 return std::nullopt;
1334 }
1335 case Instruction::Call: {
1336 // Match min(a, b) and max(a, b)
1337 MinMaxIntrinsic *LHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: LHSInst);
1338 MinMaxIntrinsic *RHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: RHSInst);
1339 if (LHSMinMax && RHSMinMax &&
1340 LHSMinMax->getPredicate() ==
1341 ICmpInst::getSwappedPredicate(pred: RHSMinMax->getPredicate()) &&
1342 ((LHSMinMax->getLHS() == RHSMinMax->getLHS() &&
1343 LHSMinMax->getRHS() == RHSMinMax->getRHS()) ||
1344 (LHSMinMax->getLHS() == RHSMinMax->getRHS() &&
1345 LHSMinMax->getRHS() == RHSMinMax->getLHS())))
1346 return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS());
1347 return std::nullopt;
1348 }
1349 default:
1350 return std::nullopt;
1351 }
1352}
1353
1354Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
1355 Value *LHS,
1356 Value *RHS) {
1357 Value *A, *B, *C, *D, *E, *F;
1358 bool LHSIsSelect = match(V: LHS, P: m_Select(C: m_Value(V&: A), L: m_Value(V&: B), R: m_Value(V&: C)));
1359 bool RHSIsSelect = match(V: RHS, P: m_Select(C: m_Value(V&: D), L: m_Value(V&: E), R: m_Value(V&: F)));
1360 if (!LHSIsSelect && !RHSIsSelect)
1361 return nullptr;
1362
1363 SelectInst *SI = ProfcheckDisableMetadataFixes
1364 ? nullptr
1365 : cast<SelectInst>(Val: LHSIsSelect ? LHS : RHS);
1366
1367 FastMathFlags FMF;
1368 BuilderTy::FastMathFlagGuard Guard(Builder);
1369 if (const auto *FPOp = dyn_cast<FPMathOperator>(Val: &I)) {
1370 FMF = FPOp->getFastMathFlags();
1371 Builder.setFastMathFlags(FMF);
1372 }
1373
1374 Instruction::BinaryOps Opcode = I.getOpcode();
1375 SimplifyQuery Q = SQ.getWithInstruction(I: &I);
1376
1377 Value *Cond, *True = nullptr, *False = nullptr;
1378
1379 // Special-case for add/negate combination. Replace the zero in the negation
1380 // with the trailing add operand:
1381 // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
1382 // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
1383 auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
1384 // We need an 'add' and exactly 1 arm of the select to have been simplified.
1385 if (Opcode != Instruction::Add || (!True && !False) || (True && False))
1386 return nullptr;
1387 Value *N;
1388 if (True && match(V: FVal, P: m_Neg(V: m_Value(V&: N)))) {
1389 Value *Sub = Builder.CreateSub(LHS: Z, RHS: N);
1390 return Builder.CreateSelect(C: Cond, True, False: Sub, Name: I.getName(), MDFrom: SI);
1391 }
1392 if (False && match(V: TVal, P: m_Neg(V: m_Value(V&: N)))) {
1393 Value *Sub = Builder.CreateSub(LHS: Z, RHS: N);
1394 return Builder.CreateSelect(C: Cond, True: Sub, False, Name: I.getName(), MDFrom: SI);
1395 }
1396 return nullptr;
1397 };
1398
1399 if (LHSIsSelect && RHSIsSelect && A == D) {
1400 // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
1401 Cond = A;
1402 True = simplifyBinOp(Opcode, LHS: B, RHS: E, FMF, Q);
1403 False = simplifyBinOp(Opcode, LHS: C, RHS: F, FMF, Q);
1404
1405 if (LHS->hasOneUse() && RHS->hasOneUse()) {
1406 if (False && !True)
1407 True = Builder.CreateBinOp(Opc: Opcode, LHS: B, RHS: E);
1408 else if (True && !False)
1409 False = Builder.CreateBinOp(Opc: Opcode, LHS: C, RHS: F);
1410 }
1411 } else if (LHSIsSelect && LHS->hasOneUse()) {
1412 // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
1413 Cond = A;
1414 True = simplifyBinOp(Opcode, LHS: B, RHS, FMF, Q);
1415 False = simplifyBinOp(Opcode, LHS: C, RHS, FMF, Q);
1416 if (Value *NewSel = foldAddNegate(B, C, RHS))
1417 return NewSel;
1418 } else if (RHSIsSelect && RHS->hasOneUse()) {
1419 // X op (D ? E : F) -> D ? (X op E) : (X op F)
1420 Cond = D;
1421 True = simplifyBinOp(Opcode, LHS, RHS: E, FMF, Q);
1422 False = simplifyBinOp(Opcode, LHS, RHS: F, FMF, Q);
1423 if (Value *NewSel = foldAddNegate(E, F, LHS))
1424 return NewSel;
1425 }
1426
1427 if (!True || !False)
1428 return nullptr;
1429
1430 Value *NewSI = Builder.CreateSelect(C: Cond, True, False, Name: I.getName(), MDFrom: SI);
1431 NewSI->takeName(V: &I);
1432 return NewSI;
1433}
1434
1435/// Freely adapt every user of V as-if V was changed to !V.
1436/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
///
/// NOTE: the value called `V` above is the parameter \p I here. The user
/// \p IgnoredUser, if it is among the users, is left untouched. Debug value
/// records referring to \p I get a DW_OP_not appended for that operand.
1437void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) {
1438 assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
 // NOTE(review): the early-inc range is presumably needed because the Xor
 // case rewrites uses while the user list is being walked - confirm.
1439 for (User *U : make_early_inc_range(Range: I->users())) {
1440 if (U == IgnoredUser)
1441 continue; // Don't consider this user.
1442 switch (cast<Instruction>(Val: U)->getOpcode()) {
1443 case Instruction::Select: {
 // Swap the select's values and its profile metadata.
1444 auto *SI = cast<SelectInst>(Val: U);
1445 SI->swapValues();
1446 SI->swapProfMetadata();
1447 break;
1448 }
1449 case Instruction::CondBr: {
 // Swap the branch successors and, if BPI is available, the matching
 // successor edge probabilities.
1450 CondBrInst *BI = cast<CondBrInst>(Val: U);
1451 BI->swapSuccessors(); // swaps prof metadata too
1452 if (BPI)
1453 BPI->swapSuccEdgesProbabilities(Src: BI->getParent());
1454 break;
1455 }
1456 case Instruction::Xor:
 // Replace every use of the xor user with `I` itself.
 // NOTE(review): presumably this user is a `not` of `I`, as vetted by
 // canFreelyInvertAllUsersOf() - confirm.
1457 replaceInstUsesWith(I&: cast<Instruction>(Val&: *U), V: I);
1458 // Add to worklist for DCE.
1459 addToWorklist(I: cast<Instruction>(Val: U));
1460 break;
1461 default:
1462 llvm_unreachable("Got unexpected user - out of sync with "
1463 "canFreelyInvertAllUsersOf() ?");
1464 }
1465 }
1466
1467 // Update pre-existing debug value uses.
1468 SmallVector<DbgVariableRecord *, 4> DbgVariableRecords;
1469 llvm::findDbgValues(V: I, DbgVariableRecords);
1470
 // For each record, append DW_OP_not to the expression argument of every
 // location operand that is `I`.
1471 for (DbgVariableRecord *DbgVal : DbgVariableRecords) {
1472 SmallVector<uint64_t, 1> Ops = {dwarf::DW_OP_not};
1473 for (unsigned Idx = 0, End = DbgVal->getNumVariableLocationOps();
1474 Idx != End; ++Idx)
1475 if (DbgVal->getVariableLocationOp(OpIdx: Idx) == I)
1476 DbgVal->setExpression(
1477 DIExpression::appendOpsToArg(Expr: DbgVal->getExpression(), Ops, ArgNo: Idx));
1478 }
1479}
1480
1481/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
1482/// constant zero (which is the 'negate' form).
1483Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
1484 Value *NegV;
1485 if (match(V, P: m_Neg(V: m_Value(V&: NegV))))
1486 return NegV;
1487
1488 // Constants can be considered to be negated values if they can be folded.
1489 if (ConstantInt *C = dyn_cast<ConstantInt>(Val: V))
1490 return ConstantExpr::getNeg(C);
1491
1492 if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(Val: V))
1493 if (C->getType()->getElementType()->isIntegerTy())
1494 return ConstantExpr::getNeg(C);
1495
1496 if (ConstantVector *CV = dyn_cast<ConstantVector>(Val: V)) {
1497 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1498 Constant *Elt = CV->getAggregateElement(Elt: i);
1499 if (!Elt)
1500 return nullptr;
1501
1502 if (isa<UndefValue>(Val: Elt))
1503 continue;
1504
1505 if (!isa<ConstantInt>(Val: Elt))
1506 return nullptr;
1507 }
1508 return ConstantExpr::getNeg(C: CV);
1509 }
1510
1511 // Negate integer vector splats.
1512 if (auto *CV = dyn_cast<Constant>(Val: V))
1513 if (CV->getType()->isVectorTy() &&
1514 CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
1515 return ConstantExpr::getNeg(C: CV);
1516
1517 return nullptr;
1518}
1519
1520// Try to fold:
1521// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1522// -> ({s|u}itofp (int_binop x, y))
1523// 2) (fp_binop ({s|u}itofp x), FpC)
1524// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1525//
1526// Assuming the sign of the cast for x/y is `OpsFromSigned`.
1527Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
1528 BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps,
1529 Constant *Op1FpC, SmallVectorImpl<WithCache<const Value *>> &OpsKnown) {
1530
1531 Type *FPTy = BO.getType();
1532 Type *IntTy = IntOps[0]->getType();
1533
1534 unsigned IntSz = IntTy->getScalarSizeInBits();
1535 // This is the maximum number of inuse bits by the integer where the int -> fp
1536 // casts are exact.
1537 unsigned MaxRepresentableBits =
1538 APFloat::semanticsPrecision(FPTy->getScalarType()->getFltSemantics());
1539
1540 // Preserve known number of leading bits. This can allow us to trivial nsw/nuw
1541 // checks later on.
1542 unsigned NumUsedLeadingBits[2] = {IntSz, IntSz};
1543
1544 // NB: This only comes up if OpsFromSigned is true, so there is no need to
1545 // cache if between calls to `foldFBinOpOfIntCastsFromSign`.
1546 auto IsNonZero = [&](unsigned OpNo) -> bool {
1547 if (OpsKnown[OpNo].hasKnownBits() &&
1548 OpsKnown[OpNo].getKnownBits(Q: SQ).isNonZero())
1549 return true;
1550 return isKnownNonZero(V: IntOps[OpNo], Q: SQ);
1551 };
1552
1553 auto IsNonNeg = [&](unsigned OpNo) -> bool {
1554 // NB: This matches the impl in ValueTracking, we just try to use cached
1555 // knownbits here. If we ever start supporting WithCache for
1556 // `isKnownNonNegative`, change this to an explicit call.
1557 return OpsKnown[OpNo].getKnownBits(Q: SQ).isNonNegative();
1558 };
1559
1560 // Check if we know for certain that ({s|u}itofp op) is exact.
1561 auto IsValidPromotion = [&](unsigned OpNo) -> bool {
1562 // Can we treat this operand as the desired sign?
1563 if (OpsFromSigned != isa<SIToFPInst>(Val: BO.getOperand(i_nocapture: OpNo)) &&
1564 !IsNonNeg(OpNo))
1565 return false;
1566
1567 // If fp precision >= bitwidth(op) then its exact.
1568 // NB: This is slightly conservative for `sitofp`. For signed conversion, we
1569 // can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be
1570 // handled specially. We can't, however, increase the bound arbitrarily for
1571 // `sitofp` as for larger sizes, it won't sign extend.
1572 if (MaxRepresentableBits < IntSz) {
1573 // Otherwise if its signed cast check that fp precisions >= bitwidth(op) -
1574 // numSignBits(op).
1575 // TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change
1576 // `IntOps[OpNo]` arguments to `KnownOps[OpNo]`.
1577 if (OpsFromSigned)
1578 NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(Op: IntOps[OpNo]);
1579 // Finally for unsigned check that fp precision >= bitwidth(op) -
1580 // numLeadingZeros(op).
1581 else {
1582 NumUsedLeadingBits[OpNo] =
1583 IntSz - OpsKnown[OpNo].getKnownBits(Q: SQ).countMinLeadingZeros();
1584 }
1585 }
1586 // NB: We could also check if op is known to be a power of 2 or zero (which
1587 // will always be representable). Its unlikely, however, that is we are
1588 // unable to bound op in any way we will be able to pass the overflow checks
1589 // later on.
1590
1591 if (MaxRepresentableBits < NumUsedLeadingBits[OpNo])
1592 return false;
1593 // Signed + Mul also requires that op is non-zero to avoid -0 cases.
1594 return !OpsFromSigned || BO.getOpcode() != Instruction::FMul ||
1595 IsNonZero(OpNo);
1596 };
1597
1598 // If we have a constant rhs, see if we can losslessly convert it to an int.
1599 if (Op1FpC != nullptr) {
1600 // Signed + Mul req non-zero
1601 if (OpsFromSigned && BO.getOpcode() == Instruction::FMul &&
1602 !match(V: Op1FpC, P: m_NonZeroFP()))
1603 return nullptr;
1604
1605 Constant *Op1IntC = ConstantFoldCastOperand(
1606 Opcode: OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, C: Op1FpC,
1607 DestTy: IntTy, DL);
1608 if (Op1IntC == nullptr)
1609 return nullptr;
1610 if (ConstantFoldCastOperand(Opcode: OpsFromSigned ? Instruction::SIToFP
1611 : Instruction::UIToFP,
1612 C: Op1IntC, DestTy: FPTy, DL) != Op1FpC)
1613 return nullptr;
1614
1615 // First try to keep sign of cast the same.
1616 IntOps[1] = Op1IntC;
1617 }
1618
1619 // Ensure lhs/rhs integer types match.
1620 if (IntTy != IntOps[1]->getType())
1621 return nullptr;
1622
1623 if (Op1FpC == nullptr) {
1624 if (!IsValidPromotion(1))
1625 return nullptr;
1626 }
1627 if (!IsValidPromotion(0))
1628 return nullptr;
1629
1630 // Final we check if the integer version of the binop will not overflow.
1631 BinaryOperator::BinaryOps IntOpc;
1632 // Because of the precision check, we can often rule out overflows.
1633 bool NeedsOverflowCheck = true;
1634 // Try to conservatively rule out overflow based on the already done precision
1635 // checks.
1636 unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1;
1637 unsigned OverflowMaxCurBits =
1638 std::max(a: NumUsedLeadingBits[0], b: NumUsedLeadingBits[1]);
1639 bool OutputSigned = OpsFromSigned;
1640 switch (BO.getOpcode()) {
1641 case Instruction::FAdd:
1642 IntOpc = Instruction::Add;
1643 OverflowMaxOutputBits += OverflowMaxCurBits;
1644 break;
1645 case Instruction::FSub:
1646 IntOpc = Instruction::Sub;
1647 OverflowMaxOutputBits += OverflowMaxCurBits;
1648 break;
1649 case Instruction::FMul:
1650 IntOpc = Instruction::Mul;
1651 OverflowMaxOutputBits += OverflowMaxCurBits * 2;
1652 break;
1653 default:
1654 llvm_unreachable("Unsupported binop");
1655 }
1656 // The precision check may have already ruled out overflow.
1657 if (OverflowMaxOutputBits < IntSz) {
1658 NeedsOverflowCheck = false;
1659 // We can bound unsigned overflow from sub to in range signed value (this is
1660 // what allows us to avoid the overflow check for sub).
1661 if (IntOpc == Instruction::Sub)
1662 OutputSigned = true;
1663 }
1664
1665 // Precision check did not rule out overflow, so need to check.
1666 // TODO: If we add support for `WithCache` in `willNotOverflow`, change
1667 // `IntOps[...]` arguments to `KnownOps[...]`.
1668 if (NeedsOverflowCheck &&
1669 !willNotOverflow(Opcode: IntOpc, LHS: IntOps[0], RHS: IntOps[1], CxtI: BO, IsSigned: OutputSigned))
1670 return nullptr;
1671
1672 Value *IntBinOp = Builder.CreateBinOp(Opc: IntOpc, LHS: IntOps[0], RHS: IntOps[1]);
1673 if (auto *IntBO = dyn_cast<BinaryOperator>(Val: IntBinOp)) {
1674 IntBO->setHasNoSignedWrap(OutputSigned);
1675 IntBO->setHasNoUnsignedWrap(!OutputSigned);
1676 }
1677 if (OutputSigned)
1678 return new SIToFPInst(IntBinOp, FPTy);
1679 return new UIToFPInst(IntBinOp, FPTy);
1680}
1681
1682// Try to fold:
1683// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1684// -> ({s|u}itofp (int_binop x, y))
1685// 2) (fp_binop ({s|u}itofp x), FpC)
1686// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1687Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
1688 // Don't perform the fold on vectors, as the integer operation may be much
1689 // more expensive than the float operation in that case.
1690 if (BO.getType()->isVectorTy())
1691 return nullptr;
1692
1693 std::array<Value *, 2> IntOps = {nullptr, nullptr};
1694 Constant *Op1FpC = nullptr;
1695 // Check for:
1696 // 1) (binop ({s|u}itofp x), ({s|u}itofp y))
1697 // 2) (binop ({s|u}itofp x), FpC)
1698 if (!match(V: BO.getOperand(i_nocapture: 0), P: m_IToFP(Op: m_Value(V&: IntOps[0]))))
1699 return nullptr;
1700
1701 if (!match(V: BO.getOperand(i_nocapture: 1), P: m_Constant(C&: Op1FpC)) &&
1702 !match(V: BO.getOperand(i_nocapture: 1), P: m_IToFP(Op: m_Value(V&: IntOps[1]))))
1703 return nullptr;
1704
1705 // Cache KnownBits a bit to potentially save some analysis.
1706 SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]};
1707
1708 // Try treating x/y as coming from both `uitofp` and `sitofp`. There are
1709 // different constraints depending on the sign of the cast.
1710 // NB: `(uitofp nneg X)` == `(sitofp nneg X)`.
1711 if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false,
1712 IntOps, Op1FpC, OpsKnown))
1713 return R;
1714 return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps,
1715 Op1FpC, OpsKnown);
1716}
1717
1718/// A binop with a constant operand and a sign-extended boolean operand may be
1719/// converted into a select of constants by applying the binary operation to
1720/// the constant with the two possible values of the extended boolean (0 or -1).
1721Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
1722 // TODO: Handle non-commutative binop (constant is operand 0).
1723 // TODO: Handle zext.
1724 // TODO: Peek through 'not' of cast.
1725 Value *BO0 = BO.getOperand(i_nocapture: 0);
1726 Value *BO1 = BO.getOperand(i_nocapture: 1);
1727 Value *X;
1728 Constant *C;
1729 if (!match(V: BO0, P: m_SExt(Op: m_Value(V&: X))) || !match(V: BO1, P: m_ImmConstant(C)) ||
1730 !X->getType()->isIntOrIntVectorTy(BitWidth: 1))
1731 return nullptr;
1732
1733 // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
1734 Constant *Ones = ConstantInt::getAllOnesValue(Ty: BO.getType());
1735 Constant *Zero = ConstantInt::getNullValue(Ty: BO.getType());
1736 Value *TVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Ones, RHS: C);
1737 Value *FVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Zero, RHS: C);
1738 return createSelectInstWithUnknownProfile(C: X, S1: TVal, S2: FVal);
1739}
1740
1741static Value *simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
1742 bool IsTrueArm) {
1743 SmallVector<Value *> Ops;
1744 for (Value *Op : I.operands()) {
1745 Value *V = nullptr;
1746 if (Op == SI) {
1747 V = IsTrueArm ? SI->getTrueValue() : SI->getFalseValue();
1748 } else if (match(V: SI->getCondition(),
1749 P: m_SpecificICmp(MatchPred: IsTrueArm ? ICmpInst::ICMP_EQ
1750 : ICmpInst::ICMP_NE,
1751 L: m_Specific(V: Op), R: m_Value(V))) &&
1752 isGuaranteedNotToBeUndefOrPoison(V)) {
1753 // Pass
1754 } else if (match(V: Op, P: m_ZExt(Op: m_Specific(V: SI->getCondition())))) {
1755 V = IsTrueArm ? ConstantInt::get(Ty: Op->getType(), V: 1)
1756 : ConstantInt::getNullValue(Ty: Op->getType());
1757 } else {
1758 V = Op;
1759 }
1760 Ops.push_back(Elt: V);
1761 }
1762
1763 return simplifyInstructionWithOperands(I: &I, NewOps: Ops, Q: I.getDataLayout());
1764}
1765
1766static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
1767 Value *NewOp, InstCombiner &IC) {
1768 Instruction *Clone = I.clone();
1769 Clone->replaceUsesOfWith(From: SI, To: NewOp);
1770 Clone->dropUBImplyingAttrsAndMetadata();
1771 IC.InsertNewInstBefore(New: Clone, Old: I.getIterator());
1772 return Clone;
1773}
1774
1775Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
1776 bool FoldWithMultiUse,
1777 bool SimplifyBothArms) {
1778 // Don't modify shared select instructions unless set FoldWithMultiUse
1779 if (!SI->hasOneUser() && !FoldWithMultiUse)
1780 return nullptr;
1781
1782 Value *TV = SI->getTrueValue();
1783 Value *FV = SI->getFalseValue();
1784
1785 // Bool selects with constant operands can be folded to logical ops.
1786 if (SI->getType()->isIntOrIntVectorTy(BitWidth: 1))
1787 return nullptr;
1788
1789 // Avoid breaking min/max reduction pattern,
1790 // which is necessary for vectorization later.
1791 if (isa<MinMaxIntrinsic>(Val: &Op))
1792 for (Value *IntrinOp : Op.operands())
1793 if (auto *PN = dyn_cast<PHINode>(Val: IntrinOp))
1794 for (Value *PhiOp : PN->operands())
1795 if (PhiOp == &Op)
1796 return nullptr;
1797
1798 // Test if a FCmpInst instruction is used exclusively by a select as
1799 // part of a minimum or maximum operation. If so, refrain from doing
1800 // any other folding. This helps out other analyses which understand
1801 // non-obfuscated minimum and maximum idioms. And in this case, at
1802 // least one of the comparison operands has at least one user besides
1803 // the compare (the select), which would often largely negate the
1804 // benefit of folding anyway.
1805 if (auto *CI = dyn_cast<FCmpInst>(Val: SI->getCondition())) {
1806 if (CI->hasOneUse()) {
1807 Value *Op0 = CI->getOperand(i_nocapture: 0), *Op1 = CI->getOperand(i_nocapture: 1);
1808 if (((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1)) &&
1809 !CI->isCommutative())
1810 return nullptr;
1811 }
1812 }
1813
1814 // Make sure that one of the select arms folds successfully.
1815 Value *NewTV = simplifyOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm=*/true);
1816 Value *NewFV =
1817 simplifyOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm=*/false);
1818 if (!NewTV && !NewFV)
1819 return nullptr;
1820
1821 if (SimplifyBothArms && !(NewTV && NewFV))
1822 return nullptr;
1823
1824 // Create an instruction for the arm that did not fold.
1825 if (!NewTV)
1826 NewTV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: TV, IC&: *this);
1827 if (!NewFV)
1828 NewFV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: FV, IC&: *this);
1829 return SelectInst::Create(C: SI->getCondition(), S1: NewTV, S2: NewFV, NameStr: "", InsertBefore: nullptr, MDFrom: SI);
1830}
1831
1832static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN,
1833 Value *InValue, BasicBlock *InBB,
1834 const DataLayout &DL,
1835 const SimplifyQuery SQ) {
1836 // NB: It is a precondition of this transform that the operands be
1837 // phi translatable!
1838 SmallVector<Value *> Ops;
1839 for (Value *Op : I.operands()) {
1840 if (Op == PN)
1841 Ops.push_back(Elt: InValue);
1842 else
1843 Ops.push_back(Elt: Op->DoPHITranslation(CurBB: PN->getParent(), PredBB: InBB));
1844 }
1845
1846 // Don't consider the simplification successful if we get back a constant
1847 // expression. That's just an instruction in hiding.
1848 // Also reject the case where we simplify back to the phi node. We wouldn't
1849 // be able to remove it in that case.
1850 Value *NewVal = simplifyInstructionWithOperands(
1851 I: &I, NewOps: Ops, Q: SQ.getWithInstruction(I: InBB->getTerminator()));
1852 if (NewVal && NewVal != PN && !match(V: NewVal, P: m_ConstantExpr()))
1853 return NewVal;
1854
1855 // Check if incoming PHI value can be replaced with constant
1856 // based on implied condition.
1857 CondBrInst *TerminatorBI = dyn_cast<CondBrInst>(Val: InBB->getTerminator());
1858 const ICmpInst *ICmp = dyn_cast<ICmpInst>(Val: &I);
1859 if (TerminatorBI &&
1860 TerminatorBI->getSuccessor(i: 0) != TerminatorBI->getSuccessor(i: 1) && ICmp) {
1861 bool LHSIsTrue = TerminatorBI->getSuccessor(i: 0) == PN->getParent();
1862 std::optional<bool> ImpliedCond = isImpliedCondition(
1863 LHS: TerminatorBI->getCondition(), RHSPred: ICmp->getCmpPredicate(), RHSOp0: Ops[0], RHSOp1: Ops[1],
1864 DL, LHSIsTrue);
1865 if (ImpliedCond)
1866 return ConstantInt::getBool(Ty: I.getType(), V: ImpliedCond.value());
1867 }
1868
1869 return nullptr;
1870}
1871
1872/// In some cases it is beneficial to fold a select into a binary operator.
1873/// For example:
1874/// %1 = or %in, 4
1875/// %2 = select %cond, %1, %in
1876/// %3 = or %2, 1
1877/// =>
1878/// %1 = select i1 %cond, 5, 1
1879/// %2 = or %1, %in
1880Instruction *InstCombinerImpl::foldBinOpSelectBinOp(BinaryOperator &Op) {
1881 assert(Op.isAssociative() && "The operation must be associative!");
1882
1883 SelectInst *SI = dyn_cast<SelectInst>(Val: Op.getOperand(i_nocapture: 0));
1884
1885 Constant *Const;
1886 if (!SI || !match(V: Op.getOperand(i_nocapture: 1), P: m_ImmConstant(C&: Const)) ||
1887 !Op.hasOneUse() || !SI->hasOneUse())
1888 return nullptr;
1889
1890 Value *TV = SI->getTrueValue();
1891 Value *FV = SI->getFalseValue();
1892 Value *Input, *NewTV, *NewFV;
1893 Constant *Const2;
1894
1895 if (TV->hasOneUse() && match(V: TV, P: m_BinOp(Opcode: Op.getOpcode(), L: m_Specific(V: FV),
1896 R: m_ImmConstant(C&: Const2)))) {
1897 NewTV = ConstantFoldBinaryInstruction(Opcode: Op.getOpcode(), V1: Const, V2: Const2);
1898 NewFV = Const;
1899 Input = FV;
1900 } else if (FV->hasOneUse() &&
1901 match(V: FV, P: m_BinOp(Opcode: Op.getOpcode(), L: m_Specific(V: TV),
1902 R: m_ImmConstant(C&: Const2)))) {
1903 NewTV = Const;
1904 NewFV = ConstantFoldBinaryInstruction(Opcode: Op.getOpcode(), V1: Const, V2: Const2);
1905 Input = TV;
1906 } else
1907 return nullptr;
1908
1909 if (!NewTV || !NewFV)
1910 return nullptr;
1911
1912 Value *NewSI =
1913 Builder.CreateSelect(C: SI->getCondition(), True: NewTV, False: NewFV, Name: "",
1914 MDFrom: ProfcheckDisableMetadataFixes ? nullptr : SI);
1915 return BinaryOperator::Create(Op: Op.getOpcode(), S1: NewSI, S2: Input);
1916}
1917
1918Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN,
1919 bool AllowMultipleUses) {
1920 unsigned NumPHIValues = PN->getNumIncomingValues();
1921 if (NumPHIValues == 0)
1922 return nullptr;
1923
1924 // We normally only transform phis with a single use. However, if a PHI has
1925 // multiple uses and they are all the same operation, we can fold *all* of the
1926 // uses into the PHI.
1927 bool OneUse = PN->hasOneUse();
1928 bool IdenticalUsers = false;
1929 if (!AllowMultipleUses && !OneUse) {
1930 // Walk the use list for the instruction, comparing them to I.
1931 for (User *U : PN->users()) {
1932 Instruction *UI = cast<Instruction>(Val: U);
1933 if (UI != &I && !I.isIdenticalTo(I: UI))
1934 return nullptr;
1935 }
1936 // Otherwise, we can replace *all* users with the new PHI we form.
1937 IdenticalUsers = true;
1938 }
1939
1940 // Check that all operands are phi-translatable.
1941 for (Value *Op : I.operands()) {
1942 if (Op == PN)
1943 continue;
1944
1945 // Non-instructions never require phi-translation.
1946 auto *I = dyn_cast<Instruction>(Val: Op);
1947 if (!I)
1948 continue;
1949
1950 // Phi-translate can handle phi nodes in the same block.
1951 if (isa<PHINode>(Val: I))
1952 if (I->getParent() == PN->getParent())
1953 continue;
1954
1955 // Operand dominates the block, no phi-translation necessary.
1956 if (DT.dominates(Def: I, BB: PN->getParent()))
1957 continue;
1958
1959 // Not phi-translatable, bail out.
1960 return nullptr;
1961 }
1962
1963 // Check to see whether the instruction can be folded into each phi operand.
1964 // If there is one operand that does not fold, remember the BB it is in.
1965 SmallVector<Value *> NewPhiValues;
1966 SmallVector<unsigned int> OpsToMoveUseToIncomingBB;
1967 bool SeenNonSimplifiedInVal = false;
1968 for (unsigned i = 0; i != NumPHIValues; ++i) {
1969 Value *InVal = PN->getIncomingValue(i);
1970 BasicBlock *InBB = PN->getIncomingBlock(i);
1971
1972 if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InValue: InVal, InBB, DL, SQ)) {
1973 NewPhiValues.push_back(Elt: NewVal);
1974 continue;
1975 }
1976
1977 // Handle some cases that can't be fully simplified, but where we know that
1978 // the two instructions will fold into one.
1979 auto WillFold = [&]() {
1980 if (!InVal->hasUseList() || !InVal->hasOneUser())
1981 return false;
1982
1983 // icmp of ucmp/scmp with constant will fold to icmp.
1984 const APInt *Ignored;
1985 if (isa<CmpIntrinsic>(Val: InVal) &&
1986 match(V: &I, P: m_ICmp(L: m_Specific(V: PN), R: m_APInt(Res&: Ignored))))
1987 return true;
1988
1989 // icmp eq zext(bool), 0 will fold to !bool.
1990 if (isa<ZExtInst>(Val: InVal) &&
1991 cast<ZExtInst>(Val: InVal)->getSrcTy()->isIntOrIntVectorTy(BitWidth: 1) &&
1992 match(V: &I,
1993 P: m_SpecificICmp(MatchPred: ICmpInst::ICMP_EQ, L: m_Specific(V: PN), R: m_Zero())))
1994 return true;
1995
1996 return false;
1997 };
1998
1999 if (WillFold()) {
2000 OpsToMoveUseToIncomingBB.push_back(Elt: i);
2001 NewPhiValues.push_back(Elt: nullptr);
2002 continue;
2003 }
2004
2005 if (!OneUse && !IdenticalUsers)
2006 return nullptr;
2007
2008 if (SeenNonSimplifiedInVal)
2009 return nullptr; // More than one non-simplified value.
2010 SeenNonSimplifiedInVal = true;
2011
2012 // If there is exactly one non-simplified value, we can insert a copy of the
2013 // operation in that block. However, if this is a critical edge, we would
2014 // be inserting the computation on some other paths (e.g. inside a loop).
2015 // Only do this if the pred block is unconditionally branching into the phi
2016 // block. Also, make sure that the pred block is not dead code.
2017 UncondBrInst *BI = dyn_cast<UncondBrInst>(Val: InBB->getTerminator());
2018 if (!BI || !DT.isReachableFromEntry(A: InBB))
2019 return nullptr;
2020
2021 NewPhiValues.push_back(Elt: nullptr);
2022 OpsToMoveUseToIncomingBB.push_back(Elt: i);
2023
2024 // Do not push the operation across a loop backedge. This could result in
2025 // an infinite combine loop, and is generally non-profitable (especially
2026 // if the operation was originally outside the loop).
2027 if (isBackEdge(From: InBB, To: PN->getParent()))
2028 return nullptr;
2029 }
2030
2031 // Clone the instruction that uses the phi node and move it into the incoming
2032 // BB because we know that the next iteration of InstCombine will simplify it.
2033 SmallDenseMap<BasicBlock *, Instruction *> Clones;
2034 for (auto OpIndex : OpsToMoveUseToIncomingBB) {
2035 Value *Op = PN->getIncomingValue(i: OpIndex);
2036 BasicBlock *OpBB = PN->getIncomingBlock(i: OpIndex);
2037
2038 Instruction *Clone = Clones.lookup(Val: OpBB);
2039 if (!Clone) {
2040 Clone = I.clone();
2041 for (Use &U : Clone->operands()) {
2042 if (U == PN)
2043 U = Op;
2044 else
2045 U = U->DoPHITranslation(CurBB: PN->getParent(), PredBB: OpBB);
2046 }
2047 Clone = InsertNewInstBefore(New: Clone, Old: OpBB->getTerminator()->getIterator());
2048 Clones.insert(KV: {OpBB, Clone});
2049 // We may have speculated the instruction.
2050 Clone->dropUBImplyingAttrsAndMetadata();
2051 }
2052
2053 NewPhiValues[OpIndex] = Clone;
2054 }
2055
2056 // Okay, we can do the transformation: create the new PHI node.
2057 PHINode *NewPN = PHINode::Create(Ty: I.getType(), NumReservedValues: PN->getNumIncomingValues());
2058 InsertNewInstBefore(New: NewPN, Old: PN->getIterator());
2059 NewPN->takeName(V: PN);
2060 NewPN->setDebugLoc(PN->getDebugLoc());
2061
2062 for (unsigned i = 0; i != NumPHIValues; ++i)
2063 NewPN->addIncoming(V: NewPhiValues[i], BB: PN->getIncomingBlock(i));
2064
2065 if (IdenticalUsers) {
2066 // Collect and deduplicate users up-front to avoid iterator invalidation.
2067 SmallSetVector<Instruction *, 4> ToReplace;
2068 for (User *U : PN->users()) {
2069 Instruction *User = cast<Instruction>(Val: U);
2070 if (User == &I)
2071 continue;
2072 ToReplace.insert(X: User);
2073 }
2074 for (Instruction *I : ToReplace) {
2075 replaceInstUsesWith(I&: *I, V: NewPN);
2076 eraseInstFromFunction(I&: *I);
2077 }
2078 OneUse = true;
2079 }
2080
2081 if (OneUse) {
2082 replaceAllDbgUsesWith(From&: *PN, To&: *NewPN, DomPoint&: *PN, DT);
2083 }
2084 return replaceInstUsesWith(I, V: NewPN);
2085}
2086
2087Instruction *InstCombinerImpl::foldBinopWithRecurrence(BinaryOperator &BO) {
2088 if (!BO.isAssociative())
2089 return nullptr;
2090
2091 // Find the interleaved binary ops.
2092 auto Opc = BO.getOpcode();
2093 auto *BO0 = dyn_cast<BinaryOperator>(Val: BO.getOperand(i_nocapture: 0));
2094 auto *BO1 = dyn_cast<BinaryOperator>(Val: BO.getOperand(i_nocapture: 1));
2095 if (!BO0 || !BO1 || !BO0->hasNUses(N: 2) || !BO1->hasNUses(N: 2) ||
2096 BO0->getOpcode() != Opc || BO1->getOpcode() != Opc ||
2097 !BO0->isAssociative() || !BO1->isAssociative() ||
2098 BO0->getParent() != BO1->getParent())
2099 return nullptr;
2100
2101 assert(BO.isCommutative() && BO0->isCommutative() && BO1->isCommutative() &&
2102 "Expected commutative instructions!");
2103
2104 // Find the matching phis, forming the recurrences.
2105 PHINode *PN0, *PN1;
2106 Value *Start0, *Step0, *Start1, *Step1;
2107 if (!matchSimpleRecurrence(I: BO0, P&: PN0, Start&: Start0, Step&: Step0) || !PN0->hasOneUse() ||
2108 !matchSimpleRecurrence(I: BO1, P&: PN1, Start&: Start1, Step&: Step1) || !PN1->hasOneUse() ||
2109 PN0->getParent() != PN1->getParent())
2110 return nullptr;
2111
2112 assert(PN0->getNumIncomingValues() == 2 && PN1->getNumIncomingValues() == 2 &&
2113 "Expected PHIs with two incoming values!");
2114
2115 // Convert the start and step values to constants.
2116 auto *Init0 = dyn_cast<Constant>(Val: Start0);
2117 auto *Init1 = dyn_cast<Constant>(Val: Start1);
2118 auto *C0 = dyn_cast<Constant>(Val: Step0);
2119 auto *C1 = dyn_cast<Constant>(Val: Step1);
2120 if (!Init0 || !Init1 || !C0 || !C1)
2121 return nullptr;
2122
2123 // Fold the recurrence constants.
2124 auto *Init = ConstantFoldBinaryInstruction(Opcode: Opc, V1: Init0, V2: Init1);
2125 auto *C = ConstantFoldBinaryInstruction(Opcode: Opc, V1: C0, V2: C1);
2126 if (!Init || !C)
2127 return nullptr;
2128
2129 // Create the reduced PHI.
2130 auto *NewPN = PHINode::Create(Ty: PN0->getType(), NumReservedValues: PN0->getNumIncomingValues(),
2131 NameStr: "reduced.phi");
2132
2133 // Create the new binary op.
2134 auto *NewBO = BinaryOperator::Create(Op: Opc, S1: NewPN, S2: C);
2135 if (Opc == Instruction::FAdd || Opc == Instruction::FMul) {
2136 // Intersect FMF flags for FADD and FMUL.
2137 FastMathFlags Intersect = BO0->getFastMathFlags() &
2138 BO1->getFastMathFlags() & BO.getFastMathFlags();
2139 NewBO->setFastMathFlags(Intersect);
2140 } else {
2141 OverflowTracking Flags;
2142 Flags.AllKnownNonNegative = false;
2143 Flags.AllKnownNonZero = false;
2144 Flags.mergeFlags(I&: *BO0);
2145 Flags.mergeFlags(I&: *BO1);
2146 Flags.mergeFlags(I&: BO);
2147 Flags.applyFlags(I&: *NewBO);
2148 }
2149 NewBO->takeName(V: &BO);
2150
2151 for (unsigned I = 0, E = PN0->getNumIncomingValues(); I != E; ++I) {
2152 auto *V = PN0->getIncomingValue(i: I);
2153 auto *BB = PN0->getIncomingBlock(i: I);
2154 if (V == Init0) {
2155 assert(((PN1->getIncomingValue(0) == Init1 &&
2156 PN1->getIncomingBlock(0) == BB) ||
2157 (PN1->getIncomingValue(1) == Init1 &&
2158 PN1->getIncomingBlock(1) == BB)) &&
2159 "Invalid incoming block!");
2160 NewPN->addIncoming(V: Init, BB);
2161 } else if (V == BO0) {
2162 assert(((PN1->getIncomingValue(0) == BO1 &&
2163 PN1->getIncomingBlock(0) == BB) ||
2164 (PN1->getIncomingValue(1) == BO1 &&
2165 PN1->getIncomingBlock(1) == BB)) &&
2166 "Invalid incoming block!");
2167 NewPN->addIncoming(V: NewBO, BB);
2168 } else
2169 llvm_unreachable("Unexpected incoming value!");
2170 }
2171
2172 LLVM_DEBUG(dbgs() << " Combined " << *PN0 << "\n " << *BO0
2173 << "\n with " << *PN1 << "\n " << *BO1
2174 << '\n');
2175
2176 // Insert the new recurrence and remove the old (dead) ones.
2177 InsertNewInstWith(New: NewPN, Old: PN0->getIterator());
2178 InsertNewInstWith(New: NewBO, Old: BO0->getIterator());
2179
2180 eraseInstFromFunction(
2181 I&: *replaceInstUsesWith(I&: *BO0, V: PoisonValue::get(T: BO0->getType())));
2182 eraseInstFromFunction(
2183 I&: *replaceInstUsesWith(I&: *BO1, V: PoisonValue::get(T: BO1->getType())));
2184 eraseInstFromFunction(I&: *PN0);
2185 eraseInstFromFunction(I&: *PN1);
2186
2187 return replaceInstUsesWith(I&: BO, V: NewBO);
2188}
2189
2190Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
2191 // Attempt to fold binary operators whose operands are simple recurrences.
2192 if (auto *NewBO = foldBinopWithRecurrence(BO))
2193 return NewBO;
2194
2195 // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
2196 // we are guarding against replicating the binop in >1 predecessor.
2197 // This could miss matching a phi with 2 constant incoming values.
2198 auto *Phi0 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 0));
2199 auto *Phi1 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 1));
2200 if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
2201 Phi0->getNumOperands() != Phi1->getNumOperands())
2202 return nullptr;
2203
2204 // TODO: Remove the restriction for binop being in the same block as the phis.
2205 if (BO.getParent() != Phi0->getParent() ||
2206 BO.getParent() != Phi1->getParent())
2207 return nullptr;
2208
2209 // Fold if there is at least one specific constant value in phi0 or phi1's
2210 // incoming values that comes from the same block and this specific constant
2211 // value can be used to do optimization for specific binary operator.
2212 // For example:
2213 // %phi0 = phi i32 [0, %bb0], [%i, %bb1]
2214 // %phi1 = phi i32 [%j, %bb0], [0, %bb1]
2215 // %add = add i32 %phi0, %phi1
2216 // ==>
2217 // %add = phi i32 [%j, %bb0], [%i, %bb1]
2218 Constant *C = ConstantExpr::getBinOpIdentity(Opcode: BO.getOpcode(), Ty: BO.getType(),
2219 /*AllowRHSConstant*/ false);
2220 if (C) {
2221 SmallVector<Value *, 4> NewIncomingValues;
2222 auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) {
2223 auto &Phi0Use = std::get<0>(t&: T);
2224 auto &Phi1Use = std::get<1>(t&: T);
2225 if (Phi0->getIncomingBlock(U: Phi0Use) != Phi1->getIncomingBlock(U: Phi1Use))
2226 return false;
2227 Value *Phi0UseV = Phi0Use.get();
2228 Value *Phi1UseV = Phi1Use.get();
2229 if (Phi0UseV == C)
2230 NewIncomingValues.push_back(Elt: Phi1UseV);
2231 else if (Phi1UseV == C)
2232 NewIncomingValues.push_back(Elt: Phi0UseV);
2233 else
2234 return false;
2235 return true;
2236 };
2237
2238 if (all_of(Range: zip(t: Phi0->operands(), u: Phi1->operands()),
2239 P: CanFoldIncomingValuePair)) {
2240 PHINode *NewPhi =
2241 PHINode::Create(Ty: Phi0->getType(), NumReservedValues: Phi0->getNumOperands());
2242 assert(NewIncomingValues.size() == Phi0->getNumOperands() &&
2243 "The number of collected incoming values should equal the number "
2244 "of the original PHINode operands!");
2245 for (unsigned I = 0; I < Phi0->getNumOperands(); I++)
2246 NewPhi->addIncoming(V: NewIncomingValues[I], BB: Phi0->getIncomingBlock(i: I));
2247 return NewPhi;
2248 }
2249 }
2250
2251 if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
2252 return nullptr;
2253
2254 // Match a pair of incoming constants for one of the predecessor blocks.
2255 BasicBlock *ConstBB, *OtherBB;
2256 Constant *C0, *C1;
2257 if (match(V: Phi0->getIncomingValue(i: 0), P: m_ImmConstant(C&: C0))) {
2258 ConstBB = Phi0->getIncomingBlock(i: 0);
2259 OtherBB = Phi0->getIncomingBlock(i: 1);
2260 } else if (match(V: Phi0->getIncomingValue(i: 1), P: m_ImmConstant(C&: C0))) {
2261 ConstBB = Phi0->getIncomingBlock(i: 1);
2262 OtherBB = Phi0->getIncomingBlock(i: 0);
2263 } else {
2264 return nullptr;
2265 }
2266 if (!match(V: Phi1->getIncomingValueForBlock(BB: ConstBB), P: m_ImmConstant(C&: C1)))
2267 return nullptr;
2268
2269 // The block that we are hoisting to must reach here unconditionally.
2270 // Otherwise, we could be speculatively executing an expensive or
2271 // non-speculative op.
2272 auto *PredBlockBranch = dyn_cast<UncondBrInst>(Val: OtherBB->getTerminator());
2273 if (!PredBlockBranch || !DT.isReachableFromEntry(A: OtherBB))
2274 return nullptr;
2275
2276 // TODO: This check could be tightened to only apply to binops (div/rem) that
2277 // are not safe to speculatively execute. But that could allow hoisting
2278 // potentially expensive instructions (fdiv for example).
2279 for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
2280 if (!isGuaranteedToTransferExecutionToSuccessor(I: &*BBIter))
2281 return nullptr;
2282
2283 // Fold constants for the predecessor block with constant incoming values.
2284 Constant *NewC = ConstantFoldBinaryOpOperands(Opcode: BO.getOpcode(), LHS: C0, RHS: C1, DL);
2285 if (!NewC)
2286 return nullptr;
2287
2288 // Make a new binop in the predecessor block with the non-constant incoming
2289 // values.
2290 Builder.SetInsertPoint(PredBlockBranch);
2291 Value *NewBO = Builder.CreateBinOp(Opc: BO.getOpcode(),
2292 LHS: Phi0->getIncomingValueForBlock(BB: OtherBB),
2293 RHS: Phi1->getIncomingValueForBlock(BB: OtherBB));
2294 if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(Val: NewBO))
2295 NotFoldedNewBO->copyIRFlags(V: &BO);
2296
2297 // Replace the binop with a phi of the new values. The old phis are dead.
2298 PHINode *NewPhi = PHINode::Create(Ty: BO.getType(), NumReservedValues: 2);
2299 NewPhi->addIncoming(V: NewBO, BB: OtherBB);
2300 NewPhi->addIncoming(V: NewC, BB: ConstBB);
2301 return NewPhi;
2302}
2303
2304Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
2305 auto TryFoldOperand = [&](unsigned OpIdx,
2306 bool IsOtherParamConst) -> Instruction * {
2307 if (auto *Sel = dyn_cast<SelectInst>(Val: I.getOperand(i_nocapture: OpIdx)))
2308 return FoldOpIntoSelect(Op&: I, SI: Sel, FoldWithMultiUse: false, SimplifyBothArms: !IsOtherParamConst);
2309 if (auto *PN = dyn_cast<PHINode>(Val: I.getOperand(i_nocapture: OpIdx)))
2310 return foldOpIntoPhi(I, PN);
2311 return nullptr;
2312 };
2313
2314 if (Instruction *NewI =
2315 TryFoldOperand(/*OpIdx=*/0, isa<Constant>(Val: I.getOperand(i_nocapture: 1))))
2316 return NewI;
2317 return TryFoldOperand(/*OpIdx=*/1, isa<Constant>(Val: I.getOperand(i_nocapture: 0)));
2318}
2319
2320static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
2321 // If this GEP has only 0 indices, it is the same pointer as
2322 // Src. If Src is not a trivial GEP too, don't combine
2323 // the indices.
2324 if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
2325 !Src.hasOneUse())
2326 return false;
2327 return true;
2328}
2329
2330/// Find a constant NewC that has property:
2331/// shuffle(NewC, ShMask) = C
2332/// Returns nullptr if such a constant does not exist e.g. ShMask=<0,0> C=<1,2>
2333///
2334/// A 1-to-1 mapping is not required. Example:
2335/// ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <poison,5,6,poison>
2336Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
2337 VectorType *NewCTy) {
2338 if (isa<ScalableVectorType>(Val: NewCTy)) {
2339 Constant *Splat = C->getSplatValue();
2340 if (!Splat)
2341 return nullptr;
2342 return ConstantVector::getSplat(EC: NewCTy->getElementCount(), Elt: Splat);
2343 }
2344
2345 if (cast<FixedVectorType>(Val: NewCTy)->getNumElements() >
2346 cast<FixedVectorType>(Val: C->getType())->getNumElements())
2347 return nullptr;
2348
2349 unsigned NewCNumElts = cast<FixedVectorType>(Val: NewCTy)->getNumElements();
2350 PoisonValue *PoisonScalar = PoisonValue::get(T: C->getType()->getScalarType());
2351 SmallVector<Constant *, 16> NewVecC(NewCNumElts, PoisonScalar);
2352 unsigned NumElts = cast<FixedVectorType>(Val: C->getType())->getNumElements();
2353 for (unsigned I = 0; I < NumElts; ++I) {
2354 Constant *CElt = C->getAggregateElement(Elt: I);
2355 if (ShMask[I] >= 0) {
2356 assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
2357 Constant *NewCElt = NewVecC[ShMask[I]];
2358 // Bail out if:
2359 // 1. The constant vector contains a constant expression.
2360 // 2. The shuffle needs an element of the constant vector that can't
2361 // be mapped to a new constant vector.
2362 // 3. This is a widening shuffle that copies elements of V1 into the
2363 // extended elements (extending with poison is allowed).
2364 if (!CElt || (!isa<PoisonValue>(Val: NewCElt) && NewCElt != CElt) ||
2365 I >= NewCNumElts)
2366 return nullptr;
2367 NewVecC[ShMask[I]] = CElt;
2368 }
2369 }
2370 return ConstantVector::get(V: NewVecC);
2371}
2372
2373// Get the result of `Vector Op Splat` (or Splat Op Vector if \p SplatLHS).
2374static Constant *constantFoldBinOpWithSplat(unsigned Opcode, Constant *Vector,
2375 Constant *Splat, bool SplatLHS,
2376 const DataLayout &DL) {
2377 ElementCount EC = cast<VectorType>(Val: Vector->getType())->getElementCount();
2378 Constant *LHS = ConstantVector::getSplat(EC, Elt: Splat);
2379 Constant *RHS = Vector;
2380 if (!SplatLHS)
2381 std::swap(a&: LHS, b&: RHS);
2382 return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
2383}
2384
2385template <Intrinsic::ID SpliceID>
2386static Instruction *foldSpliceBinOp(BinaryOperator &Inst,
2387 InstCombiner::BuilderTy &Builder) {
2388 Value *LHS = Inst.getOperand(i_nocapture: 0), *RHS = Inst.getOperand(i_nocapture: 1);
2389 auto CreateBinOpSplice = [&](Value *X, Value *Y, Value *Offset) {
2390 Value *V = Builder.CreateBinOp(Opc: Inst.getOpcode(), LHS: X, RHS: Y, Name: Inst.getName());
2391 if (auto *BO = dyn_cast<BinaryOperator>(Val: V))
2392 BO->copyIRFlags(V: &Inst);
2393 Module *M = Inst.getModule();
2394 Function *F = Intrinsic::getOrInsertDeclaration(M, id: SpliceID, OverloadTys: V->getType());
2395 return CallInst::Create(Func: F, Args: {V, PoisonValue::get(T: V->getType()), Offset});
2396 };
2397 Value *V1, *V2, *Offset;
2398 if (match(LHS,
2399 m_Intrinsic<SpliceID>(m_Value(V&: V1), m_Poison(), m_Value(V&: Offset)))) {
2400 // Op(splice(V1, poison, offset), splice(V2, poison, offset))
2401 // -> splice(Op(V1, V2), poison, offset)
2402 if (match(RHS, m_Intrinsic<SpliceID>(m_Value(V&: V2), m_Poison(),
2403 m_Specific(V: Offset))) &&
2404 (LHS->hasOneUse() || RHS->hasOneUse() ||
2405 (LHS == RHS && LHS->hasNUses(N: 2))))
2406 return CreateBinOpSplice(V1, V2, Offset);
2407
2408 // Op(splice(V1, poison, offset), RHSSplat)
2409 // -> splice(Op(V1, RHSSplat), poison, offset)
2410 if (LHS->hasOneUse() && isSplatValue(V: RHS))
2411 return CreateBinOpSplice(V1, RHS, Offset);
2412 }
2413 // Op(LHSSplat, splice(V2, poison, offset))
2414 // -> splice(Op(LHSSplat, V2), poison, offset)
2415 else if (isSplatValue(V: LHS) &&
2416 match(RHS, m_OneUse(m_Intrinsic<SpliceID>(m_Value(V&: V2), m_Poison(),
2417 m_Value(V&: Offset)))))
2418 return CreateBinOpSplice(LHS, V2, Offset);
2419
2420 // TODO: Fold binops of the form
2421 // Op(splice(poison, V1, offset), splice(poison, V2, offset))
2422 // -> splice(poison, Op(V1, V2), offset)
2423
2424 return nullptr;
2425}
2426
2427Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
2428 if (!isa<VectorType>(Val: Inst.getType()))
2429 return nullptr;
2430
2431 BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
2432 Value *LHS = Inst.getOperand(i_nocapture: 0), *RHS = Inst.getOperand(i_nocapture: 1);
2433 assert(cast<VectorType>(LHS->getType())->getElementCount() ==
2434 cast<VectorType>(Inst.getType())->getElementCount());
2435 assert(cast<VectorType>(RHS->getType())->getElementCount() ==
2436 cast<VectorType>(Inst.getType())->getElementCount());
2437
2438 auto foldConstantsThroughSubVectorInsertSplat =
2439 [&](Value *MaybeSubVector, Value *MaybeSplat,
2440 bool SplatLHS) -> Instruction * {
2441 Value *Idx;
2442 Constant *Splat, *SubVector, *Dest;
2443 if (!match(V: MaybeSplat, P: m_ConstantSplat(SubPattern: m_Constant(C&: Splat))) ||
2444 !match(V: MaybeSubVector,
2445 P: m_VectorInsert(Op0: m_Constant(C&: Dest), Op1: m_Constant(C&: SubVector),
2446 Op2: m_Value(V&: Idx))))
2447 return nullptr;
2448 SubVector =
2449 constantFoldBinOpWithSplat(Opcode, Vector: SubVector, Splat, SplatLHS, DL);
2450 Dest = constantFoldBinOpWithSplat(Opcode, Vector: Dest, Splat, SplatLHS, DL);
2451 if (!SubVector || !Dest)
2452 return nullptr;
2453 auto *InsertVector =
2454 Builder.CreateInsertVector(DstType: Dest->getType(), SrcVec: Dest, SubVec: SubVector, Idx);
2455 return replaceInstUsesWith(I&: Inst, V: InsertVector);
2456 };
2457
2458 // If one operand is a constant splat and the other operand is a
2459 // `vector.insert` where both the destination and subvector are constant,
2460 // apply the operation to both the destination and subvector, returning a new
2461 // constant `vector.insert`. This helps constant folding for scalable vectors.
2462 if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
2463 /*MaybeSubVector=*/LHS, /*MaybeSplat=*/RHS, /*SplatLHS=*/false))
2464 return Folded;
2465 if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
2466 /*MaybeSubVector=*/RHS, /*MaybeSplat=*/LHS, /*SplatLHS=*/true))
2467 return Folded;
2468
2469 // If both operands of the binop are vector concatenations, then perform the
2470 // narrow binop on each pair of the source operands followed by concatenation
2471 // of the results.
2472 Value *L0, *L1, *R0, *R1;
2473 ArrayRef<int> Mask;
2474 if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: L0), v2: m_Value(V&: L1), mask: m_Mask(Mask))) &&
2475 match(V: RHS, P: m_Shuffle(v1: m_Value(V&: R0), v2: m_Value(V&: R1), mask: m_SpecificMask(Mask))) &&
2476 LHS->hasOneUse() && RHS->hasOneUse() &&
2477 cast<ShuffleVectorInst>(Val: LHS)->isConcat() &&
2478 cast<ShuffleVectorInst>(Val: RHS)->isConcat()) {
2479 // This transform does not have the speculative execution constraint as
2480 // below because the shuffle is a concatenation. The new binops are
2481 // operating on exactly the same elements as the existing binop.
2482 // TODO: We could ease the mask requirement to allow different undef lanes,
2483 // but that requires an analysis of the binop-with-undef output value.
2484 Value *NewBO0 = Builder.CreateBinOp(Opc: Opcode, LHS: L0, RHS: R0);
2485 if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO0))
2486 BO->copyIRFlags(V: &Inst);
2487 Value *NewBO1 = Builder.CreateBinOp(Opc: Opcode, LHS: L1, RHS: R1);
2488 if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO1))
2489 BO->copyIRFlags(V: &Inst);
2490 return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
2491 }
2492
2493 auto createBinOpReverse = [&](Value *X, Value *Y) {
2494 Value *V = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y, Name: Inst.getName());
2495 if (auto *BO = dyn_cast<BinaryOperator>(Val: V))
2496 BO->copyIRFlags(V: &Inst);
2497 Module *M = Inst.getModule();
2498 Function *F = Intrinsic::getOrInsertDeclaration(
2499 M, id: Intrinsic::vector_reverse, OverloadTys: V->getType());
2500 return CallInst::Create(Func: F, Args: V);
2501 };
2502
2503 // NOTE: Reverse shuffles don't require the speculative execution protection
2504 // below because they don't affect which lanes take part in the computation.
2505
2506 Value *V1, *V2;
2507 if (match(V: LHS, P: m_VecReverse(Op0: m_Value(V&: V1)))) {
2508 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
2509 if (match(V: RHS, P: m_VecReverse(Op0: m_Value(V&: V2))) &&
2510 (LHS->hasOneUse() || RHS->hasOneUse() ||
2511 (LHS == RHS && LHS->hasNUses(N: 2))))
2512 return createBinOpReverse(V1, V2);
2513
2514 // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
2515 if (LHS->hasOneUse() && isSplatValue(V: RHS))
2516 return createBinOpReverse(V1, RHS);
2517 }
2518 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2519 else if (isSplatValue(V: LHS) && match(V: RHS, P: m_OneUse(SubPattern: m_VecReverse(Op0: m_Value(V&: V2)))))
2520 return createBinOpReverse(LHS, V2);
2521
2522 auto createBinOpVPReverse = [&](Value *X, Value *Y, Value *EVL) {
2523 Value *V = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y, Name: Inst.getName());
2524 if (auto *BO = dyn_cast<BinaryOperator>(Val: V))
2525 BO->copyIRFlags(V: &Inst);
2526
2527 ElementCount EC = cast<VectorType>(Val: V->getType())->getElementCount();
2528 Value *AllTrueMask = Builder.CreateVectorSplat(EC, V: Builder.getTrue());
2529 Module *M = Inst.getModule();
2530 Function *F = Intrinsic::getOrInsertDeclaration(
2531 M, id: Intrinsic::experimental_vp_reverse, OverloadTys: V->getType());
2532 return CallInst::Create(Func: F, Args: {V, AllTrueMask, EVL});
2533 };
2534
2535 Value *EVL;
2536 if (match(V: LHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>(
2537 Op0: m_Value(V&: V1), Op1: m_AllOnes(), Op2: m_Value(V&: EVL)))) {
2538 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
2539 if (match(V: RHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>(
2540 Op0: m_Value(V&: V2), Op1: m_AllOnes(), Op2: m_Specific(V: EVL))) &&
2541 (LHS->hasOneUse() || RHS->hasOneUse() ||
2542 (LHS == RHS && LHS->hasNUses(N: 2))))
2543 return createBinOpVPReverse(V1, V2, EVL);
2544
2545 // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
2546 if (LHS->hasOneUse() && isSplatValue(V: RHS))
2547 return createBinOpVPReverse(V1, RHS, EVL);
2548 }
2549 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2550 else if (isSplatValue(V: LHS) &&
2551 match(V: RHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>(
2552 Op0: m_Value(V&: V2), Op1: m_AllOnes(), Op2: m_Value(V&: EVL))))
2553 return createBinOpVPReverse(LHS, V2, EVL);
2554
2555 if (Instruction *Folded =
2556 foldSpliceBinOp<Intrinsic::vector_splice_left>(Inst, Builder))
2557 return Folded;
2558 if (Instruction *Folded =
2559 foldSpliceBinOp<Intrinsic::vector_splice_right>(Inst, Builder))
2560 return Folded;
2561
2562 // It may not be safe to reorder shuffles and things like div, urem, etc.
2563 // because we may trap when executing those ops on unknown vector elements.
2564 // See PR20059.
2565 if (!isSafeToSpeculativelyExecuteWithVariableReplaced(I: &Inst))
2566 return nullptr;
2567
2568 auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
2569 Value *XY = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y);
2570 if (auto *BO = dyn_cast<BinaryOperator>(Val: XY))
2571 BO->copyIRFlags(V: &Inst);
2572 return new ShuffleVectorInst(XY, M);
2573 };
2574
2575 // If both arguments of the binary operation are shuffles that use the same
2576 // mask and shuffle within a single vector, move the shuffle after the binop.
2577 if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(), mask: m_Mask(Mask))) &&
2578 match(V: RHS, P: m_Shuffle(v1: m_Value(V&: V2), v2: m_Poison(), mask: m_SpecificMask(Mask))) &&
2579 V1->getType() == V2->getType() &&
2580 (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
2581 // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
2582 return createBinOpShuffle(V1, V2, Mask);
2583 }
2584
2585 // If both arguments of a commutative binop are select-shuffles that use the
2586 // same mask with commuted operands, the shuffles are unnecessary.
2587 if (Inst.isCommutative() &&
2588 match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Value(V&: V2), mask: m_Mask(Mask))) &&
2589 match(V: RHS,
2590 P: m_Shuffle(v1: m_Specific(V: V2), v2: m_Specific(V: V1), mask: m_SpecificMask(Mask)))) {
2591 auto *LShuf = cast<ShuffleVectorInst>(Val: LHS);
2592 auto *RShuf = cast<ShuffleVectorInst>(Val: RHS);
2593 // TODO: Allow shuffles that contain undefs in the mask?
2594 // That is legal, but it reduces undef knowledge.
2595 // TODO: Allow arbitrary shuffles by shuffling after binop?
2596 // That might be legal, but we have to deal with poison.
2597 if (LShuf->isSelect() &&
2598 !is_contained(Range: LShuf->getShuffleMask(), Element: PoisonMaskElem) &&
2599 RShuf->isSelect() &&
2600 !is_contained(Range: RShuf->getShuffleMask(), Element: PoisonMaskElem)) {
2601 // Example:
2602 // LHS = shuffle V1, V2, <0, 5, 6, 3>
2603 // RHS = shuffle V2, V1, <0, 5, 6, 3>
2604 // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
2605 Instruction *NewBO = BinaryOperator::Create(Op: Opcode, S1: V1, S2: V2);
2606 NewBO->copyIRFlags(V: &Inst);
2607 return NewBO;
2608 }
2609 }
2610
2611 // If one argument is a shuffle within one vector and the other is a constant,
2612 // try moving the shuffle after the binary operation. This canonicalization
2613 // intends to move shuffles closer to other shuffles and binops closer to
2614 // other binops, so they can be folded. It may also enable demanded elements
2615 // transforms.
2616 Constant *C;
2617 if (match(V: &Inst, P: m_c_BinOp(L: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(),
2618 mask: m_Mask(Mask))),
2619 R: m_ImmConstant(C)))) {
2620 assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() &&
2621 "Shuffle should not change scalar type");
2622
2623 bool ConstOp1 = isa<Constant>(Val: RHS);
2624 if (Constant *NewC =
2625 unshuffleConstant(ShMask: Mask, C, NewCTy: cast<VectorType>(Val: V1->getType()))) {
2626 // For fixed vectors, lanes of NewC not used by the shuffle will be poison
2627 // which will cause UB for div/rem. Mask them with a safe constant.
2628 if (isa<FixedVectorType>(Val: V1->getType()) && Inst.isIntDivRem())
2629 NewC = getSafeVectorConstantForBinop(Opcode, In: NewC, IsRHSConstant: ConstOp1);
2630
2631 // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
2632 // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
2633 Value *NewLHS = ConstOp1 ? V1 : NewC;
2634 Value *NewRHS = ConstOp1 ? NewC : V1;
2635 return createBinOpShuffle(NewLHS, NewRHS, Mask);
2636 }
2637 }
2638
2639 // Try to reassociate to sink a splat shuffle after a binary operation.
2640 if (Inst.isAssociative() && Inst.isCommutative()) {
2641 // Canonicalize shuffle operand as LHS.
2642 if (isa<ShuffleVectorInst>(Val: RHS))
2643 std::swap(a&: LHS, b&: RHS);
2644
2645 Value *X;
2646 ArrayRef<int> MaskC;
2647 int SplatIndex;
2648 Value *Y, *OtherOp;
2649 if (!match(V: LHS,
2650 P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_Mask(MaskC)))) ||
2651 !match(Mask: MaskC, P: m_SplatOrPoisonMask(SplatIndex)) ||
2652 X->getType() != Inst.getType() ||
2653 !match(V: RHS, P: m_OneUse(SubPattern: m_BinOp(Opcode, L: m_Value(V&: Y), R: m_Value(V&: OtherOp)))))
2654 return nullptr;
2655
2656 // FIXME: This may not be safe if the analysis allows undef elements. By
2657 // moving 'Y' before the splat shuffle, we are implicitly assuming
2658 // that it is not undef/poison at the splat index.
2659 if (isSplatValue(V: OtherOp, Index: SplatIndex)) {
2660 std::swap(a&: Y, b&: OtherOp);
2661 } else if (!isSplatValue(V: Y, Index: SplatIndex)) {
2662 return nullptr;
2663 }
2664
2665 // X and Y are splatted values, so perform the binary operation on those
2666 // values followed by a splat followed by the 2nd binary operation:
2667 // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
2668 Value *NewBO = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y);
2669 SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
2670 Value *NewSplat = Builder.CreateShuffleVector(V: NewBO, Mask: NewMask);
2671 Instruction *R = BinaryOperator::Create(Op: Opcode, S1: NewSplat, S2: OtherOp);
2672
2673 // Intersect FMF on both new binops. Other (poison-generating) flags are
2674 // dropped to be safe.
2675 if (isa<FPMathOperator>(Val: R)) {
2676 R->copyFastMathFlags(I: &Inst);
2677 R->andIRFlags(V: RHS);
2678 }
2679 if (auto *NewInstBO = dyn_cast<BinaryOperator>(Val: NewBO))
2680 NewInstBO->copyIRFlags(V: R);
2681 return R;
2682 }
2683
2684 return nullptr;
2685}
2686
2687/// Try to narrow the width of a binop if at least 1 operand is an extend of
2688/// of a value. This requires a potentially expensive known bits check to make
2689/// sure the narrow op does not overflow.
2690Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
2691 // We need at least one extended operand.
2692 Value *Op0 = BO.getOperand(i_nocapture: 0), *Op1 = BO.getOperand(i_nocapture: 1);
2693
2694 // If this is a sub, we swap the operands since we always want an extension
2695 // on the RHS. The LHS can be an extension or a constant.
2696 if (BO.getOpcode() == Instruction::Sub)
2697 std::swap(a&: Op0, b&: Op1);
2698
2699 Value *X;
2700 bool IsSext = match(V: Op0, P: m_SExt(Op: m_Value(V&: X)));
2701 if (!IsSext && !match(V: Op0, P: m_ZExt(Op: m_Value(V&: X))))
2702 return nullptr;
2703
2704 // If both operands are the same extension from the same source type and we
2705 // can eliminate at least one (hasOneUse), this might work.
2706 CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
2707 Value *Y;
2708 if (!(match(V: Op1, P: m_ZExtOrSExt(Op: m_Value(V&: Y))) && X->getType() == Y->getType() &&
2709 cast<Operator>(Val: Op1)->getOpcode() == CastOpc &&
2710 (Op0->hasOneUse() || Op1->hasOneUse()))) {
2711 // If that did not match, see if we have a suitable constant operand.
2712 // Truncating and extending must produce the same constant.
2713 Constant *WideC;
2714 if (!Op0->hasOneUse() || !match(V: Op1, P: m_Constant(C&: WideC)))
2715 return nullptr;
2716 Constant *NarrowC = getLosslessInvCast(C: WideC, InvCastTo: X->getType(), CastOp: CastOpc, DL);
2717 if (!NarrowC)
2718 return nullptr;
2719 Y = NarrowC;
2720 }
2721
2722 // Swap back now that we found our operands.
2723 if (BO.getOpcode() == Instruction::Sub)
2724 std::swap(a&: X, b&: Y);
2725
2726 // Both operands have narrow versions. Last step: the math must not overflow
2727 // in the narrow width.
2728 if (!willNotOverflow(Opcode: BO.getOpcode(), LHS: X, RHS: Y, CxtI: BO, IsSigned: IsSext))
2729 return nullptr;
2730
2731 // bo (ext X), (ext Y) --> ext (bo X, Y)
2732 // bo (ext X), C --> ext (bo X, C')
2733 Value *NarrowBO = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: X, RHS: Y, Name: "narrow");
2734 if (auto *NewBinOp = dyn_cast<BinaryOperator>(Val: NarrowBO)) {
2735 if (IsSext)
2736 NewBinOp->setHasNoSignedWrap();
2737 else
2738 NewBinOp->setHasNoUnsignedWrap();
2739 }
2740 return CastInst::Create(CastOpc, S: NarrowBO, Ty: BO.getType());
2741}
2742
2743/// Determine nowrap flags for (gep (gep p, x), y) to (gep p, (x + y))
2744/// transform.
2745static GEPNoWrapFlags getMergedGEPNoWrapFlags(GEPOperator &GEP1,
2746 GEPOperator &GEP2) {
2747 return GEP1.getNoWrapFlags().intersectForOffsetAdd(Other: GEP2.getNoWrapFlags());
2748}
2749
2750/// Thread a GEP operation with constant indices through the constant true/false
2751/// arms of a select.
2752static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
2753 InstCombiner::BuilderTy &Builder) {
2754 if (!GEP.hasAllConstantIndices())
2755 return nullptr;
2756
2757 Instruction *Sel;
2758 Value *Cond;
2759 Constant *TrueC, *FalseC;
2760 if (!match(V: GEP.getPointerOperand(), P: m_Instruction(I&: Sel)) ||
2761 !match(V: Sel,
2762 P: m_Select(C: m_Value(V&: Cond), L: m_Constant(C&: TrueC), R: m_Constant(C&: FalseC))))
2763 return nullptr;
2764
2765 // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
2766 // Propagate 'inbounds' and metadata from existing instructions.
2767 // Note: using IRBuilder to create the constants for efficiency.
2768 SmallVector<Value *, 4> IndexC(GEP.indices());
2769 GEPNoWrapFlags NW = GEP.getNoWrapFlags();
2770 Type *Ty = GEP.getSourceElementType();
2771 Value *NewTrueC = Builder.CreateGEP(Ty, Ptr: TrueC, IdxList: IndexC, Name: "", NW);
2772 Value *NewFalseC = Builder.CreateGEP(Ty, Ptr: FalseC, IdxList: IndexC, Name: "", NW);
2773 return SelectInst::Create(C: Cond, S1: NewTrueC, S2: NewFalseC, NameStr: "", InsertBefore: nullptr, MDFrom: Sel);
2774}
2775
2776// Canonicalization:
2777// gep T, (gep i8, base, C1), (Index + C2) into
2778// gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index
2779static Instruction *canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP,
2780 GEPOperator *Src,
2781 InstCombinerImpl &IC) {
2782 if (GEP.getNumIndices() != 1)
2783 return nullptr;
2784 auto &DL = IC.getDataLayout();
2785 Value *Base;
2786 const APInt *C1;
2787 if (!match(V: Src, P: m_PtrAdd(PointerOp: m_Value(V&: Base), OffsetOp: m_APInt(Res&: C1))))
2788 return nullptr;
2789 Value *VarIndex;
2790 const APInt *C2;
2791 Type *PtrTy = Src->getType()->getScalarType();
2792 unsigned IndexSizeInBits = DL.getIndexTypeSizeInBits(Ty: PtrTy);
2793 if (!match(V: GEP.getOperand(i_nocapture: 1), P: m_AddLike(L: m_Value(V&: VarIndex), R: m_APInt(Res&: C2))))
2794 return nullptr;
2795 if (C1->getBitWidth() != IndexSizeInBits ||
2796 C2->getBitWidth() != IndexSizeInBits)
2797 return nullptr;
2798 Type *BaseType = GEP.getSourceElementType();
2799 if (isa<ScalableVectorType>(Val: BaseType))
2800 return nullptr;
2801 APInt TypeSize(IndexSizeInBits, DL.getTypeAllocSize(Ty: BaseType));
2802 APInt NewOffset = TypeSize * *C2 + *C1;
2803 if (NewOffset.isZero() ||
2804 (Src->hasOneUse() && GEP.getOperand(i_nocapture: 1)->hasOneUse())) {
2805 GEPNoWrapFlags Flags = GEPNoWrapFlags::none();
2806 if (GEP.hasNoUnsignedWrap() &&
2807 cast<GEPOperator>(Val: Src)->hasNoUnsignedWrap() &&
2808 match(V: GEP.getOperand(i_nocapture: 1), P: m_NUWAddLike(L: m_Value(), R: m_Value()))) {
2809 Flags |= GEPNoWrapFlags::noUnsignedWrap();
2810 if (GEP.isInBounds() && cast<GEPOperator>(Val: Src)->isInBounds())
2811 Flags |= GEPNoWrapFlags::inBounds();
2812 }
2813
2814 Value *GEPConst =
2815 IC.Builder.CreatePtrAdd(Ptr: Base, Offset: IC.Builder.getInt(AI: NewOffset), Name: "", NW: Flags);
2816 return GetElementPtrInst::Create(PointeeType: BaseType, Ptr: GEPConst, IdxList: VarIndex, NW: Flags);
2817 }
2818
2819 return nullptr;
2820}
2821
2822/// Combine constant offsets separated by variable offsets.
2823/// ptradd (ptradd (ptradd p, C1), x), C2 -> ptradd (ptradd p, x), C1+C2
2824static Instruction *combineConstantOffsets(GetElementPtrInst &GEP,
2825 InstCombinerImpl &IC) {
2826 if (!GEP.hasAllConstantIndices())
2827 return nullptr;
2828
2829 GEPNoWrapFlags NW = GEPNoWrapFlags::all();
2830 SmallVector<GetElementPtrInst *> Skipped;
2831 auto *InnerGEP = dyn_cast<GetElementPtrInst>(Val: GEP.getPointerOperand());
2832 while (true) {
2833 if (!InnerGEP)
2834 return nullptr;
2835
2836 NW = NW.intersectForReassociate(Other: InnerGEP->getNoWrapFlags());
2837 if (InnerGEP->hasAllConstantIndices())
2838 break;
2839
2840 if (!InnerGEP->hasOneUse())
2841 return nullptr;
2842
2843 Skipped.push_back(Elt: InnerGEP);
2844 InnerGEP = dyn_cast<GetElementPtrInst>(Val: InnerGEP->getPointerOperand());
2845 }
2846
2847 // The two constant offset GEPs are directly adjacent: Let normal offset
2848 // merging handle it.
2849 if (Skipped.empty())
2850 return nullptr;
2851
2852 // FIXME: This one-use check is not strictly necessary. Consider relaxing it
2853 // if profitable.
2854 if (!InnerGEP->hasOneUse())
2855 return nullptr;
2856
2857 // Don't bother with vector splats.
2858 Type *Ty = GEP.getType();
2859 if (InnerGEP->getType() != Ty)
2860 return nullptr;
2861
2862 const DataLayout &DL = IC.getDataLayout();
2863 APInt Offset(DL.getIndexTypeSizeInBits(Ty), 0);
2864 if (!GEP.accumulateConstantOffset(DL, Offset) ||
2865 !InnerGEP->accumulateConstantOffset(DL, Offset))
2866 return nullptr;
2867
2868 IC.replaceOperand(I&: *Skipped.back(), OpNum: 0, V: InnerGEP->getPointerOperand());
2869 for (GetElementPtrInst *SkippedGEP : Skipped)
2870 SkippedGEP->setNoWrapFlags(NW);
2871
2872 return IC.replaceInstUsesWith(
2873 I&: GEP,
2874 V: IC.Builder.CreatePtrAdd(Ptr: Skipped.front(), Offset: IC.Builder.getInt(AI: Offset), Name: "",
2875 NW: NW.intersectForOffsetAdd(Other: GEP.getNoWrapFlags())));
2876}
2877
2878Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
2879 GEPOperator *Src) {
2880 // Combine Indices - If the source pointer to this getelementptr instruction
2881 // is a getelementptr instruction with matching element type, combine the
2882 // indices of the two getelementptr instructions into a single instruction.
2883 if (!shouldMergeGEPs(GEP&: *cast<GEPOperator>(Val: &GEP), Src&: *Src))
2884 return nullptr;
2885
2886 if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, IC&: *this))
2887 return I;
2888
2889 if (auto *I = combineConstantOffsets(GEP, IC&: *this))
2890 return I;
2891
2892 if (Src->getResultElementType() != GEP.getSourceElementType())
2893 return nullptr;
2894
2895 // Fold chained GEP with constant base into single GEP:
2896 // gep i8, (gep i8, %base, C1), (select Cond, C2, C3)
2897 // -> gep i8, %base, (select Cond, C1+C2, C1+C3)
2898 if (Src->hasOneUse() && GEP.getNumIndices() == 1 &&
2899 Src->getNumIndices() == 1) {
2900 Value *SrcIdx = *Src->idx_begin();
2901 Value *GEPIdx = *GEP.idx_begin();
2902 const APInt *ConstOffset, *TrueVal, *FalseVal;
2903 Value *Cond;
2904
2905 if ((match(V: SrcIdx, P: m_APInt(Res&: ConstOffset)) &&
2906 match(V: GEPIdx,
2907 P: m_Select(C: m_Value(V&: Cond), L: m_APInt(Res&: TrueVal), R: m_APInt(Res&: FalseVal)))) ||
2908 (match(V: GEPIdx, P: m_APInt(Res&: ConstOffset)) &&
2909 match(V: SrcIdx,
2910 P: m_Select(C: m_Value(V&: Cond), L: m_APInt(Res&: TrueVal), R: m_APInt(Res&: FalseVal))))) {
2911 auto *Select = isa<SelectInst>(Val: GEPIdx) ? cast<SelectInst>(Val: GEPIdx)
2912 : cast<SelectInst>(Val: SrcIdx);
2913
2914 // Make sure the select has only one use.
2915 if (!Select->hasOneUse())
2916 return nullptr;
2917
2918 if (TrueVal->getBitWidth() != ConstOffset->getBitWidth() ||
2919 FalseVal->getBitWidth() != ConstOffset->getBitWidth())
2920 return nullptr;
2921
2922 APInt NewTrueVal = *ConstOffset + *TrueVal;
2923 APInt NewFalseVal = *ConstOffset + *FalseVal;
2924 Constant *NewTrue = ConstantInt::get(Ty: Select->getType(), V: NewTrueVal);
2925 Constant *NewFalse = ConstantInt::get(Ty: Select->getType(), V: NewFalseVal);
2926 Value *NewSelect = Builder.CreateSelect(
2927 C: Cond, True: NewTrue, False: NewFalse, /*Name=*/"",
2928 /*MDFrom=*/(ProfcheckDisableMetadataFixes ? nullptr : Select));
2929 GEPNoWrapFlags Flags =
2930 getMergedGEPNoWrapFlags(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP));
2931 return replaceInstUsesWith(I&: GEP,
2932 V: Builder.CreateGEP(Ty: GEP.getResultElementType(),
2933 Ptr: Src->getPointerOperand(),
2934 IdxList: NewSelect, Name: "", NW: Flags));
2935 }
2936 }
2937
2938 // Find out whether the last index in the source GEP is a sequential idx.
2939 bool EndsWithSequential = false;
2940 for (gep_type_iterator I = gep_type_begin(GEP: *Src), E = gep_type_end(GEP: *Src);
2941 I != E; ++I)
2942 EndsWithSequential = I.isSequential();
2943 if (!EndsWithSequential)
2944 return nullptr;
2945
2946 // Replace: gep (gep %P, long B), long A, ...
2947 // With: T = long A+B; gep %P, T, ...
2948 Value *SO1 = Src->getOperand(i_nocapture: Src->getNumOperands() - 1);
2949 Value *GO1 = GEP.getOperand(i_nocapture: 1);
2950
2951 // If they aren't the same type, then the input hasn't been processed
2952 // by the loop above yet (which canonicalizes sequential index types to
2953 // intptr_t). Just avoid transforming this until the input has been
2954 // normalized.
2955 if (SO1->getType() != GO1->getType())
2956 return nullptr;
2957
2958 Value *Sum =
2959 simplifyAddInst(LHS: GO1, RHS: SO1, IsNSW: false, IsNUW: false, Q: SQ.getWithInstruction(I: &GEP));
2960 // Only do the combine when we are sure the cost after the
2961 // merge is never more than that before the merge.
2962 if (Sum == nullptr)
2963 return nullptr;
2964
2965 SmallVector<Value *, 8> Indices;
2966 Indices.append(in_start: Src->op_begin() + 1, in_end: Src->op_end() - 1);
2967 Indices.push_back(Elt: Sum);
2968 Indices.append(in_start: GEP.op_begin() + 2, in_end: GEP.op_end());
2969
2970 // Don't create GEPs with more than one non-zero index.
2971 unsigned NumNonZeroIndices = count_if(Range&: Indices, P: [](Value *Idx) {
2972 auto *C = dyn_cast<Constant>(Val: Idx);
2973 return !C || !C->isNullValue();
2974 });
2975 if (NumNonZeroIndices > 1)
2976 return nullptr;
2977
2978 return replaceInstUsesWith(
2979 I&: GEP, V: Builder.CreateGEP(
2980 Ty: Src->getSourceElementType(), Ptr: Src->getOperand(i_nocapture: 0), IdxList: Indices, Name: "",
2981 NW: getMergedGEPNoWrapFlags(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP))));
2982}
2983
2984Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
2985 BuilderTy *Builder,
2986 bool &DoesConsume, unsigned Depth) {
2987 static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1));
2988 // ~(~(X)) -> X.
2989 Value *A, *B;
2990 if (match(V, P: m_Not(V: m_Value(V&: A)))) {
2991 DoesConsume = true;
2992 return A;
2993 }
2994
2995 Constant *C;
2996 // Constants can be considered to be not'ed values.
2997 if (match(V, P: m_ImmConstant(C)))
2998 return ConstantExpr::getNot(C);
2999
3000 if (Depth++ >= MaxAnalysisRecursionDepth)
3001 return nullptr;
3002
3003 // The rest of the cases require that we invert all uses so don't bother
3004 // doing the analysis if we know we can't use the result.
3005 if (!WillInvertAllUses)
3006 return nullptr;
3007
3008 // Compares can be inverted if all of their uses are being modified to use
3009 // the ~V.
3010 if (auto *I = dyn_cast<CmpInst>(Val: V)) {
3011 if (Builder != nullptr)
3012 return Builder->CreateCmp(Pred: I->getInversePredicate(), LHS: I->getOperand(i_nocapture: 0),
3013 RHS: I->getOperand(i_nocapture: 1));
3014 return NonNull;
3015 }
3016
3017 // If `V` is of the form `A + B` then `-1 - V` can be folded into
3018 // `(-1 - B) - A` if we are willing to invert all of the uses.
3019 if (match(V, P: m_Add(L: m_Value(V&: A), R: m_Value(V&: B)))) {
3020 if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
3021 DoesConsume, Depth))
3022 return Builder ? Builder->CreateSub(LHS: BV, RHS: A) : NonNull;
3023 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3024 DoesConsume, Depth))
3025 return Builder ? Builder->CreateSub(LHS: AV, RHS: B) : NonNull;
3026 return nullptr;
3027 }
3028
3029 // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded
3030 // into `A ^ B` if we are willing to invert all of the uses.
3031 if (match(V, P: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B)))) {
3032 if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
3033 DoesConsume, Depth))
3034 return Builder ? Builder->CreateXor(LHS: A, RHS: BV) : NonNull;
3035 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3036 DoesConsume, Depth))
3037 return Builder ? Builder->CreateXor(LHS: AV, RHS: B) : NonNull;
3038 return nullptr;
3039 }
3040
3041 // If `V` is of the form `B - A` then `-1 - V` can be folded into
3042 // `A + (-1 - B)` if we are willing to invert all of the uses.
3043 if (match(V, P: m_Sub(L: m_Value(V&: A), R: m_Value(V&: B)))) {
3044 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3045 DoesConsume, Depth))
3046 return Builder ? Builder->CreateAdd(LHS: AV, RHS: B) : NonNull;
3047 return nullptr;
3048 }
3049
3050 // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded
3051 // into `A s>> B` if we are willing to invert all of the uses.
3052 if (match(V, P: m_AShr(L: m_Value(V&: A), R: m_Value(V&: B)))) {
3053 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3054 DoesConsume, Depth))
3055 return Builder ? Builder->CreateAShr(LHS: AV, RHS: B) : NonNull;
3056 return nullptr;
3057 }
3058
3059 Value *Cond;
3060 // LogicOps are special in that we canonicalize them at the cost of an
3061 // instruction.
3062 bool IsSelect = match(V, P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: A), R: m_Value(V&: B))) &&
3063 !shouldAvoidAbsorbingNotIntoSelect(SI: *cast<SelectInst>(Val: V));
3064 // Selects/min/max with invertible operands are freely invertible
3065 if (IsSelect || match(V, P: m_MaxOrMin(L: m_Value(V&: A), R: m_Value(V&: B)))) {
3066 bool LocalDoesConsume = DoesConsume;
3067 if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder*/ nullptr,
3068 DoesConsume&: LocalDoesConsume, Depth))
3069 return nullptr;
3070 if (Value *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3071 DoesConsume&: LocalDoesConsume, Depth)) {
3072 DoesConsume = LocalDoesConsume;
3073 if (Builder != nullptr) {
3074 Value *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
3075 DoesConsume, Depth);
3076 assert(NotB != nullptr &&
3077 "Unable to build inverted value for known freely invertable op");
3078 if (auto *II = dyn_cast<IntrinsicInst>(Val: V))
3079 return Builder->CreateBinaryIntrinsic(
3080 ID: getInverseMinMaxIntrinsic(MinMaxID: II->getIntrinsicID()), LHS: NotA, RHS: NotB);
3081 return Builder->CreateSelect(
3082 C: Cond, True: NotA, False: NotB, Name: "",
3083 MDFrom: ProfcheckDisableMetadataFixes ? nullptr : cast<Instruction>(Val: V));
3084 }
3085 return NonNull;
3086 }
3087 }
3088
3089 if (PHINode *PN = dyn_cast<PHINode>(Val: V)) {
3090 bool LocalDoesConsume = DoesConsume;
3091 SmallVector<std::pair<Value *, BasicBlock *>, 8> IncomingValues;
3092 for (Use &U : PN->operands()) {
3093 BasicBlock *IncomingBlock = PN->getIncomingBlock(U);
3094 Value *NewIncomingVal = getFreelyInvertedImpl(
3095 V: U.get(), /*WillInvertAllUses=*/false,
3096 /*Builder=*/nullptr, DoesConsume&: LocalDoesConsume, Depth: MaxAnalysisRecursionDepth - 1);
3097 if (NewIncomingVal == nullptr)
3098 return nullptr;
3099 // Make sure that we can safely erase the original PHI node.
3100 if (NewIncomingVal == V)
3101 return nullptr;
3102 if (Builder != nullptr)
3103 IncomingValues.emplace_back(Args&: NewIncomingVal, Args&: IncomingBlock);
3104 }
3105
3106 DoesConsume = LocalDoesConsume;
3107 if (Builder != nullptr) {
3108 IRBuilderBase::InsertPointGuard Guard(*Builder);
3109 Builder->SetInsertPoint(PN);
3110 PHINode *NewPN =
3111 Builder->CreatePHI(Ty: PN->getType(), NumReservedValues: PN->getNumIncomingValues());
3112 for (auto [Val, Pred] : IncomingValues)
3113 NewPN->addIncoming(V: Val, BB: Pred);
3114 return NewPN;
3115 }
3116 return NonNull;
3117 }
3118
3119 if (match(V, P: m_SExtLike(Op: m_Value(V&: A)))) {
3120 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3121 DoesConsume, Depth))
3122 return Builder ? Builder->CreateSExt(V: AV, DestTy: V->getType()) : NonNull;
3123 return nullptr;
3124 }
3125
3126 if (match(V, P: m_Trunc(Op: m_Value(V&: A)))) {
3127 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3128 DoesConsume, Depth))
3129 return Builder ? Builder->CreateTrunc(V: AV, DestTy: V->getType()) : NonNull;
3130 return nullptr;
3131 }
3132
3133 // De Morgan's Laws:
3134 // (~(A | B)) -> (~A & ~B)
3135 // (~(A & B)) -> (~A | ~B)
3136 auto TryInvertAndOrUsingDeMorgan = [&](Instruction::BinaryOps Opcode,
3137 bool IsLogical, Value *A,
3138 Value *B) -> Value * {
3139 bool LocalDoesConsume = DoesConsume;
3140 if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder=*/nullptr,
3141 DoesConsume&: LocalDoesConsume, Depth))
3142 return nullptr;
3143 if (auto *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3144 DoesConsume&: LocalDoesConsume, Depth)) {
3145 auto *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
3146 DoesConsume&: LocalDoesConsume, Depth);
3147 DoesConsume = LocalDoesConsume;
3148 if (IsLogical)
3149 return Builder ? Builder->CreateLogicalOp(Opc: Opcode, Cond1: NotA, Cond2: NotB) : NonNull;
3150 return Builder ? Builder->CreateBinOp(Opc: Opcode, LHS: NotA, RHS: NotB) : NonNull;
3151 }
3152
3153 return nullptr;
3154 };
3155
3156 if (match(V, P: m_Or(L: m_Value(V&: A), R: m_Value(V&: B))))
3157 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/false, A,
3158 B);
3159
3160 if (match(V, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B))))
3161 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/false, A,
3162 B);
3163
3164 if (match(V, P: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B))))
3165 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/true, A,
3166 B);
3167
3168 if (match(V, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B))))
3169 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/true, A,
3170 B);
3171
3172 return nullptr;
3173}
3174
3175 /// Return true if we should canonicalize the gep to an i8 ptradd.
///
/// The checks run in this order:
///  - a GEP whose source element type is already i8 is never rewritten;
///  - a GEP with a scalable source element type always is;
///  - a single-index GEP whose index is a one-use mul/shl by a constant
///    integer is;
///  - otherwise the answer is true only when the pointer operand is itself a
///    GEP with all-constant indices and at least one index of \p GEP is a
///    non-zero constant.
3176 static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
3177 Value *PtrOp = GEP.getOperand(i_nocapture: 0);
3178 Type *GEPEltType = GEP.getSourceElementType();
     // Already in the i8 form; nothing to canonicalize.
3179 if (GEPEltType->isIntegerTy(Bitwidth: 8))
3180 return false;
3181
3182 // Canonicalize scalable GEPs to an explicit offset using the llvm.vscale
3183 // intrinsic. This has better support in BasicAA.
3184 if (GEPEltType->isScalableTy())
3185 return true;
3186
3187 // gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two multiplies
3188 // together.
3189 if (GEP.getNumIndices() == 1 &&
3190 match(V: GEP.getOperand(i_nocapture: 1),
3191 P: m_OneUse(SubPattern: m_CombineOr(L: m_Mul(L: m_Value(), R: m_ConstantInt()),
3192 R: m_Shl(L: m_Value(), R: m_ConstantInt())))))
3193 return true;
3194
3195 // gep (gep %p, C1), %x, C2 is expanded so the two constants can
3196 // possibly be merged together.
3197 auto PtrOpGep = dyn_cast<GEPOperator>(Val: PtrOp);
     // Require an all-constant inner GEP and at least one index of the outer
     // GEP that matches m_APInt with a non-zero value.
3198 return PtrOpGep && PtrOpGep->hasAllConstantIndices() &&
3199 any_of(Range: GEP.indices(), P: [](Value *V) {
3200 const APInt *C;
3201 return match(V, P: m_APInt(Res&: C)) && !C->isZero();
3202 });
3203 }
3204
/// Try to sink the GEPs that feed the PHI node \p PN below the PHI, so that
/// the resulting single GEP can be used as the pointer operand of \p GEP.
///
/// Every incoming value of \p PN must be a GetElementPtrInst other than
/// \p GEP itself, and all of them must agree on operand count, source element
/// type and per-operand types. They may differ in at most one operand
/// position (tracked in DI); a differing operand past position 1 is rejected
/// when the type being indexed at that position is a struct. If an operand
/// does differ, \p PN must have a single use.
///
/// On success a clone of the first incoming GEP is inserted at the first
/// insertion point of \p GEP's parent block and returned. Its no-wrap flags
/// are the intersection of the flags of all incoming GEPs, and the differing
/// operand (if any) is replaced by a new PHI created right before \p PN.
/// Returns nullptr when the fold does not apply.
3205 static Instruction *foldGEPOfPhi(GetElementPtrInst &GEP, PHINode *PN,
3206 IRBuilderBase &Builder) {
3207 auto *Op1 = dyn_cast<GetElementPtrInst>(Val: PN->getOperand(i_nocapture: 0));
3208 if (!Op1)
3209 return nullptr;
3210
3211 // Don't fold a GEP into itself through a PHI node. This can only happen
3212 // through the back-edge of a loop. Folding a GEP into itself means that
3213 // the value of the previous iteration needs to be stored in the meantime,
3214 // thus requiring an additional register variable to be live, but not
3215 // actually achieving anything (the GEP still needs to be executed once per
3216 // loop iteration).
3217 if (Op1 == &GEP)
3218 return nullptr;
     // Start from the flags of the first incoming GEP; they are intersected
     // with the flags of every other incoming GEP in the loop below.
3219 GEPNoWrapFlags NW = Op1->getNoWrapFlags();
3220
     // Operand index at which the incoming GEPs differ, or -1 while no
     // difference has been seen.
3221 int DI = -1;
3222
     // Compare every remaining incoming value against Op1.
3223 for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
3224 auto *Op2 = dyn_cast<GetElementPtrInst>(Val&: *I);
3225 if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
3226 Op1->getSourceElementType() != Op2->getSourceElementType())
3227 return nullptr;
3228
3229 // As for Op1 above, don't try to fold a GEP into itself.
3230 if (Op2 == &GEP)
3231 return nullptr;
3232
3233 // Keep track of the type as we walk the GEP.
3234 Type *CurTy = nullptr;
3235
3236 for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
3237 if (Op1->getOperand(i_nocapture: J)->getType() != Op2->getOperand(i_nocapture: J)->getType())
3238 return nullptr;
3239
3240 if (Op1->getOperand(i_nocapture: J) != Op2->getOperand(i_nocapture: J)) {
3241 if (DI == -1) {
3242 // We have not seen any differences in the GEPs feeding the
3243 // PHI yet, so we record this one if it is allowed to be a
3244 // variable.
3245
3246 // The first two arguments can vary for any GEP, the rest have to be
3247 // static for struct slots
3248 if (J > 1) {
3249 assert(CurTy && "No current type?");
3250 if (CurTy->isStructTy())
3251 return nullptr;
3252 }
3253
3254 DI = J;
3255 } else {
3256 // The GEP is different by more than one input. While this could be
3257 // extended to support GEPs that vary by more than one variable it
3258 // doesn't make sense since it greatly increases the complexity and
3259 // would result in an R+R+R addressing mode which no backend
3260 // directly supports and would need to be broken into several
3261 // simpler instructions anyway.
3262 return nullptr;
3263 }
3264 }
3265
3266 // Sink down a layer of the type for the next iteration.
     // Operand 0 is the pointer, so the type walk starts at operand 1 with
     // the source element type.
3267 if (J > 0) {
3268 if (J == 1) {
3269 CurTy = Op1->getSourceElementType();
3270 } else {
3271 CurTy =
3272 GetElementPtrInst::getTypeAtIndex(Ty: CurTy, Idx: Op1->getOperand(i_nocapture: J));
3273 }
3274 }
3275 }
3276
3277 NW &= Op2->getNoWrapFlags();
3278 }
3279
3280 // If not all GEPs are identical we'll have to create a new PHI node.
3281 // Check that the old PHI node has only one use so that it will get
3282 // removed.
3283 if (DI != -1 && !PN->hasOneUse())
3284 return nullptr;
3285
     // The clone is not inserted yet; insertion happens at the end of this
     // function.
3286 auto *NewGEP = cast<GetElementPtrInst>(Val: Op1->clone());
3287 NewGEP->setNoWrapFlags(NW);
3288
3289 if (DI == -1) {
3290 // All the GEPs feeding the PHI are identical. Clone one down into our
3291 // BB so that it can be merged with the current GEP.
3292 } else {
3293 // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
3294 // into the current block so it can be merged, and create a new PHI to
3295 // set that index.
3296 PHINode *NewPN;
3297 {
     // The guard restores the builder's insertion point when this scope ends.
3298 IRBuilderBase::InsertPointGuard Guard(Builder);
3299 Builder.SetInsertPoint(PN);
3300 NewPN = Builder.CreatePHI(Ty: Op1->getOperand(i_nocapture: DI)->getType(),
3301 NumReservedValues: PN->getNumOperands());
3302 }
3303
     // Feed the new PHI with operand DI of each incoming GEP, keeping the
     // incoming block of the corresponding use in PN.
3304 for (auto &I : PN->operands())
3305 NewPN->addIncoming(V: cast<GEPOperator>(Val&: I)->getOperand(i_nocapture: DI),
3306 BB: PN->getIncomingBlock(U: I));
3307
3308 NewGEP->setOperand(i_nocapture: DI, Val_nocapture: NewPN);
3309 }
3310
3311 NewGEP->insertBefore(BB&: *GEP.getParent(), InsertPos: GEP.getParent()->getFirstInsertionPt());
3312 return NewGEP;
3313 }
3314
/// Simplify and canonicalize a getelementptr instruction.
///
/// The transforms below are attempted in source order and, with one exception,
/// the first one that applies returns immediately:
///  - simplifyGEPInst;
///  - demanded-vector-element simplification for fixed-vector results;
///  - normalizing index types to the data layout's index type and zeroing
///    indices over zero-sized element types;
///  - rewriting all-constant GEPs and GEPs accepted by
///    shouldCanonicalizeGEPToPtrAdd as i8 ptradd;
///  - stripping a trailing or leading zero index;
///  - scalarizing splat vector operands;
///  - splitting GEPs with more than one non-zero index;
///  - retyping a single-index GEP to [sizeof(T) x i8] (done in place, does not
///    return);
///  - folding through a PHI of GEPs (foldGEPOfPhi) and GEP-of-GEP;
///  - single-index folds of ptrtoint differences and exact div/shr indices;
///  - inferring inbounds from dereferenceable bytes and nuw from nusw plus
///    non-negative indices;
///  - splitting an add-like index into two GEPs;
///  - foldSelectGEP;
///  - turning an srem index into urem for inbounds+nuw GEPs.
///
/// Returns the result of replaceInstUsesWith/replaceOperand, &GEP when \p GEP
/// was modified in place, a newly created instruction, or nullptr when none of
/// the returning transforms applied.
3315 Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
3316 Value *PtrOp = GEP.getOperand(i_nocapture: 0);
3317 SmallVector<Value *, 8> Indices(GEP.indices());
3318 Type *GEPType = GEP.getType();
3319 Type *GEPEltType = GEP.getSourceElementType();
3320 if (Value *V =
3321 simplifyGEPInst(SrcTy: GEPEltType, Ptr: PtrOp, Indices, NW: GEP.getNoWrapFlags(),
3322 Q: SQ.getWithInstruction(I: &GEP)))
3323 return replaceInstUsesWith(I&: GEP, V);
3324
3325 // For vector geps, use the generic demanded vector support.
3326 // Skip if GEP return type is scalable. The number of elements is unknown at
3327 // compile-time.
3328 if (auto *GEPFVTy = dyn_cast<FixedVectorType>(Val: GEPType)) {
3329 auto VWidth = GEPFVTy->getNumElements();
3330 APInt PoisonElts(VWidth, 0);
3331 APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth));
3332 if (Value *V = SimplifyDemandedVectorElts(V: &GEP, DemandedElts: AllOnesEltMask,
3333 PoisonElts)) {
3334 if (V != &GEP)
3335 return replaceInstUsesWith(I&: GEP, V);
3336 return &GEP;
3337 }
3338 }
3339
3340 // Eliminate unneeded casts for indices, and replace indices which displace
3341 // by multiples of a zero size type with zero.
3342 bool MadeChange = false;
3343
3344 // Index width may not be the same width as pointer width.
3345 // Data layout chooses the right type based on supported integer types.
3346 Type *NewScalarIndexTy =
3347 DL.getIndexType(PtrTy: GEP.getPointerOperandType()->getScalarType());
3348
     // Walk the index operands in lock-step with the GEP type iterator; the
     // operands are rewritten in place through the use iterator.
3349 gep_type_iterator GTI = gep_type_begin(GEP);
3350 for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
3351 ++I, ++GTI) {
3352 // Skip indices into struct types.
3353 if (GTI.isStruct())
3354 continue;
3355
3356 Type *IndexTy = (*I)->getType();
     // Vector indices keep their element count; only the element type changes.
3357 Type *NewIndexType =
3358 IndexTy->isVectorTy()
3359 ? VectorType::get(ElementType: NewScalarIndexTy,
3360 EC: cast<VectorType>(Val: IndexTy)->getElementCount())
3361 : NewScalarIndexTy;
3362
3363 // If the element type has zero size then any index over it is equivalent
3364 // to an index of zero, so replace it with zero if it is not zero already.
3365 Type *EltTy = GTI.getIndexedType();
3366 if (EltTy->isSized() && DL.getTypeAllocSize(Ty: EltTy).isZero())
3367 if (!isa<Constant>(Val: *I) || !match(V: I->get(), P: m_Zero())) {
3368 *I = Constant::getNullValue(Ty: NewIndexType);
3369 MadeChange = true;
3370 }
3371
3372 if (IndexTy != NewIndexType) {
3373 // If we are using a wider index than needed for this platform, shrink
3374 // it to what we need. If narrower, sign-extend it to what we need.
3375 // This explicit cast can make subsequent optimizations more obvious.
3376 if (IndexTy->getScalarSizeInBits() <
3377 NewIndexType->getScalarSizeInBits()) {
     // With both nuw and nusw on the GEP the widening is emitted as a
     // zext flagged non-negative instead of a sext.
3378 if (GEP.hasNoUnsignedWrap() && GEP.hasNoUnsignedSignedWrap())
3379 *I = Builder.CreateZExt(V: *I, DestTy: NewIndexType, Name: "", /*IsNonNeg=*/true);
3380 else
3381 *I = Builder.CreateSExt(V: *I, DestTy: NewIndexType);
3382 } else {
     // The GEP's nuw/nusw flags are forwarded as the trunc's nuw/nsw flags.
3383 *I = Builder.CreateTrunc(V: *I, DestTy: NewIndexType, Name: "", IsNUW: GEP.hasNoUnsignedWrap(),
3384 IsNSW: GEP.hasNoUnsignedSignedWrap());
3385 }
3386 MadeChange = true;
3387 }
3388 }
3389 if (MadeChange)
3390 return &GEP;
3391
3392 // Canonicalize constant GEPs to i8 type.
3393 if (!GEPEltType->isIntegerTy(Bitwidth: 8) && GEP.hasAllConstantIndices()) {
3394 APInt Offset(DL.getIndexTypeSizeInBits(Ty: GEPType), 0);
3395 if (GEP.accumulateConstantOffset(DL, Offset))
3396 return replaceInstUsesWith(
3397 I&: GEP, V: Builder.CreatePtrAdd(Ptr: PtrOp, Offset: Builder.getInt(AI: Offset), Name: "",
3398 NW: GEP.getNoWrapFlags()));
3399 }
3400
3401 if (shouldCanonicalizeGEPToPtrAdd(GEP)) {
3402 Value *Offset = EmitGEPOffset(GEP: cast<GEPOperator>(Val: &GEP));
3403 Value *NewGEP =
3404 Builder.CreatePtrAdd(Ptr: PtrOp, Offset, Name: "", NW: GEP.getNoWrapFlags());
3405 return replaceInstUsesWith(I&: GEP, V: NewGEP);
3406 }
3407
3408 // Strip trailing zero indices.
3409 auto *LastIdx = dyn_cast<Constant>(Val: Indices.back());
3410 if (LastIdx && LastIdx->isNullValue() && !LastIdx->getType()->isVectorTy()) {
3411 return replaceInstUsesWith(
3412 I&: GEP, V: Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: PtrOp,
3413 IdxList: drop_end(RangeOrContainer&: Indices), Name: "", NW: GEP.getNoWrapFlags()));
3414 }
3415
3416 // Strip leading zero indices.
3417 auto *FirstIdx = dyn_cast<Constant>(Val: Indices.front());
3418 if (FirstIdx && FirstIdx->isNullValue() &&
3419 !FirstIdx->getType()->isVectorTy()) {
3420 gep_type_iterator GTI = gep_type_begin(GEP);
3421 ++GTI;
     // Only done when the second index steps over a non-struct type whose
     // sequential element stride equals the alloc size of the indexed type.
3422 if (!GTI.isStruct() && GTI.getSequentialElementStride(DL) ==
3423 DL.getTypeAllocSize(Ty: GTI.getIndexedType()))
3424 return replaceInstUsesWith(I&: GEP, V: Builder.CreateGEP(Ty: GTI.getIndexedType(),
3425 Ptr: GEP.getPointerOperand(),
3426 IdxList: drop_begin(RangeOrContainer&: Indices), Name: "",
3427 NW: GEP.getNoWrapFlags()));
3428 }
3429
3430 // Scalarize vector operands; prefer splat-of-gep as canonical form.
3431 // Note that this loses information about undef lanes; we run it after
3432 // demanded bits to partially mitigate that loss.
3433 if (GEPType->isVectorTy() && llvm::any_of(Range: GEP.operands(), P: [](Value *Op) {
3434 return Op->getType()->isVectorTy() && getSplatValue(V: Op);
3435 })) {
3436 SmallVector<Value *> NewOps;
     // Replace every splat vector operand by its scalar; keep the others.
3437 for (auto &Op : GEP.operands()) {
3438 if (Op->getType()->isVectorTy())
3439 if (Value *Scalar = getSplatValue(V: Op)) {
3440 NewOps.push_back(Elt: Scalar);
3441 continue;
3442 }
3443 NewOps.push_back(Elt: Op);
3444 }
3445
3446 Value *Res = Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: NewOps[0],
3447 IdxList: ArrayRef(NewOps).drop_front(), Name: GEP.getName(),
3448 NW: GEP.getNoWrapFlags());
     // If every operand became scalar, splat the scalar result back to the
     // original vector element count.
3449 if (!Res->getType()->isVectorTy()) {
3450 ElementCount EC = cast<VectorType>(Val: GEPType)->getElementCount();
3451 Res = Builder.CreateVectorSplat(EC, V: Res);
3452 }
3453 return replaceInstUsesWith(I&: GEP, V: Res);
3454 }
3455
     // Look for a second index that is not skipped as a leading zero; the GEP
     // is split in front of that index.
3456 bool SeenNonZeroIndex = false;
3457 for (auto [IdxNum, Idx] : enumerate(First&: Indices)) {
3458 // Ignore one leading zero index.
3459 auto *C = dyn_cast<Constant>(Val: Idx);
3460 if (C && C->isNullValue() && IdxNum == 0)
3461 continue;
3462
3463 if (!SeenNonZeroIndex) {
3464 SeenNonZeroIndex = true;
3465 continue;
3466 }
3467
3468 // GEP has multiple non-zero indices: Split it.
3469 ArrayRef<Value *> FrontIndices = ArrayRef(Indices).take_front(N: IdxNum);
3470 Value *FrontGEP =
3471 Builder.CreateGEP(Ty: GEPEltType, Ptr: PtrOp, IdxList: FrontIndices,
3472 Name: GEP.getName() + ".split", NW: GEP.getNoWrapFlags());
3473
     // The second GEP starts with an explicit zero index followed by the
     // remaining original indices.
3474 SmallVector<Value *> BackIndices;
3475 BackIndices.push_back(Elt: Constant::getNullValue(Ty: NewScalarIndexTy));
3476 append_range(C&: BackIndices, R: drop_begin(RangeOrContainer&: Indices, N: IdxNum));
3477 return GetElementPtrInst::Create(
3478 PointeeType: GetElementPtrInst::getIndexedType(Ty: GEPEltType, IdxList: FrontIndices), Ptr: FrontGEP,
3479 IdxList: BackIndices, NW: GEP.getNoWrapFlags());
3480 }
3481
3482 // Canonicalize gep %T to gep [sizeof(%T) x i8]:
     // i8 and arrays of i8 are treated as already canonical.
3483 auto IsCanonicalType = [](Type *Ty) {
3484 if (auto *AT = dyn_cast<ArrayType>(Val: Ty))
3485 Ty = AT->getElementType();
3486 return Ty->isIntegerTy(Bitwidth: 8);
3487 };
3488 if (Indices.size() == 1 && !IsCanonicalType(GEPEltType)) {
3489 TypeSize Scale = DL.getTypeAllocSize(Ty: GEPEltType);
3490 assert(!Scale.isScalable() && "Should have been handled earlier");
3491 Type *NewElemTy = Builder.getInt8Ty();
3492 if (Scale.getFixedValue() != 1)
3493 NewElemTy = ArrayType::get(ElementType: NewElemTy, NumElements: Scale.getFixedValue());
3494 GEP.setSourceElementType(NewElemTy);
3495 GEP.setResultElementType(NewElemTy);
3496 // Don't bother revisiting the GEP after this change.
     // Note: the local GEPEltType still holds the original element type and is
     // what the code below keeps using.
3497 MadeIRChange = true;
3498 }
3499
3500 // Check to see if the inputs to the PHI node are getelementptr instructions.
3501 if (auto *PN = dyn_cast<PHINode>(Val: PtrOp)) {
3502 if (Value *NewPtrOp = foldGEPOfPhi(GEP, PN, Builder))
3503 return replaceOperand(I&: GEP, OpNum: 0, V: NewPtrOp);
3504 }
3505
3506 if (auto *Src = dyn_cast<GEPOperator>(Val: PtrOp))
3507 if (Instruction *I = visitGEPOfGEP(GEP, Src))
3508 return I;
3509
3510 if (GEP.getNumIndices() == 1) {
3511 unsigned AS = GEP.getPointerAddressSpace();
     // Only handle indices that already have the index width of the address
     // space.
3512 if (GEP.getOperand(i_nocapture: 1)->getType()->getScalarSizeInBits() ==
3513 DL.getIndexSizeInBits(AS)) {
3514 uint64_t TyAllocSize = DL.getTypeAllocSize(Ty: GEPEltType).getFixedValue();
3515
3516 if (TyAllocSize == 1) {
3517 // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
3518 // but only if the result pointer is only used as if it were an integer.
3519 // (The case where the underlying object is the same is handled by
3520 // InstSimplify.)
3521 Value *X = GEP.getPointerOperand();
3522 Value *Y;
3523 if (match(V: GEP.getOperand(i_nocapture: 1), P: m_Sub(L: m_PtrToIntOrAddr(Op: m_Value(V&: Y)),
3524 R: m_PtrToIntOrAddr(Op: m_Specific(V: X)))) &&
3525 GEPType == Y->getType()) {
     // ptrtoint users are only rewritten when the address size equals the
     // pointer size; ptrtoaddr and icmp users are always rewritten.
3526 bool HasNonAddressBits =
3527 DL.getAddressSizeInBits(AS) != DL.getPointerSizeInBits(AS);
3528 bool Changed = GEP.replaceUsesWithIf(New: Y, ShouldReplace: [&](Use &U) {
3529 return isa<PtrToAddrInst, ICmpInst>(Val: U.getUser()) ||
3530 (!HasNonAddressBits && isa<PtrToIntInst>(Val: U.getUser()));
3531 });
3532 return Changed ? &GEP : nullptr;
3533 }
3534 } else if (auto *ExactIns =
3535 dyn_cast<PossiblyExactOperator>(Val: GEP.getOperand(i_nocapture: 1))) {
3536 // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
3537 Value *V;
3538 if (ExactIns->isExact()) {
     // Either a shift right by log2(sizeof(T)) for power-of-two sizes, or a
     // division by sizeof(T).
3539 if ((has_single_bit(Value: TyAllocSize) &&
3540 match(V: GEP.getOperand(i_nocapture: 1),
3541 P: m_Shr(L: m_Value(V),
3542 R: m_SpecificInt(V: countr_zero(Val: TyAllocSize))))) ||
3543 match(V: GEP.getOperand(i_nocapture: 1),
3544 P: m_IDiv(L: m_Value(V), R: m_SpecificInt(V: TyAllocSize)))) {
3545 return GetElementPtrInst::Create(PointeeType: Builder.getInt8Ty(),
3546 Ptr: GEP.getPointerOperand(), IdxList: V,
3547 NW: GEP.getNoWrapFlags());
3548 }
3549 }
3550 if (ExactIns->isExact() && ExactIns->hasOneUse()) {
3551 // Try to canonicalize non-i8 element type to i8 if the index is an
3552 // exact instruction. If the index is an exact instruction (div/shr)
3553 // with a constant RHS, we can fold the non-i8 element scale into the
3554 // div/shr (similar to the mul case, just inverted).
3555 const APInt *C;
     // NewC stays empty unless the element size folds cleanly into the
     // constant.
3556 std::optional<APInt> NewC;
3557 if (has_single_bit(Value: TyAllocSize) &&
3558 match(V: ExactIns, P: m_Shr(L: m_Value(V), R: m_APInt(Res&: C))) &&
3559 C->uge(RHS: countr_zero(Val: TyAllocSize)))
3560 NewC = *C - countr_zero(Val: TyAllocSize);
3561 else if (match(V: ExactIns, P: m_UDiv(L: m_Value(V), R: m_APInt(Res&: C)))) {
3562 APInt Quot;
3563 uint64_t Rem;
3564 APInt::udivrem(LHS: *C, RHS: TyAllocSize, Quotient&: Quot, Remainder&: Rem);
3565 if (Rem == 0)
3566 NewC = Quot;
3567 } else if (match(V: ExactIns, P: m_SDiv(L: m_Value(V), R: m_APInt(Res&: C)))) {
3568 APInt Quot;
3569 int64_t Rem;
3570 APInt::sdivrem(LHS: *C, RHS: TyAllocSize, Quotient&: Quot, Remainder&: Rem);
3571 // For sdiv we need to make sure we aren't creating INT_MIN / -1.
3572 if (!Quot.isAllOnes() && Rem == 0)
3573 NewC = Quot;
3574 }
3575
3576 if (NewC.has_value()) {
     // Rebuild the same div/shr opcode with the reduced constant and keep it
     // exact.
3577 Value *NewOp = Builder.CreateBinOp(
3578 Opc: static_cast<Instruction::BinaryOps>(ExactIns->getOpcode()), LHS: V,
3579 RHS: ConstantInt::get(Ty: V->getType(), V: *NewC));
3580 cast<BinaryOperator>(Val: NewOp)->setIsExact();
3581 return GetElementPtrInst::Create(PointeeType: Builder.getInt8Ty(),
3582 Ptr: GEP.getPointerOperand(), IdxList: NewOp,
3583 NW: GEP.getNoWrapFlags());
3584 }
3585 }
3586 }
3587 }
3588 }
3589 // We do not handle pointer-vector geps here.
3590 if (GEPType->isVectorTy())
3591 return nullptr;
3592
     // Try to prove inbounds: the constant offset accumulated from the
     // underlying object must be non-negative and no larger than its known
     // dereferenceable byte count.
3593 if (!GEP.isInBounds()) {
3594 unsigned IdxWidth =
3595 DL.getIndexSizeInBits(AS: PtrOp->getType()->getPointerAddressSpace());
3596 APInt BasePtrOffset(IdxWidth, 0);
3597 Value *UnderlyingPtrOp =
3598 PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, Offset&: BasePtrOffset);
3599 bool CanBeNull, CanBeFreed;
3600 uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes(
3601 DL, CanBeNull, CanBeFreed);
3602 if (!CanBeNull && !CanBeFreed && DerefBytes != 0) {
3603 if (GEP.accumulateConstantOffset(DL, Offset&: BasePtrOffset) &&
3604 BasePtrOffset.isNonNegative()) {
3605 APInt AllocSize(IdxWidth, DerefBytes);
3606 if (BasePtrOffset.ule(RHS: AllocSize)) {
3607 return GetElementPtrInst::CreateInBounds(
3608 PointeeType: GEP.getSourceElementType(), Ptr: PtrOp, IdxList: Indices, NameStr: GEP.getName());
3609 }
3610 }
3611 }
3612 }
3613
3614 // nusw + nneg -> nuw
3615 if (GEP.hasNoUnsignedSignedWrap() && !GEP.hasNoUnsignedWrap() &&
3616 all_of(Range: GEP.indices(), P: [&](Value *Idx) {
3617 return isKnownNonNegative(V: Idx, SQ: SQ.getWithInstruction(I: &GEP));
3618 })) {
3619 GEP.setNoWrapFlags(GEP.getNoWrapFlags() | GEPNoWrapFlags::noUnsignedWrap());
3620 return &GEP;
3621 }
3622
3623 // These rewrites are trying to preserve inbounds/nuw attributes. So we want
3624 // to do this after having tried to derive "nuw" above.
3625 if (GEP.getNumIndices() == 1) {
3626 // Given (gep p, x+y) we want to determine the common nowrap flags for both
3627 // geps if transforming into (gep (gep p, x), y).
3628 auto GetPreservedNoWrapFlags = [&](bool AddIsNUW) {
3629 // We can preserve both "inbounds nuw", "nusw nuw" and "nuw" if we know
3630 // that x + y does not have unsigned wrap.
3631 if (GEP.hasNoUnsignedWrap() && AddIsNUW)
3632 return GEP.getNoWrapFlags();
3633 return GEPNoWrapFlags::none();
3634 };
3635
3636 // Try to replace ADD + GEP with GEP + GEP.
3637 Value *Idx1, *Idx2;
3638 if (match(V: GEP.getOperand(i_nocapture: 1),
3639 P: m_OneUse(SubPattern: m_AddLike(L: m_Value(V&: Idx1), R: m_Value(V&: Idx2))))) {
3640 // %idx = add i64 %idx1, %idx2
3641 // %gep = getelementptr i32, ptr %ptr, i64 %idx
3642 // as:
3643 // %newptr = getelementptr i32, ptr %ptr, i64 %idx1
3644 // %newgep = getelementptr i32, ptr %newptr, i64 %idx2
3645 bool NUW = match(V: GEP.getOperand(i_nocapture: 1), P: m_NUWAddLike(L: m_Value(), R: m_Value()));
3646 GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW);
3647 auto *NewPtr =
3648 Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: GEP.getPointerOperand(),
3649 IdxList: Idx1, Name: "", NW: NWFlags);
3650 return replaceInstUsesWith(I&: GEP,
3651 V: Builder.CreateGEP(Ty: GEP.getSourceElementType(),
3652 Ptr: NewPtr, IdxList: Idx2, Name: "", NW: NWFlags));
3653 }
3654 ConstantInt *C;
3655 if (match(V: GEP.getOperand(i_nocapture: 1), P: m_OneUse(SubPattern: m_SExtLike(Op: m_OneUse(SubPattern: m_NSWAddLike(
3656 L: m_Value(V&: Idx1), R: m_ConstantInt(CI&: C))))))) {
3657 // %add = add nsw i32 %idx1, idx2
3658 // %sidx = sext i32 %add to i64
3659 // %gep = getelementptr i32, ptr %ptr, i64 %sidx
3660 // as:
3661 // %newptr = getelementptr i32, ptr %ptr, i32 %idx1
3662 // %newgep = getelementptr i32, ptr %newptr, i32 idx2
     // Note that both new indices are sign-extended to the type of the
     // original index operand before being used below.
     // The add counts as nuw only when the index is a non-negative zext of a
     // nuw add-like.
3663 bool NUW = match(V: GEP.getOperand(i_nocapture: 1),
3664 P: m_NNegZExt(Op: m_NUWAddLike(L: m_Value(), R: m_Value())));
3665 GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW);
3666 auto *NewPtr = Builder.CreateGEP(
3667 Ty: GEP.getSourceElementType(), Ptr: GEP.getPointerOperand(),
3668 IdxList: Builder.CreateSExt(V: Idx1, DestTy: GEP.getOperand(i_nocapture: 1)->getType()), Name: "", NW: NWFlags);
3669 return replaceInstUsesWith(
3670 I&: GEP,
3671 V: Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: NewPtr,
3672 IdxList: Builder.CreateSExt(V: C, DestTy: GEP.getOperand(i_nocapture: 1)->getType()),
3673 Name: "", NW: NWFlags));
3674 }
3675 }
3676
3677 if (Instruction *R = foldSelectGEP(GEP, Builder))
3678 return R;
3679
3680 // srem -> (and/urem) for inbounds+nuw GEP
3681 if (Indices.size() == 1 && GEP.isInBounds() && GEP.hasNoUnsignedWrap()) {
3682 Value *X, *Y;
3683
3684 // Match: idx = srem X, Y -- where Y is a power-of-two value.
3685 if (match(V: Indices[0], P: m_OneUse(SubPattern: m_SRem(L: m_Value(V&: X), R: m_Value(V&: Y)))) &&
3686 isKnownToBeAPowerOfTwo(V: Y, /*OrZero=*/true, CxtI: &GEP)) {
3687 // If GEP is inbounds+nuw, the offset cannot be negative
3688 // -> srem by power-of-two can be treated as urem,
3689 // and urem by power-of-two folds to 'and' later.
3690 // OrZero=true is fine here because division by zero is UB.
3691 Instruction *OldIdxI = cast<Instruction>(Val: Indices[0]);
3692 Value *NewIdx = Builder.CreateURem(LHS: X, RHS: Y, Name: OldIdxI->getName());
3693
3694 return GetElementPtrInst::Create(PointeeType: GEPEltType, Ptr: PtrOp, IdxList: {NewIdx},
3695 NW: GEP.getNoWrapFlags());
3696 }
3697 }
3698
3699 return nullptr;
3700 }
3701
/// Return true if \p V is one of the values this file treats as never
/// comparing equal to the allocation \p AI: a null pointer constant, a load
/// whose pointer operand is a GlobalVariable, or an alloc-like call (per
/// isAllocLikeFn) that is not \p AI itself.
///
/// isAllocSiteRemovable uses this to decide whether an equality compare
/// against the allocation may be folded.
3702 static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
3703 Instruction *AI) {
3704 if (isa<ConstantPointerNull>(Val: V))
3705 return true;
     // Only loads directly from a global variable are accepted.
3706 if (auto *LI = dyn_cast<LoadInst>(Val: V))
3707 return isa<GlobalVariable>(Val: LI->getPointerOperand());
3708 // Two distinct allocations will never be equal.
3709 return isAllocLikeFn(V, TLI: &TLI) && V != AI;
3710 }
3711
3712 /// Given a call CB which uses an address UsedV, return true if we can prove the
3713 /// call's only possible effect is storing to UsedV.
///
/// The call must have no uses, must not be a terminator, and must be known to
/// return and not throw. Its destination memory location, as reported by
/// MemoryLocation::getForDest, must be exactly \p UsedV.
3714 static bool isRemovableWrite(CallBase &CB, Value *UsedV,
3715 const TargetLibraryInfo &TLI) {
3716 if (!CB.use_empty())
3717 // TODO: add recursion if returned attribute is present
3718 return false;
3719
3720 if (CB.isTerminator())
3721 // TODO: remove implementation restriction
3722 return false;
3723
     // A call that may not return or may throw has an effect besides the store.
3724 if (!CB.willReturn() || !CB.doesNotThrow())
3725 return false;
3726
3727 // If the only possible side effect of the call is writing to the alloca,
3728 // and the result isn't used, we can safely remove any reads implied by the
3729 // call including those which might read the alloca itself.
3730 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: &CB, TLI);
     // The destination pointer must be UsedV itself.
3731 return Dest && Dest->Ptr == UsedV;
3732 }
3733
/// Decide whether the allocation \p AI and all of its transitive users can be
/// removed.
///
/// Users are explored with a worklist. Address-space casts, bitcasts, GEPs,
/// launder/strip.invariant.group intrinsics and reallocation calls of the same
/// allocation family are followed to their own users. Every accepted user is
/// appended to \p Users; on failure \p Users may already hold the users
/// accepted before the bail-out.
///
/// While walking, the kind of memory access seen so far is tracked in a
/// ModRefInfo. It starts as NoModRef when \p KnowInit is true and as Mod
/// otherwise. A load (or a mem intrinsic reading the allocation) is rejected
/// once Mod is set, and a store (or removable writing call) is rejected once
/// Ref is set, so the result never mixes reads and writes.
///
/// \returns std::nullopt if any user cannot be handled, otherwise the
/// accumulated access kind (never ModRef).
3734 static std::optional<ModRefInfo>
3735 isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakTrackingVH> &Users,
3736 const TargetLibraryInfo &TLI, bool KnowInit) {
3737 SmallVector<Instruction*, 4> Worklist;
     // Allocation family of AI, used below to pair it with free/realloc calls.
3738 const std::optional<StringRef> Family = getAllocationFamily(I: AI, TLI: &TLI);
3739 Worklist.push_back(Elt: AI);
     // Without a known initial value, start in Mod so that any read bails out.
3740 ModRefInfo Access = KnowInit ? ModRefInfo::NoModRef : ModRefInfo::Mod;
3741
3742 do {
     // PI is the pointer (AI or a value derived from it) whose users are
     // examined in this round.
3743 Instruction *PI = Worklist.pop_back_val();
3744 for (User *U : PI->users()) {
3745 Instruction *I = cast<Instruction>(Val: U);
     // Every case below either continues with the next user or returns.
3746 switch (I->getOpcode()) {
3747 default:
3748 // Give up the moment we see something we can't handle.
3749 return std::nullopt;
3750
3751 case Instruction::AddrSpaceCast:
3752 case Instruction::BitCast:
3753 case Instruction::GetElementPtr:
     // Derived pointers: record them and examine their users as well.
3754 Users.emplace_back(Args&: I);
3755 Worklist.push_back(Elt: I);
3756 continue;
3757
3758 case Instruction::ICmp: {
3759 ICmpInst *ICI = cast<ICmpInst>(Val: I);
3760 // We can fold eq/ne comparisons with null to false/true, respectively.
3761 // We also fold comparisons in some conditions provided the alloc has
3762 // not escaped (see isNeverEqualToUnescapedAlloc).
3763 if (!ICI->isEquality())
3764 return std::nullopt;
     // Pick the operand that is not the pointer under examination.
3765 unsigned OtherIndex = (ICI->getOperand(i_nocapture: 0) == PI) ? 1 : 0;
3766 if (!isNeverEqualToUnescapedAlloc(V: ICI->getOperand(i_nocapture: OtherIndex), TLI, AI))
3767 return std::nullopt;
3768
3769 // Do not fold compares to aligned_alloc calls, as they may have to
3770 // return null in case the required alignment cannot be satisfied,
3771 // unless we can prove that both alignment and size are valid.
3772 auto AlignmentAndSizeKnownValid = [](CallBase *CB) {
3773 // Check if alignment and size of a call to aligned_alloc is valid,
3774 // that is alignment is a power-of-2 and the size is a multiple of the
3775 // alignment.
3776 const APInt *Alignment;
3777 const APInt *Size;
3778 return match(V: CB->getArgOperand(i: 0), P: m_APInt(Res&: Alignment)) &&
3779 match(V: CB->getArgOperand(i: 1), P: m_APInt(Res&: Size)) &&
3780 Alignment->isPowerOf2() && Size->urem(RHS: *Alignment).isZero();
3781 };
3782 auto *CB = dyn_cast<CallBase>(Val: AI);
3783 LibFunc TheLibFunc;
     // NOTE(review): getCalledFunction() is dereferenced without a null check;
     // presumably a removable allocation call always has a known callee -
     // confirm against callers.
3784 if (CB && TLI.getLibFunc(FDecl: *CB->getCalledFunction(), F&: TheLibFunc) &&
3785 TLI.has(F: TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc &&
3786 !AlignmentAndSizeKnownValid(CB))
3787 return std::nullopt;
3788 Users.emplace_back(Args&: I);
3789 continue;
3790 }
3791
3792 case Instruction::Call:
3793 // Ignore no-op and store intrinsics.
3794 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
3795 switch (II->getIntrinsicID()) {
3796 default:
3797 return std::nullopt;
3798
3799 case Intrinsic::memmove:
3800 case Intrinsic::memcpy:
3801 case Intrinsic::memset: {
3802 MemIntrinsic *MI = cast<MemIntrinsic>(Val: II);
3803 if (MI->isVolatile())
3804 return std::nullopt;
3805 // Note: this could also be ModRef, but we can still interpret that
3806 // as just Mod in that case.
3807 ModRefInfo NewAccess =
3808 MI->getRawDest() == PI ? ModRefInfo::Mod : ModRefInfo::Ref;
     // Bail out if an access kind other than NewAccess was already recorded.
3809 if ((Access & ~NewAccess) != ModRefInfo::NoModRef)
3810 return std::nullopt;
3811 Access |= NewAccess;
3812 [[fallthrough]];
3813 }
3814 case Intrinsic::assume:
3815 case Intrinsic::invariant_start:
3816 case Intrinsic::invariant_end:
3817 case Intrinsic::lifetime_start:
3818 case Intrinsic::lifetime_end:
3819 case Intrinsic::objectsize:
3820 Users.emplace_back(Args&: I);
3821 continue;
3822 case Intrinsic::launder_invariant_group:
3823 case Intrinsic::strip_invariant_group:
     // These are followed like the casts above: record and examine their users.
3824 Users.emplace_back(Args&: I);
3825 Worklist.push_back(Elt: I);
3826 continue;
3827 }
3828 }
3829
     // A call that frees PI and belongs to the same allocation family.
3830 if (Family && getFreedOperand(CB: cast<CallBase>(Val: I), TLI: &TLI) == PI &&
3831 getAllocationFamily(I, TLI: &TLI) == Family) {
3832 Users.emplace_back(Args&: I);
3833 continue;
3834 }
3835
     // A reallocation of PI in the same family; its result is followed too.
3836 if (Family && getReallocatedOperand(CB: cast<CallBase>(Val: I)) == PI &&
3837 getAllocationFamily(I, TLI: &TLI) == Family) {
3838 Users.emplace_back(Args&: I);
3839 Worklist.push_back(Elt: I);
3840 continue;
3841 }
3842
     // A call accepted by isRemovableWrite counts as a write, which is only
     // allowed while no read has been recorded.
3843 if (!isRefSet(MRI: Access) &&
3844 isRemovableWrite(CB&: *cast<CallBase>(Val: I), UsedV: PI, TLI)) {
3845 Access |= ModRefInfo::Mod;
3846 Users.emplace_back(Args&: I);
3847 continue;
3848 }
3849
3850 return std::nullopt;
3851
3852 case Instruction::Store: {
3853 StoreInst *SI = cast<StoreInst>(Val: I);
     // PI must be the address stored to; a store with any other pointer
     // operand (e.g. one storing PI itself as the value) is rejected.
3854 if (SI->isVolatile() || SI->getPointerOperand() != PI)
3855 return std::nullopt;
3856 if (isRefSet(MRI: Access))
3857 return std::nullopt;
3858 Access |= ModRefInfo::Mod;
3859 Users.emplace_back(Args&: I);
3860 continue;
3861 }
3862
3863 case Instruction::Load: {
3864 LoadInst *LI = cast<LoadInst>(Val: I);
3865 if (LI->isVolatile() || LI->getPointerOperand() != PI)
3866 return std::nullopt;
     // Reads are rejected once a write has been recorded.
3867 if (isModSet(MRI: Access))
3868 return std::nullopt;
3869 Access |= ModRefInfo::Ref;
3870 Users.emplace_back(Args&: I);
3871 continue;
3872 }
3873 }
3874 llvm_unreachable("missing a return?");
3875 }
3876 } while (!Worklist.empty());
3877
3878 assert(Access != ModRefInfo::ModRef);
3879 return Access;
3880 }
3881
3882Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
3883 assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI));
3884
3885 // If we have a malloc call which is only used in any amount of comparisons to
3886 // null and free calls, delete the calls and replace the comparisons with true
3887 // or false as appropriate.
3888
3889 // This is based on the principle that we can substitute our own allocation
3890 // function (which will never return null) rather than knowledge of the
3891 // specific function being called. In some sense this can change the permitted
3892 // outputs of a program (when we convert a malloc to an alloca, the fact that
3893 // the allocation is now on the stack is potentially visible, for example),
3894 // but we believe in a permissible manner.
3895 SmallVector<WeakTrackingVH, 64> Users;
3896
3897 // If we are removing an alloca with a dbg.declare, insert dbg.value calls
3898 // before each store.
3899 SmallVector<DbgVariableRecord *, 8> DVRs;
3900 std::unique_ptr<DIBuilder> DIB;
3901 if (isa<AllocaInst>(Val: MI)) {
3902 findDbgUsers(V: &MI, DbgVariableRecords&: DVRs);
3903 DIB.reset(p: new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
3904 }
3905
3906 // Determine what getInitialValueOfAllocation would return without actually
3907 // allocating the result.
3908 bool KnowInitUndef = false;
3909 bool KnowInitZero = false;
3910 Constant *Init =
3911 getInitialValueOfAllocation(V: &MI, TLI: &TLI, Ty: Type::getInt8Ty(C&: MI.getContext()));
3912 if (Init) {
3913 if (isa<UndefValue>(Val: Init))
3914 KnowInitUndef = true;
3915 else if (Init->isNullValue())
3916 KnowInitZero = true;
3917 }
3918 // The various sanitizers don't actually return undef memory, but rather
3919 // memory initialized with special forms of runtime poison
3920 auto &F = *MI.getFunction();
3921 if (F.hasFnAttribute(Kind: Attribute::SanitizeMemory) ||
3922 F.hasFnAttribute(Kind: Attribute::SanitizeAddress))
3923 KnowInitUndef = false;
3924
3925 auto Removable =
3926 isAllocSiteRemovable(AI: &MI, Users, TLI, KnowInit: KnowInitZero | KnowInitUndef);
3927 if (Removable) {
3928 for (WeakTrackingVH &User : Users) {
3929 // Lowering all @llvm.objectsize and MTI calls first because they may use
3930 // a bitcast/GEP of the alloca we are removing.
3931 if (!User)
3932 continue;
3933
3934 Instruction *I = cast<Instruction>(Val: &*User);
3935
3936 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
3937 if (II->getIntrinsicID() == Intrinsic::objectsize) {
3938 SmallVector<Instruction *> InsertedInstructions;
3939 Value *Result = lowerObjectSizeCall(
3940 ObjectSize: II, DL, TLI: &TLI, AA, /*MustSucceed=*/true, InsertedInstructions: &InsertedInstructions);
3941 for (Instruction *Inserted : InsertedInstructions)
3942 Worklist.add(I: Inserted);
3943 replaceInstUsesWith(I&: *I, V: Result);
3944 eraseInstFromFunction(I&: *I);
3945 User = nullptr; // Skip examining in the next loop.
3946 continue;
3947 }
3948 if (auto *MTI = dyn_cast<MemTransferInst>(Val: I)) {
3949 if (KnowInitZero && isRefSet(MRI: *Removable)) {
3950 IRBuilderBase::InsertPointGuard Guard(Builder);
3951 Builder.SetInsertPoint(MTI);
3952 auto *M = Builder.CreateMemSet(
3953 Ptr: MTI->getRawDest(),
3954 Val: ConstantInt::get(Ty: Type::getInt8Ty(C&: MI.getContext()), V: 0),
3955 Size: MTI->getLength(), Align: MTI->getDestAlign());
3956 M->copyMetadata(SrcInst: *MTI);
3957 }
3958 }
3959 }
3960 }
3961 for (WeakTrackingVH &User : Users) {
3962 if (!User)
3963 continue;
3964
3965 Instruction *I = cast<Instruction>(Val: &*User);
3966
3967 if (ICmpInst *C = dyn_cast<ICmpInst>(Val: I)) {
3968 replaceInstUsesWith(I&: *C,
3969 V: ConstantInt::get(Ty: Type::getInt1Ty(C&: C->getContext()),
3970 V: C->isFalseWhenEqual()));
3971 } else if (auto *SI = dyn_cast<StoreInst>(Val: I)) {
3972 for (auto *DVR : DVRs)
3973 if (DVR->isAddressOfVariable())
3974 ConvertDebugDeclareToDebugValue(DVR, SI, Builder&: *DIB);
3975 } else {
3976 // Casts, GEP, or anything else: we're about to delete this instruction,
3977 // so it can not have any valid uses.
3978 Constant *Replace;
3979 if (isa<LoadInst>(Val: I)) {
3980 assert(KnowInitZero || KnowInitUndef);
3981 Replace = KnowInitUndef ? UndefValue::get(T: I->getType())
3982 : Constant::getNullValue(Ty: I->getType());
3983 } else
3984 Replace = PoisonValue::get(T: I->getType());
3985 replaceInstUsesWith(I&: *I, V: Replace);
3986 }
3987 eraseInstFromFunction(I&: *I);
3988 }
3989
3990 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &MI)) {
3991 // Replace invoke with a NOP intrinsic to maintain the original CFG
3992 Module *M = II->getModule();
3993 Function *F = Intrinsic::getOrInsertDeclaration(M, id: Intrinsic::donothing);
3994 auto *NewII = InvokeInst::Create(
3995 Func: F, IfNormal: II->getNormalDest(), IfException: II->getUnwindDest(), Args: {}, NameStr: "", InsertBefore: II->getParent());
3996 NewII->setDebugLoc(II->getDebugLoc());
3997 }
3998
3999 // Remove debug intrinsics which describe the value contained within the
4000 // alloca. In addition to removing dbg.{declare,addr} which simply point to
4001 // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
4002 //
4003 // ```
4004 // define void @foo(i32 %0) {
4005 // %a = alloca i32 ; Deleted.
4006 // store i32 %0, i32* %a
4007 // dbg.value(i32 %0, "arg0") ; Not deleted.
4008 // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
4009 // call void @trivially_inlinable_no_op(i32* %a)
4010 // ret void
4011 // }
4012 // ```
4013 //
4014 // This may not be required if we stop describing the contents of allocas
4015 // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
4016 // the LowerDbgDeclare utility.
4017 //
4018 // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
4019 // "arg0" dbg.value may be stale after the call. However, failing to remove
4020 // the DW_OP_deref dbg.value causes large gaps in location coverage.
4021 //
4022 // FIXME: the Assignment Tracking project has now likely made this
4023 // redundant (and it's sometimes harmful).
4024 for (auto *DVR : DVRs)
4025 if (DVR->isAddressOfVariable() || DVR->getExpression()->startsWithDeref())
4026 DVR->eraseFromParent();
4027
4028 return eraseInstFromFunction(I&: MI);
4029 }
4030 return nullptr;
4031}
4032
4033/// Move the call to free before a NULL test.
4034///
4035/// Check if this free is accessed after its argument has been test
4036/// against NULL (property 0).
4037/// If yes, it is legal to move this call in its predecessor block.
4038///
4039/// The move is performed only if the block containing the call to free
4040/// will be removed, i.e.:
4041/// 1. it has only one predecessor P, and P has two successors
4042/// 2. it contains the call, noops, and an unconditional branch
4043/// 3. its successor is the same as its predecessor's successor
4044///
4045/// The profitability is out-of concern here and this function should
4046/// be called only if the caller knows this transformation would be
4047/// profitable (e.g., for code size).
4048static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
4049 const DataLayout &DL) {
4050 Value *Op = FI.getArgOperand(i: 0);
4051 BasicBlock *FreeInstrBB = FI.getParent();
4052 BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
4053
4054 // Validate part of constraint #1: Only one predecessor
4055 // FIXME: We can extend the number of predecessor, but in that case, we
4056 // would duplicate the call to free in each predecessor and it may
4057 // not be profitable even for code size.
4058 if (!PredBB)
4059 return nullptr;
4060
4061 // Validate constraint #2: Does this block contains only the call to
4062 // free, noops, and an unconditional branch?
4063 BasicBlock *SuccBB;
4064 Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
4065 if (!match(V: FreeInstrBBTerminator, P: m_UnconditionalBr(Succ&: SuccBB)))
4066 return nullptr;
4067
4068 // If there are only 2 instructions in the block, at this point,
4069 // this is the call to free and unconditional.
4070 // If there are more than 2 instructions, check that they are noops
4071 // i.e., they won't hurt the performance of the generated code.
4072 if (FreeInstrBB->size() != 2) {
4073 for (const Instruction &Inst : *FreeInstrBB) {
4074 if (&Inst == &FI || &Inst == FreeInstrBBTerminator ||
4075 isa<PseudoProbeInst>(Val: Inst))
4076 continue;
4077 auto *Cast = dyn_cast<CastInst>(Val: &Inst);
4078 if (!Cast || !Cast->isNoopCast(DL))
4079 return nullptr;
4080 }
4081 }
4082 // Validate the rest of constraint #1 by matching on the pred branch.
4083 Instruction *TI = PredBB->getTerminator();
4084 BasicBlock *TrueBB, *FalseBB;
4085 CmpPredicate Pred;
4086 if (!match(V: TI, P: m_Br(C: m_ICmp(Pred,
4087 L: m_CombineOr(L: m_Specific(V: Op),
4088 R: m_Specific(V: Op->stripPointerCasts())),
4089 R: m_Zero()),
4090 T&: TrueBB, F&: FalseBB)))
4091 return nullptr;
4092 if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
4093 return nullptr;
4094
4095 // Validate constraint #3: Ensure the null case just falls through.
4096 if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
4097 return nullptr;
4098 assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
4099 "Broken CFG: missing edge from predecessor to successor");
4100
4101 // At this point, we know that everything in FreeInstrBB can be moved
4102 // before TI.
4103 for (Instruction &Instr : llvm::make_early_inc_range(Range&: *FreeInstrBB)) {
4104 if (&Instr == FreeInstrBBTerminator)
4105 break;
4106 Instr.moveBeforePreserving(MovePos: TI->getIterator());
4107 }
4108 assert(FreeInstrBB->size() == 1 &&
4109 "Only the branch instruction should remain");
4110
4111 // Now that we've moved the call to free before the NULL check, we have to
4112 // remove any attributes on its parameter that imply it's non-null, because
4113 // those attributes might have only been valid because of the NULL check, and
4114 // we can get miscompiles if we keep them. This is conservative if non-null is
4115 // also implied by something other than the NULL check, but it's guaranteed to
4116 // be correct, and the conservativeness won't matter in practice, since the
4117 // attributes are irrelevant for the call to free itself and the pointer
4118 // shouldn't be used after the call.
4119 AttributeList Attrs = FI.getAttributes();
4120 Attrs = Attrs.removeParamAttribute(C&: FI.getContext(), ArgNo: 0, Kind: Attribute::NonNull);
4121 Attribute Dereferenceable = Attrs.getParamAttr(ArgNo: 0, Kind: Attribute::Dereferenceable);
4122 if (Dereferenceable.isValid()) {
4123 uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
4124 Attrs = Attrs.removeParamAttribute(C&: FI.getContext(), ArgNo: 0,
4125 Kind: Attribute::Dereferenceable);
4126 Attrs = Attrs.addDereferenceableOrNullParamAttr(C&: FI.getContext(), ArgNo: 0, Bytes);
4127 }
4128 FI.setAttributes(Attrs);
4129
4130 return &FI;
4131}
4132
4133Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
4134 // free undef -> unreachable.
4135 if (isa<UndefValue>(Val: Op)) {
4136 // Leave a marker since we can't modify the CFG here.
4137 CreateNonTerminatorUnreachable(InsertAt: &FI);
4138 return eraseInstFromFunction(I&: FI);
4139 }
4140
4141 // If we have 'free null' delete the instruction. This can happen in stl code
4142 // when lots of inlining happens.
4143 if (isa<ConstantPointerNull>(Val: Op))
4144 return eraseInstFromFunction(I&: FI);
4145
4146 // If we had free(realloc(...)) with no intervening uses, then eliminate the
4147 // realloc() entirely.
4148 CallInst *CI = dyn_cast<CallInst>(Val: Op);
4149 if (CI && CI->hasOneUse())
4150 if (Value *ReallocatedOp = getReallocatedOperand(CB: CI))
4151 return eraseInstFromFunction(I&: *replaceInstUsesWith(I&: *CI, V: ReallocatedOp));
4152
4153 // If we optimize for code size, try to move the call to free before the null
4154 // test so that simplify cfg can remove the empty block and dead code
4155 // elimination the branch. I.e., helps to turn something like:
4156 // if (foo) free(foo);
4157 // into
4158 // free(foo);
4159 //
4160 // Note that we can only do this for 'free' and not for any flavor of
4161 // 'operator delete'; there is no 'operator delete' symbol for which we are
4162 // permitted to invent a call, even if we're passing in a null pointer.
4163 if (MinimizeSize) {
4164 LibFunc Func;
4165 if (TLI.getLibFunc(CB: FI, F&: Func) && TLI.has(F: Func) && Func == LibFunc_free)
4166 if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
4167 return I;
4168 }
4169
4170 return nullptr;
4171}
4172
4173Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
4174 Value *RetVal = RI.getReturnValue();
4175 if (!RetVal)
4176 return nullptr;
4177
4178 Function *F = RI.getFunction();
4179 Type *RetTy = RetVal->getType();
4180 if (RetTy->isPointerTy()) {
4181 bool HasDereferenceable =
4182 F->getAttributes().getRetDereferenceableBytes() > 0;
4183 if (F->hasRetAttribute(Kind: Attribute::NonNull) ||
4184 (HasDereferenceable &&
4185 !NullPointerIsDefined(F, AS: RetTy->getPointerAddressSpace()))) {
4186 if (Value *V = simplifyNonNullOperand(V: RetVal, HasDereferenceable))
4187 return replaceOperand(I&: RI, OpNum: 0, V);
4188 }
4189 }
4190
4191 if (!AttributeFuncs::isNoFPClassCompatibleType(Ty: RetTy))
4192 return nullptr;
4193
4194 FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass();
4195 if (ReturnClass == fcNone)
4196 return nullptr;
4197
4198 KnownFPClass KnownClass;
4199 if (SimplifyDemandedFPClass(I: &RI, Op: 0, DemandedMask: ~ReturnClass, Known&: KnownClass,
4200 Q: SQ.getWithInstruction(I: &RI)))
4201 return &RI;
4202
4203 return nullptr;
4204}
4205
4206// WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
4207bool InstCombinerImpl::removeInstructionsBeforeUnreachable(Instruction &I) {
4208 // Try to remove the previous instruction if it must lead to unreachable.
4209 // This includes instructions like stores and "llvm.assume" that may not get
4210 // removed by simple dead code elimination.
4211 bool Changed = false;
4212 while (Instruction *Prev = I.getPrevNode()) {
4213 // While we theoretically can erase EH, that would result in a block that
4214 // used to start with an EH no longer starting with EH, which is invalid.
4215 // To make it valid, we'd need to fixup predecessors to no longer refer to
4216 // this block, but that changes CFG, which is not allowed in InstCombine.
4217 if (Prev->isEHPad())
4218 break; // Can not drop any more instructions. We're done here.
4219
4220 if (!isGuaranteedToTransferExecutionToSuccessor(I: Prev))
4221 break; // Can not drop any more instructions. We're done here.
4222 // Otherwise, this instruction can be freely erased,
4223 // even if it is not side-effect free.
4224
4225 // A value may still have uses before we process it here (for example, in
4226 // another unreachable block), so convert those to poison.
4227 replaceInstUsesWith(I&: *Prev, V: PoisonValue::get(T: Prev->getType()));
4228 eraseInstFromFunction(I&: *Prev);
4229 Changed = true;
4230 }
4231 return Changed;
4232}
4233
4234Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
4235 removeInstructionsBeforeUnreachable(I);
4236 return nullptr;
4237}
4238
4239Instruction *InstCombinerImpl::visitUncondBrInst(UncondBrInst &BI) {
4240 // If this store is the second-to-last instruction in the basic block
4241 // (excluding debug info) and if the block ends with
4242 // an unconditional branch, try to move the store to the successor block.
4243
4244 auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
4245 BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
4246 do {
4247 if (BBI != FirstInstr)
4248 --BBI;
4249 } while (BBI != FirstInstr && BBI->isDebugOrPseudoInst());
4250
4251 return dyn_cast<StoreInst>(Val&: BBI);
4252 };
4253
4254 if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
4255 if (mergeStoreIntoSuccessor(SI&: *SI))
4256 return &BI;
4257
4258 return nullptr;
4259}
4260
4261void InstCombinerImpl::addDeadEdge(BasicBlock *From, BasicBlock *To,
4262 SmallVectorImpl<BasicBlock *> &Worklist) {
4263 if (!DeadEdges.insert(V: {From, To}).second)
4264 return;
4265
4266 // Replace phi node operands in successor with poison.
4267 for (PHINode &PN : To->phis())
4268 for (Use &U : PN.incoming_values())
4269 if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(Val: U)) {
4270 replaceUse(U, NewValue: PoisonValue::get(T: PN.getType()));
4271 addToWorklist(I: &PN);
4272 MadeIRChange = true;
4273 }
4274
4275 Worklist.push_back(Elt: To);
4276}
4277
4278// Under the assumption that I is unreachable, remove it and following
4279// instructions. Changes are reported directly to MadeIRChange.
4280void InstCombinerImpl::handleUnreachableFrom(
4281 Instruction *I, SmallVectorImpl<BasicBlock *> &Worklist) {
4282 BasicBlock *BB = I->getParent();
4283 for (Instruction &Inst : make_early_inc_range(
4284 Range: make_range(x: std::next(x: BB->getTerminator()->getReverseIterator()),
4285 y: std::next(x: I->getReverseIterator())))) {
4286 if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
4287 replaceInstUsesWith(I&: Inst, V: PoisonValue::get(T: Inst.getType()));
4288 MadeIRChange = true;
4289 }
4290 if (Inst.isEHPad() || Inst.getType()->isTokenTy())
4291 continue;
4292 // RemoveDIs: erase debug-info on this instruction manually.
4293 Inst.dropDbgRecords();
4294 eraseInstFromFunction(I&: Inst);
4295 MadeIRChange = true;
4296 }
4297
4298 SmallVector<Value *> Changed;
4299 if (handleUnreachableTerminator(I: BB->getTerminator(), PoisonedValues&: Changed)) {
4300 MadeIRChange = true;
4301 for (Value *V : Changed)
4302 addToWorklist(I: cast<Instruction>(Val: V));
4303 }
4304
4305 // Handle potentially dead successors.
4306 for (BasicBlock *Succ : successors(BB))
4307 addDeadEdge(From: BB, To: Succ, Worklist);
4308}
4309
4310void InstCombinerImpl::handlePotentiallyDeadBlocks(
4311 SmallVectorImpl<BasicBlock *> &Worklist) {
4312 while (!Worklist.empty()) {
4313 BasicBlock *BB = Worklist.pop_back_val();
4314 if (!all_of(Range: predecessors(BB), P: [&](BasicBlock *Pred) {
4315 return DeadEdges.contains(V: {Pred, BB}) || DT.dominates(A: BB, B: Pred);
4316 }))
4317 continue;
4318
4319 handleUnreachableFrom(I: &BB->front(), Worklist);
4320 }
4321}
4322
4323void InstCombinerImpl::handlePotentiallyDeadSuccessors(BasicBlock *BB,
4324 BasicBlock *LiveSucc) {
4325 SmallVector<BasicBlock *> Worklist;
4326 for (BasicBlock *Succ : successors(BB)) {
4327 // The live successor isn't dead.
4328 if (Succ == LiveSucc)
4329 continue;
4330
4331 addDeadEdge(From: BB, To: Succ, Worklist);
4332 }
4333
4334 handlePotentiallyDeadBlocks(Worklist);
4335}
4336
4337Instruction *InstCombinerImpl::visitCondBrInst(CondBrInst &BI) {
4338 // Change br (not X), label True, label False to: br X, label False, True
4339 Value *Cond = BI.getCondition();
4340 Value *X;
4341 if (match(V: Cond, P: m_Not(V: m_Value(V&: X))) && !isa<Constant>(Val: X)) {
4342 // Swap Destinations and condition...
4343 BI.swapSuccessors();
4344 if (BPI)
4345 BPI->swapSuccEdgesProbabilities(Src: BI.getParent());
4346 return replaceOperand(I&: BI, OpNum: 0, V: X);
4347 }
4348
4349 // Canonicalize logical-and-with-invert as logical-or-with-invert.
4350 // This is done by inverting the condition and swapping successors:
4351 // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
4352 Value *Y;
4353 if (isa<SelectInst>(Val: Cond) &&
4354 match(V: Cond,
4355 P: m_OneUse(SubPattern: m_LogicalAnd(L: m_Value(V&: X), R: m_OneUse(SubPattern: m_Not(V: m_Value(V&: Y))))))) {
4356 Value *NotX = Builder.CreateNot(V: X, Name: "not." + X->getName());
4357 Value *Or = Builder.CreateLogicalOr(Cond1: NotX, Cond2: Y);
4358
4359 // Set weights for the new OR select instruction too.
4360 if (!ProfcheckDisableMetadataFixes) {
4361 if (auto *OrInst = dyn_cast<Instruction>(Val: Or)) {
4362 if (auto *CondInst = dyn_cast<Instruction>(Val: Cond)) {
4363 SmallVector<uint32_t> Weights;
4364 if (extractBranchWeights(I: *CondInst, Weights)) {
4365 assert(Weights.size() == 2 &&
4366 "Unexpected number of branch weights!");
4367 std::swap(a&: Weights[0], b&: Weights[1]);
4368 setBranchWeights(I&: *OrInst, Weights, /*IsExpected=*/false);
4369 }
4370 }
4371 }
4372 }
4373 BI.swapSuccessors();
4374 if (BPI)
4375 BPI->swapSuccEdgesProbabilities(Src: BI.getParent());
4376 return replaceOperand(I&: BI, OpNum: 0, V: Or);
4377 }
4378
4379 // If the condition is irrelevant, remove the use so that other
4380 // transforms on the condition become more effective.
4381 if (!isa<ConstantInt>(Val: Cond) && BI.getSuccessor(i: 0) == BI.getSuccessor(i: 1))
4382 return replaceOperand(I&: BI, OpNum: 0, V: ConstantInt::getFalse(Ty: Cond->getType()));
4383
4384 // Canonicalize, for example, fcmp_one -> fcmp_oeq.
4385 CmpPredicate Pred;
4386 if (match(V: Cond, P: m_OneUse(SubPattern: m_FCmp(Pred, L: m_Value(), R: m_Value()))) &&
4387 !isCanonicalPredicate(Pred)) {
4388 // Swap destinations and condition.
4389 auto *Cmp = cast<CmpInst>(Val: Cond);
4390 Cmp->setPredicate(CmpInst::getInversePredicate(pred: Pred));
4391 BI.swapSuccessors();
4392 if (BPI)
4393 BPI->swapSuccEdgesProbabilities(Src: BI.getParent());
4394 Worklist.push(I: Cmp);
4395 return &BI;
4396 }
4397
4398 if (isa<UndefValue>(Val: Cond)) {
4399 handlePotentiallyDeadSuccessors(BB: BI.getParent(), /*LiveSucc*/ nullptr);
4400 return nullptr;
4401 }
4402 if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) {
4403 handlePotentiallyDeadSuccessors(BB: BI.getParent(),
4404 LiveSucc: BI.getSuccessor(i: !CI->getZExtValue()));
4405 return nullptr;
4406 }
4407
4408 // Replace all dominated uses of the condition with true/false
4409 // Ignore constant expressions to avoid iterating over uses on other
4410 // functions.
4411 if (!isa<Constant>(Val: Cond) && BI.getSuccessor(i: 0) != BI.getSuccessor(i: 1)) {
4412 for (auto &U : make_early_inc_range(Range: Cond->uses())) {
4413 BasicBlockEdge Edge0(BI.getParent(), BI.getSuccessor(i: 0));
4414 if (DT.dominates(BBE: Edge0, U)) {
4415 replaceUse(U, NewValue: ConstantInt::getTrue(Ty: Cond->getType()));
4416 addToWorklist(I: cast<Instruction>(Val: U.getUser()));
4417 continue;
4418 }
4419 BasicBlockEdge Edge1(BI.getParent(), BI.getSuccessor(i: 1));
4420 if (DT.dominates(BBE: Edge1, U)) {
4421 replaceUse(U, NewValue: ConstantInt::getFalse(Ty: Cond->getType()));
4422 addToWorklist(I: cast<Instruction>(Val: U.getUser()));
4423 }
4424 }
4425 }
4426
4427 DC.registerBranch(BI: &BI);
4428 return nullptr;
4429}
4430
4431// Replaces (switch (select cond, X, C)/(select cond, C, X)) with (switch X) if
4432// we can prove that both (switch C) and (switch X) go to the default when cond
4433// is false/true.
4434static Value *simplifySwitchOnSelectUsingRanges(SwitchInst &SI,
4435 SelectInst *Select,
4436 bool IsTrueArm) {
4437 unsigned CstOpIdx = IsTrueArm ? 1 : 2;
4438 auto *C = dyn_cast<ConstantInt>(Val: Select->getOperand(i_nocapture: CstOpIdx));
4439 if (!C)
4440 return nullptr;
4441
4442 BasicBlock *CstBB = SI.findCaseValue(C)->getCaseSuccessor();
4443 if (CstBB != SI.getDefaultDest())
4444 return nullptr;
4445 Value *X = Select->getOperand(i_nocapture: 3 - CstOpIdx);
4446 CmpPredicate Pred;
4447 const APInt *RHSC;
4448 if (!match(V: Select->getCondition(),
4449 P: m_ICmp(Pred, L: m_Specific(V: X), R: m_APInt(Res&: RHSC))))
4450 return nullptr;
4451 if (IsTrueArm)
4452 Pred = ICmpInst::getInversePredicate(pred: Pred);
4453
4454 // See whether we can replace the select with X
4455 ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, Other: *RHSC);
4456 for (auto Case : SI.cases())
4457 if (!CR.contains(Val: Case.getCaseValue()->getValue()))
4458 return nullptr;
4459
4460 return X;
4461}
4462
4463Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
4464 Value *Cond = SI.getCondition();
4465 Value *Op0;
4466 const APInt *CondOpC;
4467 using InvertFn = std::function<APInt(const APInt &Case, const APInt &C)>;
4468
4469 auto MaybeInvertible = [&](Value *Cond) -> InvertFn {
4470 if (match(V: Cond, P: m_Add(L: m_Value(V&: Op0), R: m_APInt(Res&: CondOpC))))
4471 // Change 'switch (X+C) case Case:' into 'switch (X) case Case-C'.
4472 return [](const APInt &Case, const APInt &C) { return Case - C; };
4473
4474 if (match(V: Cond, P: m_Sub(L: m_APInt(Res&: CondOpC), R: m_Value(V&: Op0))))
4475 // Change 'switch (C-X) case Case:' into 'switch (X) case C-Case'.
4476 return [](const APInt &Case, const APInt &C) { return C - Case; };
4477
4478 if (match(V: Cond, P: m_Xor(L: m_Value(V&: Op0), R: m_APInt(Res&: CondOpC))) &&
4479 !CondOpC->isMinSignedValue() && !CondOpC->isMaxSignedValue())
4480 // Change 'switch (X^C) case Case:' into 'switch (X) case Case^C'.
4481 // Prevent creation of large case values by excluding extremes.
4482 return [](const APInt &Case, const APInt &C) { return Case ^ C; };
4483
4484 return nullptr;
4485 };
4486
4487 // Attempt to invert and simplify the switch condition, as long as the
4488 // condition is not used further, as it may not be profitable otherwise.
4489 if (auto InvertFn = MaybeInvertible(Cond); InvertFn && Cond->hasOneUse()) {
4490 for (auto &Case : SI.cases()) {
4491 const APInt &New = InvertFn(Case.getCaseValue()->getValue(), *CondOpC);
4492 Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: New));
4493 }
4494 return replaceOperand(I&: SI, OpNum: 0, V: Op0);
4495 }
4496
4497 uint64_t ShiftAmt;
4498 if (match(V: Cond, P: m_Shl(L: m_Value(V&: Op0), R: m_ConstantInt(V&: ShiftAmt))) &&
4499 ShiftAmt < Op0->getType()->getScalarSizeInBits() &&
4500 all_of(Range: SI.cases(), P: [&](const auto &Case) {
4501 return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt;
4502 })) {
4503 // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'.
4504 OverflowingBinaryOperator *Shl = cast<OverflowingBinaryOperator>(Val: Cond);
4505 if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() ||
4506 Shl->hasOneUse()) {
4507 Value *NewCond = Op0;
4508 if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) {
4509 // If the shift may wrap, we need to mask off the shifted bits.
4510 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
4511 NewCond = Builder.CreateAnd(
4512 LHS: Op0, RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - ShiftAmt));
4513 }
4514 for (auto Case : SI.cases()) {
4515 const APInt &CaseVal = Case.getCaseValue()->getValue();
4516 APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt)
4517 : CaseVal.lshr(shiftAmt: ShiftAmt);
4518 Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: ShiftedCase));
4519 }
4520 return replaceOperand(I&: SI, OpNum: 0, V: NewCond);
4521 }
4522 }
4523
4524 // Fold switch(zext/sext(X)) into switch(X) if possible.
4525 if (match(V: Cond, P: m_ZExtOrSExt(Op: m_Value(V&: Op0)))) {
4526 bool IsZExt = isa<ZExtInst>(Val: Cond);
4527 Type *SrcTy = Op0->getType();
4528 unsigned NewWidth = SrcTy->getScalarSizeInBits();
4529
4530 if (all_of(Range: SI.cases(), P: [&](const auto &Case) {
4531 const APInt &CaseVal = Case.getCaseValue()->getValue();
4532 return IsZExt ? CaseVal.isIntN(N: NewWidth)
4533 : CaseVal.isSignedIntN(N: NewWidth);
4534 })) {
4535 for (auto &Case : SI.cases()) {
4536 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth);
4537 Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase));
4538 }
4539 return replaceOperand(I&: SI, OpNum: 0, V: Op0);
4540 }
4541 }
4542
4543 // Fold switch(select cond, X, Y) into switch(X/Y) if possible
4544 if (auto *Select = dyn_cast<SelectInst>(Val: Cond)) {
4545 if (Value *V =
4546 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/true))
4547 return replaceOperand(I&: SI, OpNum: 0, V);
4548 if (Value *V =
4549 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/false))
4550 return replaceOperand(I&: SI, OpNum: 0, V);
4551 }
4552
4553 KnownBits Known = computeKnownBits(V: Cond, CxtI: &SI);
4554 unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
4555 unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
4556
4557 // Compute the number of leading bits we can ignore.
4558 // TODO: A better way to determine this would use ComputeNumSignBits().
4559 for (const auto &C : SI.cases()) {
4560 LeadingKnownZeros =
4561 std::min(a: LeadingKnownZeros, b: C.getCaseValue()->getValue().countl_zero());
4562 LeadingKnownOnes =
4563 std::min(a: LeadingKnownOnes, b: C.getCaseValue()->getValue().countl_one());
4564 }
4565
4566 unsigned NewWidth = Known.getBitWidth() - std::max(a: LeadingKnownZeros, b: LeadingKnownOnes);
4567
4568 // Shrink the condition operand if the new type is smaller than the old type.
4569 // But do not shrink to a non-standard type, because backend can't generate
4570 // good code for that yet.
4571 // TODO: We can make it aggressive again after fixing PR39569.
4572 if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
4573 shouldChangeType(FromWidth: Known.getBitWidth(), ToWidth: NewWidth)) {
4574 IntegerType *Ty = IntegerType::get(C&: SI.getContext(), NumBits: NewWidth);
4575 Builder.SetInsertPoint(&SI);
4576 Value *NewCond = Builder.CreateTrunc(V: Cond, DestTy: Ty, Name: "trunc");
4577
4578 for (auto Case : SI.cases()) {
4579 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth);
4580 Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase));
4581 }
4582 return replaceOperand(I&: SI, OpNum: 0, V: NewCond);
4583 }
4584
4585 if (isa<UndefValue>(Val: Cond)) {
4586 handlePotentiallyDeadSuccessors(BB: SI.getParent(), /*LiveSucc*/ nullptr);
4587 return nullptr;
4588 }
4589 if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) {
4590 handlePotentiallyDeadSuccessors(BB: SI.getParent(),
4591 LiveSucc: SI.findCaseValue(C: CI)->getCaseSuccessor());
4592 return nullptr;
4593 }
4594
4595 return nullptr;
4596}
4597
4598Instruction *
4599InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
4600 auto *WO = dyn_cast<WithOverflowInst>(Val: EV.getAggregateOperand());
4601 if (!WO)
4602 return nullptr;
4603
4604 Intrinsic::ID OvID = WO->getIntrinsicID();
4605 const APInt *C = nullptr;
4606 if (match(V: WO->getRHS(), P: m_APIntAllowPoison(Res&: C))) {
4607 if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
4608 OvID == Intrinsic::umul_with_overflow)) {
4609 // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
4610 if (C->isAllOnes())
4611 return BinaryOperator::CreateNeg(Op: WO->getLHS());
4612 // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
4613 if (C->isPowerOf2()) {
4614 return BinaryOperator::CreateShl(
4615 V1: WO->getLHS(),
4616 V2: ConstantInt::get(Ty: WO->getLHS()->getType(), V: C->logBase2()));
4617 }
4618 }
4619 }
4620
4621 // We're extracting from an overflow intrinsic. See if we're the only user.
4622 // That allows us to simplify multiple result intrinsics to simpler things
4623 // that just get one value.
4624 if (!WO->hasOneUse())
4625 return nullptr;
4626
4627 // Check if we're grabbing only the result of a 'with overflow' intrinsic
4628 // and replace it with a traditional binary instruction.
4629 if (*EV.idx_begin() == 0) {
4630 Instruction::BinaryOps BinOp = WO->getBinaryOp();
4631 Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
4632 // Replace the old instruction's uses with poison.
4633 replaceInstUsesWith(I&: *WO, V: PoisonValue::get(T: WO->getType()));
4634 eraseInstFromFunction(I&: *WO);
4635 return BinaryOperator::Create(Op: BinOp, S1: LHS, S2: RHS);
4636 }
4637
4638 assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
4639
4640 // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
4641 if (OvID == Intrinsic::usub_with_overflow)
4642 return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
4643
4644 // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
4645 // +1 is not possible because we assume signed values.
4646 if (OvID == Intrinsic::smul_with_overflow &&
4647 WO->getLHS()->getType()->isIntOrIntVectorTy(BitWidth: 1))
4648 return BinaryOperator::CreateAnd(V1: WO->getLHS(), V2: WO->getRHS());
4649
4650 // extractvalue (umul_with_overflow X, X), 1 -> X u> 2^(N/2)-1
4651 if (OvID == Intrinsic::umul_with_overflow && WO->getLHS() == WO->getRHS()) {
4652 unsigned BitWidth = WO->getLHS()->getType()->getScalarSizeInBits();
4653 // Only handle even bitwidths for performance reasons.
4654 if (BitWidth % 2 == 0)
4655 return new ICmpInst(
4656 ICmpInst::ICMP_UGT, WO->getLHS(),
4657 ConstantInt::get(Ty: WO->getLHS()->getType(),
4658 V: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth / 2)));
4659 }
4660
4661 // If only the overflow result is used, and the right hand side is a
4662 // constant (or constant splat), we can remove the intrinsic by directly
4663 // checking for overflow.
4664 if (C) {
4665 // Compute the no-wrap range for LHS given RHS=C, then construct an
4666 // equivalent icmp, potentially using an offset.
4667 ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
4668 BinOp: WO->getBinaryOp(), Other: *C, NoWrapKind: WO->getNoWrapKind());
4669
4670 CmpInst::Predicate Pred;
4671 APInt NewRHSC, Offset;
4672 NWR.getEquivalentICmp(Pred, RHS&: NewRHSC, Offset);
4673 auto *OpTy = WO->getRHS()->getType();
4674 auto *NewLHS = WO->getLHS();
4675 if (Offset != 0)
4676 NewLHS = Builder.CreateAdd(LHS: NewLHS, RHS: ConstantInt::get(Ty: OpTy, V: Offset));
4677 return new ICmpInst(ICmpInst::getInversePredicate(pred: Pred), NewLHS,
4678 ConstantInt::get(Ty: OpTy, V: NewRHSC));
4679 }
4680
4681 return nullptr;
4682}
4683
4684static Value *foldFrexpOfSelect(ExtractValueInst &EV, IntrinsicInst *FrexpCall,
4685 SelectInst *SelectInst,
4686 InstCombiner::BuilderTy &Builder) {
4687 // Helper to fold frexp of select to select of frexp.
4688
4689 if (!SelectInst->hasOneUse() || !FrexpCall->hasOneUse())
4690 return nullptr;
4691 Value *Cond = SelectInst->getCondition();
4692 Value *TrueVal = SelectInst->getTrueValue();
4693 Value *FalseVal = SelectInst->getFalseValue();
4694
4695 const APFloat *ConstVal = nullptr;
4696 Value *VarOp = nullptr;
4697 bool ConstIsTrue = false;
4698
4699 if (match(V: TrueVal, P: m_APFloat(Res&: ConstVal))) {
4700 VarOp = FalseVal;
4701 ConstIsTrue = true;
4702 } else if (match(V: FalseVal, P: m_APFloat(Res&: ConstVal))) {
4703 VarOp = TrueVal;
4704 ConstIsTrue = false;
4705 } else {
4706 return nullptr;
4707 }
4708
4709 Builder.SetInsertPoint(&EV);
4710
4711 CallInst *NewFrexp =
4712 Builder.CreateCall(Callee: FrexpCall->getCalledFunction(), Args: {VarOp}, Name: "frexp");
4713 NewFrexp->copyIRFlags(V: FrexpCall);
4714
4715 Value *NewEV = Builder.CreateExtractValue(Agg: NewFrexp, Idxs: 0, Name: "mantissa");
4716
4717 int Exp;
4718 APFloat Mantissa = frexp(X: *ConstVal, Exp, RM: APFloat::rmNearestTiesToEven);
4719
4720 Constant *ConstantMantissa = ConstantFP::get(Ty: TrueVal->getType(), V: Mantissa);
4721
4722 Value *NewSel = Builder.CreateSelectFMF(
4723 C: Cond, True: ConstIsTrue ? ConstantMantissa : NewEV,
4724 False: ConstIsTrue ? NewEV : ConstantMantissa, FMFSource: SelectInst, Name: "select.frexp");
4725 return NewSel;
4726}
4727Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
4728 Value *Agg = EV.getAggregateOperand();
4729
4730 if (!EV.hasIndices())
4731 return replaceInstUsesWith(I&: EV, V: Agg);
4732
4733 if (Value *V = simplifyExtractValueInst(Agg, Idxs: EV.getIndices(),
4734 Q: SQ.getWithInstruction(I: &EV)))
4735 return replaceInstUsesWith(I&: EV, V);
4736
4737 Value *Cond, *TrueVal, *FalseVal;
4738 if (match(V: &EV, P: m_ExtractValue<0>(V: m_Intrinsic<Intrinsic::frexp>(Op0: m_Select(
4739 C: m_Value(V&: Cond), L: m_Value(V&: TrueVal), R: m_Value(V&: FalseVal)))))) {
4740 auto *SelInst =
4741 cast<SelectInst>(Val: cast<IntrinsicInst>(Val: Agg)->getArgOperand(i: 0));
4742 if (Value *Result =
4743 foldFrexpOfSelect(EV, FrexpCall: cast<IntrinsicInst>(Val: Agg), SelectInst: SelInst, Builder))
4744 return replaceInstUsesWith(I&: EV, V: Result);
4745 }
4746 if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Val: Agg)) {
4747 // We're extracting from an insertvalue instruction, compare the indices
4748 const unsigned *exti, *exte, *insi, *inse;
4749 for (exti = EV.idx_begin(), insi = IV->idx_begin(),
4750 exte = EV.idx_end(), inse = IV->idx_end();
4751 exti != exte && insi != inse;
4752 ++exti, ++insi) {
4753 if (*insi != *exti)
4754 // The insert and extract both reference distinctly different elements.
4755 // This means the extract is not influenced by the insert, and we can
4756 // replace the aggregate operand of the extract with the aggregate
4757 // operand of the insert. i.e., replace
4758 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
4759 // %E = extractvalue { i32, { i32 } } %I, 0
4760 // with
4761 // %E = extractvalue { i32, { i32 } } %A, 0
4762 return ExtractValueInst::Create(Agg: IV->getAggregateOperand(),
4763 Idxs: EV.getIndices());
4764 }
4765 if (exti == exte && insi == inse)
4766 // Both iterators are at the end: Index lists are identical. Replace
4767 // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
4768 // %C = extractvalue { i32, { i32 } } %B, 1, 0
4769 // with "i32 42"
4770 return replaceInstUsesWith(I&: EV, V: IV->getInsertedValueOperand());
4771 if (exti == exte) {
4772 // The extract list is a prefix of the insert list. i.e. replace
4773 // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
4774 // %E = extractvalue { i32, { i32 } } %I, 1
4775 // with
4776 // %X = extractvalue { i32, { i32 } } %A, 1
4777 // %E = insertvalue { i32 } %X, i32 42, 0
4778 // by switching the order of the insert and extract (though the
4779 // insertvalue should be left in, since it may have other uses).
4780 Value *NewEV = Builder.CreateExtractValue(Agg: IV->getAggregateOperand(),
4781 Idxs: EV.getIndices());
4782 return InsertValueInst::Create(Agg: NewEV, Val: IV->getInsertedValueOperand(),
4783 Idxs: ArrayRef(insi, inse));
4784 }
4785 if (insi == inse)
4786 // The insert list is a prefix of the extract list
4787 // We can simply remove the common indices from the extract and make it
4788 // operate on the inserted value instead of the insertvalue result.
4789 // i.e., replace
4790 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
4791 // %E = extractvalue { i32, { i32 } } %I, 1, 0
4792 // with
4793 // %E extractvalue { i32 } { i32 42 }, 0
4794 return ExtractValueInst::Create(Agg: IV->getInsertedValueOperand(),
4795 Idxs: ArrayRef(exti, exte));
4796 }
4797
4798 if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
4799 return R;
4800
4801 if (LoadInst *L = dyn_cast<LoadInst>(Val: Agg)) {
4802 // Bail out if the aggregate contains scalable vector type
4803 if (auto *STy = dyn_cast<StructType>(Val: Agg->getType());
4804 STy && STy->isScalableTy())
4805 return nullptr;
4806
4807 // If the (non-volatile) load only has one use, we can rewrite this to a
4808 // load from a GEP. This reduces the size of the load. If a load is used
4809 // only by extractvalue instructions then this either must have been
4810 // optimized before, or it is a struct with padding, in which case we
4811 // don't want to do the transformation as it loses padding knowledge.
4812 if (L->isSimple() && L->hasOneUse()) {
4813 // extractvalue has integer indices, getelementptr has Value*s. Convert.
4814 SmallVector<Value*, 4> Indices;
4815 // Prefix an i32 0 since we need the first element.
4816 Indices.push_back(Elt: Builder.getInt32(C: 0));
4817 for (unsigned Idx : EV.indices())
4818 Indices.push_back(Elt: Builder.getInt32(C: Idx));
4819
4820 // We need to insert these at the location of the old load, not at that of
4821 // the extractvalue.
4822 Builder.SetInsertPoint(L);
4823 Value *GEP = Builder.CreateInBoundsGEP(Ty: L->getType(),
4824 Ptr: L->getPointerOperand(), IdxList: Indices);
4825 Instruction *NL = Builder.CreateLoad(Ty: EV.getType(), Ptr: GEP);
4826 // Whatever aliasing information we had for the orignal load must also
4827 // hold for the smaller load, so propagate the annotations.
4828 NL->setAAMetadata(L->getAAMetadata());
4829 // Returning the load directly will cause the main loop to insert it in
4830 // the wrong spot, so use replaceInstUsesWith().
4831 return replaceInstUsesWith(I&: EV, V: NL);
4832 }
4833 }
4834
4835 if (auto *PN = dyn_cast<PHINode>(Val: Agg))
4836 if (Instruction *Res = foldOpIntoPhi(I&: EV, PN))
4837 return Res;
4838
4839 // Canonicalize extract (select Cond, TV, FV)
4840 // -> select cond, (extract TV), (extract FV)
4841 if (auto *SI = dyn_cast<SelectInst>(Val: Agg))
4842 if (Instruction *R = FoldOpIntoSelect(Op&: EV, SI, /*FoldWithMultiUse=*/true))
4843 return R;
4844
4845 // We could simplify extracts from other values. Note that nested extracts may
4846 // already be simplified implicitly by the above: extract (extract (insert) )
4847 // will be translated into extract ( insert ( extract ) ) first and then just
4848 // the value inserted, if appropriate. Similarly for extracts from single-use
4849 // loads: extract (extract (load)) will be translated to extract (load (gep))
4850 // and if again single-use then via load (gep (gep)) to load (gep).
4851 // However, double extracts from e.g. function arguments or return values
4852 // aren't handled yet.
4853 return nullptr;
4854}
4855
4856/// Return 'true' if the given typeinfo will match anything.
4857static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
4858 switch (Personality) {
4859 case EHPersonality::GNU_C:
4860 case EHPersonality::GNU_C_SjLj:
4861 case EHPersonality::Rust:
4862 // The GCC C EH and Rust personality only exists to support cleanups, so
4863 // it's not clear what the semantics of catch clauses are.
4864 return false;
4865 case EHPersonality::Unknown:
4866 return false;
4867 case EHPersonality::GNU_Ada:
4868 // While __gnat_all_others_value will match any Ada exception, it doesn't
4869 // match foreign exceptions (or didn't, before gcc-4.7).
4870 return false;
4871 case EHPersonality::GNU_CXX:
4872 case EHPersonality::GNU_CXX_SjLj:
4873 case EHPersonality::GNU_ObjC:
4874 case EHPersonality::MSVC_X86SEH:
4875 case EHPersonality::MSVC_TableSEH:
4876 case EHPersonality::MSVC_CXX:
4877 case EHPersonality::CoreCLR:
4878 case EHPersonality::Wasm_CXX:
4879 case EHPersonality::XL_CXX:
4880 case EHPersonality::ZOS_CXX:
4881 return TypeInfo->isNullValue();
4882 }
4883 llvm_unreachable("invalid enum");
4884}
4885
4886static bool shorter_filter(const Value *LHS, const Value *RHS) {
4887 return
4888 cast<ArrayType>(Val: LHS->getType())->getNumElements()
4889 <
4890 cast<ArrayType>(Val: RHS->getType())->getNumElements();
4891}
4892
4893Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
4894 // The logic here should be correct for any real-world personality function.
4895 // However if that turns out not to be true, the offending logic can always
4896 // be conditioned on the personality function, like the catch-all logic is.
4897 EHPersonality Personality =
4898 classifyEHPersonality(Pers: LI.getParent()->getParent()->getPersonalityFn());
4899
4900 // Simplify the list of clauses, eg by removing repeated catch clauses
4901 // (these are often created by inlining).
4902 bool MakeNewInstruction = false; // If true, recreate using the following:
4903 SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
4904 bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
4905
4906 SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
4907 for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
4908 bool isLastClause = i + 1 == e;
4909 if (LI.isCatch(Idx: i)) {
4910 // A catch clause.
4911 Constant *CatchClause = LI.getClause(Idx: i);
4912 Constant *TypeInfo = CatchClause->stripPointerCasts();
4913
4914 // If we already saw this clause, there is no point in having a second
4915 // copy of it.
4916 if (AlreadyCaught.insert(Ptr: TypeInfo).second) {
4917 // This catch clause was not already seen.
4918 NewClauses.push_back(Elt: CatchClause);
4919 } else {
4920 // Repeated catch clause - drop the redundant copy.
4921 MakeNewInstruction = true;
4922 }
4923
4924 // If this is a catch-all then there is no point in keeping any following
4925 // clauses or marking the landingpad as having a cleanup.
4926 if (isCatchAll(Personality, TypeInfo)) {
4927 if (!isLastClause)
4928 MakeNewInstruction = true;
4929 CleanupFlag = false;
4930 break;
4931 }
4932 } else {
4933 // A filter clause. If any of the filter elements were already caught
4934 // then they can be dropped from the filter. It is tempting to try to
4935 // exploit the filter further by saying that any typeinfo that does not
4936 // occur in the filter can't be caught later (and thus can be dropped).
4937 // However this would be wrong, since typeinfos can match without being
4938 // equal (for example if one represents a C++ class, and the other some
4939 // class derived from it).
4940 assert(LI.isFilter(i) && "Unsupported landingpad clause!");
4941 Constant *FilterClause = LI.getClause(Idx: i);
4942 ArrayType *FilterType = cast<ArrayType>(Val: FilterClause->getType());
4943 unsigned NumTypeInfos = FilterType->getNumElements();
4944
4945 // An empty filter catches everything, so there is no point in keeping any
4946 // following clauses or marking the landingpad as having a cleanup. By
4947 // dealing with this case here the following code is made a bit simpler.
4948 if (!NumTypeInfos) {
4949 NewClauses.push_back(Elt: FilterClause);
4950 if (!isLastClause)
4951 MakeNewInstruction = true;
4952 CleanupFlag = false;
4953 break;
4954 }
4955
4956 bool MakeNewFilter = false; // If true, make a new filter.
4957 SmallVector<Constant *, 16> NewFilterElts; // New elements.
4958 if (isa<ConstantAggregateZero>(Val: FilterClause)) {
4959 // Not an empty filter - it contains at least one null typeinfo.
4960 assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
4961 Constant *TypeInfo =
4962 Constant::getNullValue(Ty: FilterType->getElementType());
4963 // If this typeinfo is a catch-all then the filter can never match.
4964 if (isCatchAll(Personality, TypeInfo)) {
4965 // Throw the filter away.
4966 MakeNewInstruction = true;
4967 continue;
4968 }
4969
4970 // There is no point in having multiple copies of this typeinfo, so
4971 // discard all but the first copy if there is more than one.
4972 NewFilterElts.push_back(Elt: TypeInfo);
4973 if (NumTypeInfos > 1)
4974 MakeNewFilter = true;
4975 } else {
4976 ConstantArray *Filter = cast<ConstantArray>(Val: FilterClause);
4977 SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
4978 NewFilterElts.reserve(N: NumTypeInfos);
4979
4980 // Remove any filter elements that were already caught or that already
4981 // occurred in the filter. While there, see if any of the elements are
4982 // catch-alls. If so, the filter can be discarded.
4983 bool SawCatchAll = false;
4984 for (unsigned j = 0; j != NumTypeInfos; ++j) {
4985 Constant *Elt = Filter->getOperand(i_nocapture: j);
4986 Constant *TypeInfo = Elt->stripPointerCasts();
4987 if (isCatchAll(Personality, TypeInfo)) {
4988 // This element is a catch-all. Bail out, noting this fact.
4989 SawCatchAll = true;
4990 break;
4991 }
4992
4993 // Even if we've seen a type in a catch clause, we don't want to
4994 // remove it from the filter. An unexpected type handler may be
4995 // set up for a call site which throws an exception of the same
4996 // type caught. In order for the exception thrown by the unexpected
4997 // handler to propagate correctly, the filter must be correctly
4998 // described for the call site.
4999 //
5000 // Example:
5001 //
5002 // void unexpected() { throw 1;}
5003 // void foo() throw (int) {
5004 // std::set_unexpected(unexpected);
5005 // try {
5006 // throw 2.0;
5007 // } catch (int i) {}
5008 // }
5009
5010 // There is no point in having multiple copies of the same typeinfo in
5011 // a filter, so only add it if we didn't already.
5012 if (SeenInFilter.insert(Ptr: TypeInfo).second)
5013 NewFilterElts.push_back(Elt: cast<Constant>(Val: Elt));
5014 }
5015 // A filter containing a catch-all cannot match anything by definition.
5016 if (SawCatchAll) {
5017 // Throw the filter away.
5018 MakeNewInstruction = true;
5019 continue;
5020 }
5021
5022 // If we dropped something from the filter, make a new one.
5023 if (NewFilterElts.size() < NumTypeInfos)
5024 MakeNewFilter = true;
5025 }
5026 if (MakeNewFilter) {
5027 FilterType = ArrayType::get(ElementType: FilterType->getElementType(),
5028 NumElements: NewFilterElts.size());
5029 FilterClause = ConstantArray::get(T: FilterType, V: NewFilterElts);
5030 MakeNewInstruction = true;
5031 }
5032
5033 NewClauses.push_back(Elt: FilterClause);
5034
5035 // If the new filter is empty then it will catch everything so there is
5036 // no point in keeping any following clauses or marking the landingpad
5037 // as having a cleanup. The case of the original filter being empty was
5038 // already handled above.
5039 if (MakeNewFilter && !NewFilterElts.size()) {
5040 assert(MakeNewInstruction && "New filter but not a new instruction!");
5041 CleanupFlag = false;
5042 break;
5043 }
5044 }
5045 }
5046
5047 // If several filters occur in a row then reorder them so that the shortest
5048 // filters come first (those with the smallest number of elements). This is
5049 // advantageous because shorter filters are more likely to match, speeding up
5050 // unwinding, but mostly because it increases the effectiveness of the other
5051 // filter optimizations below.
5052 for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
5053 unsigned j;
5054 // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
5055 for (j = i; j != e; ++j)
5056 if (!isa<ArrayType>(Val: NewClauses[j]->getType()))
5057 break;
5058
5059 // Check whether the filters are already sorted by length. We need to know
5060 // if sorting them is actually going to do anything so that we only make a
5061 // new landingpad instruction if it does.
5062 for (unsigned k = i; k + 1 < j; ++k)
5063 if (shorter_filter(LHS: NewClauses[k+1], RHS: NewClauses[k])) {
5064 // Not sorted, so sort the filters now. Doing an unstable sort would be
5065 // correct too but reordering filters pointlessly might confuse users.
5066 std::stable_sort(first: NewClauses.begin() + i, last: NewClauses.begin() + j,
5067 comp: shorter_filter);
5068 MakeNewInstruction = true;
5069 break;
5070 }
5071
5072 // Look for the next batch of filters.
5073 i = j + 1;
5074 }
5075
5076 // If typeinfos matched if and only if equal, then the elements of a filter L
5077 // that occurs later than a filter F could be replaced by the intersection of
5078 // the elements of F and L. In reality two typeinfos can match without being
5079 // equal (for example if one represents a C++ class, and the other some class
5080 // derived from it) so it would be wrong to perform this transform in general.
5081 // However the transform is correct and useful if F is a subset of L. In that
5082 // case L can be replaced by F, and thus removed altogether since repeating a
5083 // filter is pointless. So here we look at all pairs of filters F and L where
5084 // L follows F in the list of clauses, and remove L if every element of F is
5085 // an element of L. This can occur when inlining C++ functions with exception
5086 // specifications.
5087 for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
5088 // Examine each filter in turn.
5089 Value *Filter = NewClauses[i];
5090 ArrayType *FTy = dyn_cast<ArrayType>(Val: Filter->getType());
5091 if (!FTy)
5092 // Not a filter - skip it.
5093 continue;
5094 unsigned FElts = FTy->getNumElements();
5095 // Examine each filter following this one. Doing this backwards means that
5096 // we don't have to worry about filters disappearing under us when removed.
5097 for (unsigned j = NewClauses.size() - 1; j != i; --j) {
5098 Value *LFilter = NewClauses[j];
5099 ArrayType *LTy = dyn_cast<ArrayType>(Val: LFilter->getType());
5100 if (!LTy)
5101 // Not a filter - skip it.
5102 continue;
5103 // If Filter is a subset of LFilter, i.e. every element of Filter is also
5104 // an element of LFilter, then discard LFilter.
5105 SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
5106 // If Filter is empty then it is a subset of LFilter.
5107 if (!FElts) {
5108 // Discard LFilter.
5109 NewClauses.erase(CI: J);
5110 MakeNewInstruction = true;
5111 // Move on to the next filter.
5112 continue;
5113 }
5114 unsigned LElts = LTy->getNumElements();
5115 // If Filter is longer than LFilter then it cannot be a subset of it.
5116 if (FElts > LElts)
5117 // Move on to the next filter.
5118 continue;
5119 // At this point we know that LFilter has at least one element.
5120 if (isa<ConstantAggregateZero>(Val: LFilter)) { // LFilter only contains zeros.
5121 // Filter is a subset of LFilter iff Filter contains only zeros (as we
5122 // already know that Filter is not longer than LFilter).
5123 if (isa<ConstantAggregateZero>(Val: Filter)) {
5124 assert(FElts <= LElts && "Should have handled this case earlier!");
5125 // Discard LFilter.
5126 NewClauses.erase(CI: J);
5127 MakeNewInstruction = true;
5128 }
5129 // Move on to the next filter.
5130 continue;
5131 }
5132 ConstantArray *LArray = cast<ConstantArray>(Val: LFilter);
5133 if (isa<ConstantAggregateZero>(Val: Filter)) { // Filter only contains zeros.
5134 // Since Filter is non-empty and contains only zeros, it is a subset of
5135 // LFilter iff LFilter contains a zero.
5136 assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
5137 for (unsigned l = 0; l != LElts; ++l)
5138 if (LArray->getOperand(i_nocapture: l)->isNullValue()) {
5139 // LFilter contains a zero - discard it.
5140 NewClauses.erase(CI: J);
5141 MakeNewInstruction = true;
5142 break;
5143 }
5144 // Move on to the next filter.
5145 continue;
5146 }
5147 // At this point we know that both filters are ConstantArrays. Loop over
5148 // operands to see whether every element of Filter is also an element of
5149 // LFilter. Since filters tend to be short this is probably faster than
5150 // using a method that scales nicely.
5151 ConstantArray *FArray = cast<ConstantArray>(Val: Filter);
5152 bool AllFound = true;
5153 for (unsigned f = 0; f != FElts; ++f) {
5154 Value *FTypeInfo = FArray->getOperand(i_nocapture: f)->stripPointerCasts();
5155 AllFound = false;
5156 for (unsigned l = 0; l != LElts; ++l) {
5157 Value *LTypeInfo = LArray->getOperand(i_nocapture: l)->stripPointerCasts();
5158 if (LTypeInfo == FTypeInfo) {
5159 AllFound = true;
5160 break;
5161 }
5162 }
5163 if (!AllFound)
5164 break;
5165 }
5166 if (AllFound) {
5167 // Discard LFilter.
5168 NewClauses.erase(CI: J);
5169 MakeNewInstruction = true;
5170 }
5171 // Move on to the next filter.
5172 }
5173 }
5174
5175 // If we changed any of the clauses, replace the old landingpad instruction
5176 // with a new one.
5177 if (MakeNewInstruction) {
5178 LandingPadInst *NLI = LandingPadInst::Create(RetTy: LI.getType(),
5179 NumReservedClauses: NewClauses.size());
5180 for (Constant *C : NewClauses)
5181 NLI->addClause(ClauseVal: C);
5182 // A landing pad with no clauses must have the cleanup flag set. It is
5183 // theoretically possible, though highly unlikely, that we eliminated all
5184 // clauses. If so, force the cleanup flag to true.
5185 if (NewClauses.empty())
5186 CleanupFlag = true;
5187 NLI->setCleanup(CleanupFlag);
5188 return NLI;
5189 }
5190
5191 // Even if none of the clauses changed, we may nonetheless have understood
5192 // that the cleanup flag is pointless. Clear it if so.
5193 if (LI.isCleanup() != CleanupFlag) {
5194 assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
5195 LI.setCleanup(CleanupFlag);
5196 return &LI;
5197 }
5198
5199 return nullptr;
5200}
5201
5202Value *
5203InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
5204 // Try to push freeze through instructions that propagate but don't produce
5205 // poison as far as possible. If an operand of freeze follows three
5206 // conditions 1) one-use, 2) does not produce poison, and 3) has all but one
5207 // guaranteed-non-poison operands then push the freeze through to the one
5208 // operand that is not guaranteed non-poison. The actual transform is as
5209 // follows.
5210 // Op1 = ... ; Op1 can be posion
5211 // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and only have
5212 // ; single guaranteed-non-poison operands
5213 // ... = Freeze(Op0)
5214 // =>
5215 // Op1 = ...
5216 // Op1.fr = Freeze(Op1)
5217 // ... = Inst(Op1.fr, NonPoisonOps...)
5218 auto *OrigOp = OrigFI.getOperand(i_nocapture: 0);
5219 auto *OrigOpInst = dyn_cast<Instruction>(Val: OrigOp);
5220
5221 // While we could change the other users of OrigOp to use freeze(OrigOp), that
5222 // potentially reduces their optimization potential, so let's only do this iff
5223 // the OrigOp is only used by the freeze.
5224 if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(Val: OrigOp))
5225 return nullptr;
5226
5227 // We can't push the freeze through an instruction which can itself create
5228 // poison. If the only source of new poison is flags, we can simply
5229 // strip them (since we know the only use is the freeze and nothing can
5230 // benefit from them.)
5231 if (canCreateUndefOrPoison(Op: cast<Operator>(Val: OrigOp),
5232 /*ConsiderFlagsAndMetadata*/ false))
5233 return nullptr;
5234
5235 // If operand is guaranteed not to be poison, there is no need to add freeze
5236 // to the operand. So we first find the operand that is not guaranteed to be
5237 // poison.
5238 Value *MaybePoisonOperand = nullptr;
5239 for (Value *V : OrigOpInst->operands()) {
5240 if (isa<MetadataAsValue>(Val: V) || isGuaranteedNotToBeUndefOrPoison(V) ||
5241 // Treat identical operands as a single operand.
5242 (MaybePoisonOperand && MaybePoisonOperand == V))
5243 continue;
5244 if (!MaybePoisonOperand)
5245 MaybePoisonOperand = V;
5246 else
5247 return nullptr;
5248 }
5249
5250 OrigOpInst->dropPoisonGeneratingAnnotations();
5251
5252 // If all operands are guaranteed to be non-poison, we can drop freeze.
5253 if (!MaybePoisonOperand)
5254 return OrigOp;
5255
5256 Builder.SetInsertPoint(OrigOpInst);
5257 Value *FrozenMaybePoisonOperand = Builder.CreateFreeze(
5258 V: MaybePoisonOperand, Name: MaybePoisonOperand->getName() + ".fr");
5259
5260 OrigOpInst->replaceUsesOfWith(From: MaybePoisonOperand, To: FrozenMaybePoisonOperand);
5261 return OrigOp;
5262}
5263
5264Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
5265 PHINode *PN) {
5266 // Detect whether this is a recurrence with a start value and some number of
5267 // backedge values. We'll check whether we can push the freeze through the
5268 // backedge values (possibly dropping poison flags along the way) until we
5269 // reach the phi again. In that case, we can move the freeze to the start
5270 // value.
5271 Use *StartU = nullptr;
5272 SmallVector<Value *> Worklist;
5273 for (Use &U : PN->incoming_values()) {
5274 if (DT.dominates(A: PN->getParent(), B: PN->getIncomingBlock(U))) {
5275 // Add backedge value to worklist.
5276 Worklist.push_back(Elt: U.get());
5277 continue;
5278 }
5279
5280 // Don't bother handling multiple start values.
5281 if (StartU)
5282 return nullptr;
5283 StartU = &U;
5284 }
5285
5286 if (!StartU || Worklist.empty())
5287 return nullptr; // Not a recurrence.
5288
5289 Value *StartV = StartU->get();
5290 BasicBlock *StartBB = PN->getIncomingBlock(U: *StartU);
5291 bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(V: StartV);
5292 // We can't insert freeze if the start value is the result of the
5293 // terminator (e.g. an invoke).
5294 if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
5295 return nullptr;
5296
5297 SmallPtrSet<Value *, 32> Visited;
5298 SmallVector<Instruction *> DropFlags;
5299 while (!Worklist.empty()) {
5300 Value *V = Worklist.pop_back_val();
5301 if (!Visited.insert(Ptr: V).second)
5302 continue;
5303
5304 if (Visited.size() > 32)
5305 return nullptr; // Limit the total number of values we inspect.
5306
5307 // Assume that PN is non-poison, because it will be after the transform.
5308 if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
5309 continue;
5310
5311 Instruction *I = dyn_cast<Instruction>(Val: V);
5312 if (!I || canCreateUndefOrPoison(Op: cast<Operator>(Val: I),
5313 /*ConsiderFlagsAndMetadata*/ false))
5314 return nullptr;
5315
5316 DropFlags.push_back(Elt: I);
5317 append_range(C&: Worklist, R: I->operands());
5318 }
5319
5320 for (Instruction *I : DropFlags)
5321 I->dropPoisonGeneratingAnnotations();
5322
5323 if (StartNeedsFreeze) {
5324 Builder.SetInsertPoint(StartBB->getTerminator());
5325 Value *FrozenStartV = Builder.CreateFreeze(V: StartV,
5326 Name: StartV->getName() + ".fr");
5327 replaceUse(U&: *StartU, NewValue: FrozenStartV);
5328 }
5329 return replaceInstUsesWith(I&: FI, V: PN);
5330}
5331
5332bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) {
5333 Value *Op = FI.getOperand(i_nocapture: 0);
5334
5335 if (isa<Constant>(Val: Op) || Op->hasOneUse())
5336 return false;
5337
5338 // Move the freeze directly after the definition of its operand, so that
5339 // it dominates the maximum number of uses. Note that it may not dominate
5340 // *all* uses if the operand is an invoke/callbr and the use is in a phi on
5341 // the normal/default destination. This is why the domination check in the
5342 // replacement below is still necessary.
5343 BasicBlock::iterator MoveBefore;
5344 if (isa<Argument>(Val: Op)) {
5345 MoveBefore =
5346 FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
5347 } else {
5348 auto MoveBeforeOpt = cast<Instruction>(Val: Op)->getInsertionPointAfterDef();
5349 if (!MoveBeforeOpt)
5350 return false;
5351 MoveBefore = *MoveBeforeOpt;
5352 }
5353
5354 // Re-point iterator to come after any debug-info records.
5355 MoveBefore.setHeadBit(false);
5356
5357 bool Changed = false;
5358 if (&FI != &*MoveBefore) {
5359 FI.moveBefore(BB&: *MoveBefore->getParent(), I: MoveBefore);
5360 Changed = true;
5361 }
5362
5363 Changed |= Op->replaceUsesWithIf(
5364 New: &FI, ShouldReplace: [&](Use &U) -> bool { return DT.dominates(Def: &FI, U); });
5365
5366 return Changed;
5367}
5368
5369// Check if any direct or bitcast user of this value is a shuffle instruction.
5370static bool isUsedWithinShuffleVector(Value *V) {
5371 for (auto *U : V->users()) {
5372 if (isa<ShuffleVectorInst>(Val: U))
5373 return true;
5374 else if (match(V: U, P: m_BitCast(Op: m_Specific(V))) && isUsedWithinShuffleVector(V: U))
5375 return true;
5376 }
5377 return false;
5378}
5379
5380Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
5381 Value *Op0 = I.getOperand(i_nocapture: 0);
5382
5383 if (Value *V = simplifyFreezeInst(Op: Op0, Q: SQ.getWithInstruction(I: &I)))
5384 return replaceInstUsesWith(I, V);
5385
5386 // freeze (phi const, x) --> phi const, (freeze x)
5387 if (auto *PN = dyn_cast<PHINode>(Val: Op0)) {
5388 if (Instruction *NV = foldOpIntoPhi(I, PN))
5389 return NV;
5390 if (Instruction *NV = foldFreezeIntoRecurrence(FI&: I, PN))
5391 return NV;
5392 }
5393
5394 if (Value *NI = pushFreezeToPreventPoisonFromPropagating(OrigFI&: I))
5395 return replaceInstUsesWith(I, V: NI);
5396
5397 // If I is freeze(undef), check its uses and fold it to a fixed constant.
5398 // - or: pick -1
5399 // - select's condition: if the true value is constant, choose it by making
5400 // the condition true.
5401 // - phi: pick the common constant across operands
5402 // - default: pick 0
5403 //
5404 // Note that this transform is intentionally done here rather than
5405 // via an analysis in InstSimplify or at individual user sites. That is
5406 // because we must produce the same value for all uses of the freeze -
5407 // it's the reason "freeze" exists!
5408 //
5409 // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
5410 // duplicating logic for binops at least.
5411 auto getUndefReplacement = [&](Type *Ty) {
5412 auto pickCommonConstantFromPHI = [](PHINode &PN) -> Value * {
5413 // phi(freeze(undef), C, C). Choose C for freeze so the PHI can be
5414 // removed.
5415 Constant *BestValue = nullptr;
5416 for (Value *V : PN.incoming_values()) {
5417 if (match(V, P: m_Freeze(Op: m_Undef())))
5418 continue;
5419
5420 Constant *C = dyn_cast<Constant>(Val: V);
5421 if (!C)
5422 return nullptr;
5423
5424 if (!isGuaranteedNotToBeUndefOrPoison(V: C))
5425 return nullptr;
5426
5427 if (BestValue && BestValue != C)
5428 return nullptr;
5429
5430 BestValue = C;
5431 }
5432 return BestValue;
5433 };
5434
5435 Value *NullValue = Constant::getNullValue(Ty);
5436 Value *BestValue = nullptr;
5437 for (auto *U : I.users()) {
5438 Value *V = NullValue;
5439 if (match(V: U, P: m_Or(L: m_Value(), R: m_Value())))
5440 V = ConstantInt::getAllOnesValue(Ty);
5441 else if (match(V: U, P: m_Select(C: m_Specific(V: &I), L: m_Constant(), R: m_Value())))
5442 V = ConstantInt::getTrue(Ty);
5443 else if (match(V: U, P: m_c_Select(L: m_Specific(V: &I), R: m_Value(V)))) {
5444 if (V == &I || !isGuaranteedNotToBeUndefOrPoison(V, AC: &AC, CtxI: &I, DT: &DT))
5445 V = NullValue;
5446 } else if (auto *PHI = dyn_cast<PHINode>(Val: U)) {
5447 if (Value *MaybeV = pickCommonConstantFromPHI(*PHI))
5448 V = MaybeV;
5449 }
5450
5451 if (!BestValue)
5452 BestValue = V;
5453 else if (BestValue != V)
5454 BestValue = NullValue;
5455 }
5456 assert(BestValue && "Must have at least one use");
5457 assert(BestValue != &I && "Cannot replace with itself");
5458 return BestValue;
5459 };
5460
5461 if (match(V: Op0, P: m_Undef())) {
5462 // Don't fold freeze(undef/poison) if it's used as a vector operand in
5463 // a shuffle. This may improve codegen for shuffles that allow
5464 // unspecified inputs.
5465 if (isUsedWithinShuffleVector(V: &I))
5466 return nullptr;
5467 return replaceInstUsesWith(I, V: getUndefReplacement(I.getType()));
5468 }
5469
5470 auto getFreezeVectorReplacement = [](Constant *C) -> Constant * {
5471 Type *Ty = C->getType();
5472 auto *VTy = dyn_cast<FixedVectorType>(Val: Ty);
5473 if (!VTy)
5474 return nullptr;
5475 unsigned NumElts = VTy->getNumElements();
5476 Constant *BestValue = Constant::getNullValue(Ty: VTy->getScalarType());
5477 for (unsigned i = 0; i != NumElts; ++i) {
5478 Constant *EltC = C->getAggregateElement(Elt: i);
5479 if (EltC && !match(V: EltC, P: m_Undef())) {
5480 BestValue = EltC;
5481 break;
5482 }
5483 }
5484 return Constant::replaceUndefsWith(C, Replacement: BestValue);
5485 };
5486
5487 Constant *C;
5488 if (match(V: Op0, P: m_Constant(C)) && C->containsUndefOrPoisonElement() &&
5489 !C->containsConstantExpression()) {
5490 if (Constant *Repl = getFreezeVectorReplacement(C))
5491 return replaceInstUsesWith(I, V: Repl);
5492 }
5493
5494 // Replace uses of Op with freeze(Op).
5495 if (freezeOtherUses(FI&: I))
5496 return &I;
5497
5498 return nullptr;
5499}
5500
5501/// Check for case where the call writes to an otherwise dead alloca. This
5502/// shows up for unused out-params in idiomatic C/C++ code. Note that this
5503/// helper *only* analyzes the write; doesn't check any other legality aspect.
5504static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
5505 auto *CB = dyn_cast<CallBase>(Val: I);
5506 if (!CB)
5507 // TODO: handle e.g. store to alloca here - only worth doing if we extend
5508 // to allow reload along used path as described below. Otherwise, this
5509 // is simply a store to a dead allocation which will be removed.
5510 return false;
5511 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: CB, TLI);
5512 if (!Dest)
5513 return false;
5514 auto *AI = dyn_cast<AllocaInst>(Val: getUnderlyingObject(V: Dest->Ptr));
5515 if (!AI)
5516 // TODO: allow malloc?
5517 return false;
5518 // TODO: allow memory access dominated by move point? Note that since AI
5519 // could have a reference to itself captured by the call, we would need to
5520 // account for cycles in doing so.
5521 SmallVector<const User *> AllocaUsers;
5522 SmallPtrSet<const User *, 4> Visited;
5523 auto pushUsers = [&](const Instruction &I) {
5524 for (const User *U : I.users()) {
5525 if (Visited.insert(Ptr: U).second)
5526 AllocaUsers.push_back(Elt: U);
5527 }
5528 };
5529 pushUsers(*AI);
5530 while (!AllocaUsers.empty()) {
5531 auto *UserI = cast<Instruction>(Val: AllocaUsers.pop_back_val());
5532 if (isa<GetElementPtrInst>(Val: UserI) || isa<AddrSpaceCastInst>(Val: UserI)) {
5533 pushUsers(*UserI);
5534 continue;
5535 }
5536 if (UserI == CB)
5537 continue;
5538 // TODO: support lifetime.start/end here
5539 return false;
5540 }
5541 return true;
5542}
5543
5544/// Try to move the specified instruction from its current block into the
5545/// beginning of DestBlock, which can only happen if it's safe to move the
5546/// instruction past all of the instructions between it and the end of its
5547/// block.
5548bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
5549 BasicBlock *DestBlock) {
5550 BasicBlock *SrcBlock = I->getParent();
5551
5552 // Cannot move control-flow-involving, volatile loads, vaarg, etc.
5553 if (isa<PHINode>(Val: I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
5554 I->isTerminator())
5555 return false;
5556
5557 // Do not sink static or dynamic alloca instructions. Static allocas must
5558 // remain in the entry block, and dynamic allocas must not be sunk in between
5559 // a stacksave / stackrestore pair, which would incorrectly shorten its
5560 // lifetime.
5561 if (isa<AllocaInst>(Val: I))
5562 return false;
5563
5564 // Do not sink into catchswitch blocks.
5565 if (isa<CatchSwitchInst>(Val: DestBlock->getTerminator()))
5566 return false;
5567
5568 // Do not sink convergent call instructions.
5569 if (auto *CI = dyn_cast<CallInst>(Val: I)) {
5570 if (CI->isConvergent())
5571 return false;
5572 }
5573
5574 // Unless we can prove that the memory write isn't visibile except on the
5575 // path we're sinking to, we must bail.
5576 if (I->mayWriteToMemory()) {
5577 if (!SoleWriteToDeadLocal(I, TLI))
5578 return false;
5579 }
5580
5581 // We can only sink load instructions if there is nothing between the load and
5582 // the end of block that could change the value.
5583 if (I->mayReadFromMemory() &&
5584 !I->hasMetadata(KindID: LLVMContext::MD_invariant_load)) {
5585 // We don't want to do any sophisticated alias analysis, so we only check
5586 // the instructions after I in I's parent block if we try to sink to its
5587 // successor block.
5588 if (DestBlock->getUniquePredecessor() != I->getParent())
5589 return false;
5590 for (BasicBlock::iterator Scan = std::next(x: I->getIterator()),
5591 E = I->getParent()->end();
5592 Scan != E; ++Scan)
5593 if (Scan->mayWriteToMemory())
5594 return false;
5595 }
5596
5597 I->dropDroppableUses(ShouldDrop: [&](const Use *U) {
5598 auto *I = dyn_cast<Instruction>(Val: U->getUser());
5599 if (I && I->getParent() != DestBlock) {
5600 Worklist.add(I);
5601 return true;
5602 }
5603 return false;
5604 });
5605 /// FIXME: We could remove droppable uses that are not dominated by
5606 /// the new position.
5607
5608 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
5609 I->moveBefore(BB&: *DestBlock, I: InsertPos);
5610 ++NumSunkInst;
5611
5612 // Also sink all related debug uses from the source basic block. Otherwise we
5613 // get debug use before the def. Attempt to salvage debug uses first, to
5614 // maximise the range variables have location for. If we cannot salvage, then
5615 // mark the location undef: we know it was supposed to receive a new location
5616 // here, but that computation has been sunk.
5617 SmallVector<DbgVariableRecord *, 2> DbgVariableRecords;
5618 findDbgUsers(V: I, DbgVariableRecords);
5619 if (!DbgVariableRecords.empty())
5620 tryToSinkInstructionDbgVariableRecords(I, InsertPos, SrcBlock, DestBlock,
5621 DPUsers&: DbgVariableRecords);
5622
5623 // PS: there are numerous flaws with this behaviour, not least that right now
5624 // assignments can be re-ordered past other assignments to the same variable
5625 // if they use different Values. Creating more undef assignements can never be
5626 // undone. And salvaging all users outside of this block can un-necessarily
5627 // alter the lifetime of the live-value that the variable refers to.
5628 // Some of these things can be resolved by tolerating debug use-before-defs in
5629 // LLVM-IR, however it depends on the instruction-referencing CodeGen backend
5630 // being used for more architectures.
5631
5632 return true;
5633}
5634
5635void InstCombinerImpl::tryToSinkInstructionDbgVariableRecords(
5636 Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
5637 BasicBlock *DestBlock,
5638 SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords) {
5639 // For all debug values in the destination block, the sunk instruction
5640 // will still be available, so they do not need to be dropped.
5641
5642 // Fetch all DbgVariableRecords not already in the destination.
5643 SmallVector<DbgVariableRecord *, 2> DbgVariableRecordsToSalvage;
5644 for (auto &DVR : DbgVariableRecords)
5645 if (DVR->getParent() != DestBlock)
5646 DbgVariableRecordsToSalvage.push_back(Elt: DVR);
5647
5648 // Fetch a second collection, of DbgVariableRecords in the source block that
5649 // we're going to sink.
5650 SmallVector<DbgVariableRecord *> DbgVariableRecordsToSink;
5651 for (DbgVariableRecord *DVR : DbgVariableRecordsToSalvage)
5652 if (DVR->getParent() == SrcBlock)
5653 DbgVariableRecordsToSink.push_back(Elt: DVR);
5654
5655 // Sort DbgVariableRecords according to their position in the block. This is a
5656 // partial order: DbgVariableRecords attached to different instructions will
5657 // be ordered by the instruction order, but DbgVariableRecords attached to the
5658 // same instruction won't have an order.
5659 auto Order = [](DbgVariableRecord *A, DbgVariableRecord *B) -> bool {
5660 return B->getInstruction()->comesBefore(Other: A->getInstruction());
5661 };
5662 llvm::stable_sort(Range&: DbgVariableRecordsToSink, C: Order);
5663
5664 // If there are two assignments to the same variable attached to the same
5665 // instruction, the ordering between the two assignments is important. Scan
5666 // for this (rare) case and establish which is the last assignment.
5667 using InstVarPair = std::pair<const Instruction *, DebugVariable>;
5668 SmallDenseMap<InstVarPair, DbgVariableRecord *> FilterOutMap;
5669 if (DbgVariableRecordsToSink.size() > 1) {
5670 SmallDenseMap<InstVarPair, unsigned> CountMap;
5671 // Count how many assignments to each variable there is per instruction.
5672 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
5673 DebugVariable DbgUserVariable =
5674 DebugVariable(DVR->getVariable(), DVR->getExpression(),
5675 DVR->getDebugLoc()->getInlinedAt());
5676 CountMap[std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable)] += 1;
5677 }
5678
5679 // If there are any instructions with two assignments, add them to the
5680 // FilterOutMap to record that they need extra filtering.
5681 SmallPtrSet<const Instruction *, 4> DupSet;
5682 for (auto It : CountMap) {
5683 if (It.second > 1) {
5684 FilterOutMap[It.first] = nullptr;
5685 DupSet.insert(Ptr: It.first.first);
5686 }
5687 }
5688
5689 // For all instruction/variable pairs needing extra filtering, find the
5690 // latest assignment.
5691 for (const Instruction *Inst : DupSet) {
5692 for (DbgVariableRecord &DVR :
5693 llvm::reverse(C: filterDbgVars(R: Inst->getDbgRecordRange()))) {
5694 DebugVariable DbgUserVariable =
5695 DebugVariable(DVR.getVariable(), DVR.getExpression(),
5696 DVR.getDebugLoc()->getInlinedAt());
5697 auto FilterIt =
5698 FilterOutMap.find(Val: std::make_pair(x&: Inst, y&: DbgUserVariable));
5699 if (FilterIt == FilterOutMap.end())
5700 continue;
5701 if (FilterIt->second != nullptr)
5702 continue;
5703 FilterIt->second = &DVR;
5704 }
5705 }
5706 }
5707
5708 // Perform cloning of the DbgVariableRecords that we plan on sinking, filter
5709 // out any duplicate assignments identified above.
5710 SmallVector<DbgVariableRecord *, 2> DVRClones;
5711 SmallSet<DebugVariable, 4> SunkVariables;
5712 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
5713 if (DVR->Type == DbgVariableRecord::LocationType::Declare)
5714 continue;
5715
5716 DebugVariable DbgUserVariable =
5717 DebugVariable(DVR->getVariable(), DVR->getExpression(),
5718 DVR->getDebugLoc()->getInlinedAt());
5719
5720 // For any variable where there were multiple assignments in the same place,
5721 // ignore all but the last assignment.
5722 if (!FilterOutMap.empty()) {
5723 InstVarPair IVP = std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable);
5724 auto It = FilterOutMap.find(Val: IVP);
5725
5726 // Filter out.
5727 if (It != FilterOutMap.end() && It->second != DVR)
5728 continue;
5729 }
5730
5731 if (!SunkVariables.insert(V: DbgUserVariable).second)
5732 continue;
5733
5734 if (DVR->isDbgAssign())
5735 continue;
5736
5737 DVRClones.emplace_back(Args: DVR->clone());
5738 LLVM_DEBUG(dbgs() << "CLONE: " << *DVRClones.back() << '\n');
5739 }
5740
5741 // Perform salvaging without the clones, then sink the clones.
5742 if (DVRClones.empty())
5743 return;
5744
5745 salvageDebugInfoForDbgValues(I&: *I, DPInsns: DbgVariableRecordsToSalvage);
5746
5747 // The clones are in reverse order of original appearance. Assert that the
5748 // head bit is set on the iterator as we _should_ have received it via
5749 // getFirstInsertionPt. Inserting like this will reverse the clone order as
5750 // we'll repeatedly insert at the head, such as:
5751 // DVR-3 (third insertion goes here)
5752 // DVR-2 (second insertion goes here)
5753 // DVR-1 (first insertion goes here)
5754 // Any-Prior-DVRs
5755 // InsertPtInst
5756 assert(InsertPos.getHeadBit());
5757 for (DbgVariableRecord *DVRClone : DVRClones) {
5758 InsertPos->getParent()->insertDbgRecordBefore(DR: DVRClone, Here: InsertPos);
5759 LLVM_DEBUG(dbgs() << "SINK: " << *DVRClone << '\n');
5760 }
5761}
5762
5763bool InstCombinerImpl::run() {
5764 while (!Worklist.isEmpty()) {
5765 // Walk deferred instructions in reverse order, and push them to the
5766 // worklist, which means they'll end up popped from the worklist in-order.
5767 while (Instruction *I = Worklist.popDeferred()) {
5768 // Check to see if we can DCE the instruction. We do this already here to
5769 // reduce the number of uses and thus allow other folds to trigger.
5770 // Note that eraseInstFromFunction() may push additional instructions on
5771 // the deferred worklist, so this will DCE whole instruction chains.
5772 if (isInstructionTriviallyDead(I, TLI: &TLI)) {
5773 eraseInstFromFunction(I&: *I);
5774 ++NumDeadInst;
5775 continue;
5776 }
5777
5778 Worklist.push(I);
5779 }
5780
5781 Instruction *I = Worklist.removeOne();
5782 if (I == nullptr) continue; // skip null values.
5783
5784 // Check to see if we can DCE the instruction.
5785 if (isInstructionTriviallyDead(I, TLI: &TLI)) {
5786 eraseInstFromFunction(I&: *I);
5787 ++NumDeadInst;
5788 continue;
5789 }
5790
5791 if (!DebugCounter::shouldExecute(Counter&: VisitCounter))
5792 continue;
5793
5794 // See if we can trivially sink this instruction to its user if we can
5795 // prove that the successor is not executed more frequently than our block.
5796 // Return the UserBlock if successful.
5797 auto getOptionalSinkBlockForInst =
5798 [this](Instruction *I) -> std::optional<BasicBlock *> {
5799 if (!EnableCodeSinking)
5800 return std::nullopt;
5801
5802 BasicBlock *BB = I->getParent();
5803 BasicBlock *UserParent = nullptr;
5804 unsigned NumUsers = 0;
5805
5806 for (Use &U : I->uses()) {
5807 User *User = U.getUser();
5808 if (User->isDroppable()) {
5809 // Do not sink if there are dereferenceable assumes that would be
5810 // removed.
5811 auto II = dyn_cast<IntrinsicInst>(Val: User);
5812 if (II->getIntrinsicID() != Intrinsic::assume ||
5813 !II->getOperandBundle(Name: "dereferenceable"))
5814 continue;
5815 }
5816
5817 if (NumUsers > MaxSinkNumUsers)
5818 return std::nullopt;
5819
5820 Instruction *UserInst = cast<Instruction>(Val: User);
5821 // Special handling for Phi nodes - get the block the use occurs in.
5822 BasicBlock *UserBB = UserInst->getParent();
5823 if (PHINode *PN = dyn_cast<PHINode>(Val: UserInst))
5824 UserBB = PN->getIncomingBlock(U);
5825 // Bail out if we have uses in different blocks. We don't do any
5826 // sophisticated analysis (i.e finding NearestCommonDominator of these
5827 // use blocks).
5828 if (UserParent && UserParent != UserBB)
5829 return std::nullopt;
5830 UserParent = UserBB;
5831
5832 // Make sure these checks are done only once, naturally we do the checks
5833 // the first time we get the userparent, this will save compile time.
5834 if (NumUsers == 0) {
5835 // Try sinking to another block. If that block is unreachable, then do
5836 // not bother. SimplifyCFG should handle it.
5837 if (UserParent == BB || !DT.isReachableFromEntry(A: UserParent))
5838 return std::nullopt;
5839
5840 auto *Term = UserParent->getTerminator();
5841 // See if the user is one of our successors that has only one
5842 // predecessor, so that we don't have to split the critical edge.
5843 // Another option where we can sink is a block that ends with a
5844 // terminator that does not pass control to other block (such as
5845 // return or unreachable or resume). In this case:
5846 // - I dominates the User (by SSA form);
5847 // - the User will be executed at most once.
5848 // So sinking I down to User is always profitable or neutral.
5849 if (UserParent->getUniquePredecessor() != BB && !succ_empty(I: Term))
5850 return std::nullopt;
5851
5852 assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
5853 }
5854
5855 NumUsers++;
5856 }
5857
5858 // No user or only has droppable users.
5859 if (!UserParent)
5860 return std::nullopt;
5861
5862 return UserParent;
5863 };
5864
5865 auto OptBB = getOptionalSinkBlockForInst(I);
5866 if (OptBB) {
5867 auto *UserParent = *OptBB;
5868 // Okay, the CFG is simple enough, try to sink this instruction.
5869 if (tryToSinkInstruction(I, DestBlock: UserParent)) {
5870 LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
5871 MadeIRChange = true;
5872 // We'll add uses of the sunk instruction below, but since
5873 // sinking can expose opportunities for it's *operands* add
5874 // them to the worklist
5875 for (Use &U : I->operands())
5876 if (Instruction *OpI = dyn_cast<Instruction>(Val: U.get()))
5877 Worklist.push(I: OpI);
5878 }
5879 }
5880
5881 // Now that we have an instruction, try combining it to simplify it.
5882 Builder.SetInsertPoint(I);
5883 Builder.CollectMetadataToCopy(
5884 Src: I, MetadataKinds: {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
5885
5886#ifndef NDEBUG
5887 std::string OrigI;
5888#endif
5889 LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS););
5890 LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
5891
5892 if (Instruction *Result = visit(I&: *I)) {
5893 ++NumCombined;
5894 // Should we replace the old instruction with a new one?
5895 if (Result != I) {
5896 LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
5897 << " New = " << *Result << '\n');
5898
5899 // We copy the old instruction's DebugLoc to the new instruction, unless
5900 // InstCombine already assigned a DebugLoc to it, in which case we
5901 // should trust the more specifically selected DebugLoc.
5902 Result->setDebugLoc(Result->getDebugLoc().orElse(Other: I->getDebugLoc()));
5903 // We also copy annotation metadata to the new instruction.
5904 Result->copyMetadata(SrcInst: *I, WL: LLVMContext::MD_annotation);
5905 // Everything uses the new instruction now.
5906 I->replaceAllUsesWith(V: Result);
5907
5908 // Move the name to the new instruction first.
5909 Result->takeName(V: I);
5910
5911 // Insert the new instruction into the basic block...
5912 BasicBlock *InstParent = I->getParent();
5913 BasicBlock::iterator InsertPos = I->getIterator();
5914
5915 // Are we replace a PHI with something that isn't a PHI, or vice versa?
5916 if (isa<PHINode>(Val: Result) != isa<PHINode>(Val: I)) {
5917 // We need to fix up the insertion point.
5918 if (isa<PHINode>(Val: I)) // PHI -> Non-PHI
5919 InsertPos = InstParent->getFirstInsertionPt();
5920 else // Non-PHI -> PHI
5921 InsertPos = InstParent->getFirstNonPHIIt();
5922 }
5923
5924 Result->insertInto(ParentBB: InstParent, It: InsertPos);
5925
5926 // Push the new instruction and any users onto the worklist.
5927 Worklist.pushUsersToWorkList(I&: *Result);
5928 Worklist.push(I: Result);
5929
5930 eraseInstFromFunction(I&: *I);
5931 } else {
5932 LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
5933 << " New = " << *I << '\n');
5934
5935 // If the instruction was modified, it's possible that it is now dead.
5936 // if so, remove it.
5937 if (isInstructionTriviallyDead(I, TLI: &TLI)) {
5938 eraseInstFromFunction(I&: *I);
5939 } else {
5940 Worklist.pushUsersToWorkList(I&: *I);
5941 Worklist.push(I);
5942 }
5943 }
5944 MadeIRChange = true;
5945 }
5946 }
5947
5948 Worklist.zap();
5949 return MadeIRChange;
5950}
5951
5952// Track the scopes used by !alias.scope and !noalias. In a function, a
5953// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
5954// by both sets. If not, the declaration of the scope can be safely omitted.
5955// The MDNode of the scope can be omitted as well for the instructions that are
5956// part of this function. We do not do that at this point, as this might become
5957// too time consuming to do.
5958class AliasScopeTracker {
5959 SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
5960 SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
5961
5962public:
5963 void analyse(Instruction *I) {
5964 // This seems to be faster than checking 'mayReadOrWriteMemory()'.
5965 if (!I->hasMetadataOtherThanDebugLoc())
5966 return;
5967
5968 auto Track = [](Metadata *ScopeList, auto &Container) {
5969 const auto *MDScopeList = dyn_cast_or_null<MDNode>(Val: ScopeList);
5970 if (!MDScopeList || !Container.insert(MDScopeList).second)
5971 return;
5972 for (const auto &MDOperand : MDScopeList->operands())
5973 if (auto *MDScope = dyn_cast<MDNode>(Val: MDOperand))
5974 Container.insert(MDScope);
5975 };
5976
5977 Track(I->getMetadata(KindID: LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
5978 Track(I->getMetadata(KindID: LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
5979 }
5980
5981 bool isNoAliasScopeDeclDead(Instruction *Inst) {
5982 NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: Inst);
5983 if (!Decl)
5984 return false;
5985
5986 assert(Decl->use_empty() &&
5987 "llvm.experimental.noalias.scope.decl in use ?");
5988 const MDNode *MDSL = Decl->getScopeList();
5989 assert(MDSL->getNumOperands() == 1 &&
5990 "llvm.experimental.noalias.scope should refer to a single scope");
5991 auto &MDOperand = MDSL->getOperand(I: 0);
5992 if (auto *MD = dyn_cast<MDNode>(Val: MDOperand))
5993 return !UsedAliasScopesAndLists.contains(Ptr: MD) ||
5994 !UsedNoAliasScopesAndLists.contains(Ptr: MD);
5995
5996 // Not an MDNode ? throw away.
5997 return true;
5998 }
5999};
6000
6001/// Populate the IC worklist from a function, by walking it in reverse
6002/// post-order and adding all reachable code to the worklist.
6003///
6004/// This has a couple of tricks to make the code faster and more powerful. In
6005/// particular, we constant fold and DCE instructions as we go, to avoid adding
6006/// them to the worklist (this significantly speeds up instcombine on code where
6007/// many instructions are dead or constant). Additionally, if we find a branch
6008/// whose condition is a known constant, we only visit the reachable successors.
6009bool InstCombinerImpl::prepareWorklist(Function &F) {
6010 bool MadeIRChange = false;
6011 SmallPtrSet<BasicBlock *, 32> LiveBlocks;
6012 SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
6013 DenseMap<Constant *, Constant *> FoldedConstants;
6014 AliasScopeTracker SeenAliasScopes;
6015
6016 auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) {
6017 for (BasicBlock *Succ : successors(BB))
6018 if (Succ != LiveSucc && DeadEdges.insert(V: {BB, Succ}).second)
6019 for (PHINode &PN : Succ->phis())
6020 for (Use &U : PN.incoming_values())
6021 if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(Val: U)) {
6022 U.set(PoisonValue::get(T: PN.getType()));
6023 MadeIRChange = true;
6024 }
6025 };
6026
6027 for (BasicBlock *BB : RPOT) {
6028 if (!BB->isEntryBlock() && all_of(Range: predecessors(BB), P: [&](BasicBlock *Pred) {
6029 return DeadEdges.contains(V: {Pred, BB}) || DT.dominates(A: BB, B: Pred);
6030 })) {
6031 HandleOnlyLiveSuccessor(BB, nullptr);
6032 continue;
6033 }
6034 LiveBlocks.insert(Ptr: BB);
6035
6036 for (Instruction &Inst : llvm::make_early_inc_range(Range&: *BB)) {
6037 // ConstantProp instruction if trivially constant.
6038 if (!Inst.use_empty() &&
6039 (Inst.getNumOperands() == 0 || isa<Constant>(Val: Inst.getOperand(i: 0))))
6040 if (Constant *C = ConstantFoldInstruction(I: &Inst, DL, TLI: &TLI)) {
6041 LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
6042 << '\n');
6043 Inst.replaceAllUsesWith(V: C);
6044 ++NumConstProp;
6045 if (isInstructionTriviallyDead(I: &Inst, TLI: &TLI))
6046 Inst.eraseFromParent();
6047 MadeIRChange = true;
6048 continue;
6049 }
6050
6051 // See if we can constant fold its operands.
6052 for (Use &U : Inst.operands()) {
6053 if (!isa<ConstantVector>(Val: U) && !isa<ConstantExpr>(Val: U))
6054 continue;
6055
6056 auto *C = cast<Constant>(Val&: U);
6057 Constant *&FoldRes = FoldedConstants[C];
6058 if (!FoldRes)
6059 FoldRes = ConstantFoldConstant(C, DL, TLI: &TLI);
6060
6061 if (FoldRes != C) {
6062 LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
6063 << "\n Old = " << *C
6064 << "\n New = " << *FoldRes << '\n');
6065 U = FoldRes;
6066 MadeIRChange = true;
6067 }
6068 }
6069
6070 // Skip processing debug and pseudo intrinsics in InstCombine. Processing
6071 // these call instructions consumes non-trivial amount of time and
6072 // provides no value for the optimization.
6073 if (!Inst.isDebugOrPseudoInst()) {
6074 InstrsForInstructionWorklist.push_back(Elt: &Inst);
6075 SeenAliasScopes.analyse(I: &Inst);
6076 }
6077 }
6078
6079 // If this is a branch or switch on a constant, mark only the single
6080 // live successor. Otherwise assume all successors are live.
6081 Instruction *TI = BB->getTerminator();
6082 if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: TI)) {
6083 if (isa<UndefValue>(Val: BI->getCondition())) {
6084 // Branch on undef is UB.
6085 HandleOnlyLiveSuccessor(BB, nullptr);
6086 continue;
6087 }
6088 if (auto *Cond = dyn_cast<ConstantInt>(Val: BI->getCondition())) {
6089 bool CondVal = Cond->getZExtValue();
6090 HandleOnlyLiveSuccessor(BB, BI->getSuccessor(i: !CondVal));
6091 continue;
6092 }
6093 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
6094 if (isa<UndefValue>(Val: SI->getCondition())) {
6095 // Switch on undef is UB.
6096 HandleOnlyLiveSuccessor(BB, nullptr);
6097 continue;
6098 }
6099 if (auto *Cond = dyn_cast<ConstantInt>(Val: SI->getCondition())) {
6100 HandleOnlyLiveSuccessor(BB,
6101 SI->findCaseValue(C: Cond)->getCaseSuccessor());
6102 continue;
6103 }
6104 }
6105 }
6106
6107 // Remove instructions inside unreachable blocks. This prevents the
6108 // instcombine code from having to deal with some bad special cases, and
6109 // reduces use counts of instructions.
6110 for (BasicBlock &BB : F) {
6111 if (LiveBlocks.count(Ptr: &BB))
6112 continue;
6113
6114 unsigned NumDeadInstInBB;
6115 NumDeadInstInBB = removeAllNonTerminatorAndEHPadInstructions(BB: &BB);
6116
6117 MadeIRChange |= NumDeadInstInBB != 0;
6118 NumDeadInst += NumDeadInstInBB;
6119 }
6120
6121 // Once we've found all of the instructions to add to instcombine's worklist,
6122 // add them in reverse order. This way instcombine will visit from the top
6123 // of the function down. This jives well with the way that it adds all uses
6124 // of instructions to the worklist after doing a transformation, thus avoiding
6125 // some N^2 behavior in pathological cases.
6126 Worklist.reserve(Size: InstrsForInstructionWorklist.size());
6127 for (Instruction *Inst : reverse(C&: InstrsForInstructionWorklist)) {
6128 // DCE instruction if trivially dead. As we iterate in reverse program
6129 // order here, we will clean up whole chains of dead instructions.
6130 if (isInstructionTriviallyDead(I: Inst, TLI: &TLI) ||
6131 SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
6132 ++NumDeadInst;
6133 LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
6134 salvageDebugInfo(I&: *Inst);
6135 Inst->eraseFromParent();
6136 MadeIRChange = true;
6137 continue;
6138 }
6139
6140 Worklist.push(I: Inst);
6141 }
6142
6143 return MadeIRChange;
6144}
6145
6146void InstCombiner::computeBackEdges() {
6147 // Collect backedges.
6148 SmallVector<bool> Visited(F.getMaxBlockNumber());
6149 for (BasicBlock *BB : RPOT) {
6150 Visited[BB->getNumber()] = true;
6151 for (BasicBlock *Succ : successors(BB))
6152 if (Visited[Succ->getNumber()])
6153 BackEdges.insert(V: {BB, Succ});
6154 }
6155 ComputedBackEdges = true;
6156}
6157
6158static bool combineInstructionsOverFunction(
6159 Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
6160 AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
6161 DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
6162 BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI,
6163 const InstCombineOptions &Opts) {
6164 auto &DL = F.getDataLayout();
6165 bool VerifyFixpoint = Opts.VerifyFixpoint &&
6166 !F.hasFnAttribute(Kind: "instcombine-no-verify-fixpoint");
6167
6168 /// Builder - This is an IRBuilder that automatically inserts new
6169 /// instructions into the worklist when they are created.
6170 IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder(
6171 F.getContext(), TargetFolder(DL),
6172 IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
6173 Worklist.add(I);
6174 if (auto *Assume = dyn_cast<AssumeInst>(Val: I))
6175 AC.registerAssumption(CI: Assume);
6176 }));
6177
6178 ReversePostOrderTraversal<BasicBlock *> RPOT(&F.front());
6179
6180 // Lower dbg.declare intrinsics otherwise their value may be clobbered
6181 // by instcombiner.
6182 bool MadeIRChange = false;
6183 if (ShouldLowerDbgDeclare)
6184 MadeIRChange = LowerDbgDeclare(F);
6185
6186 // Iterate while there is work to do.
6187 unsigned Iteration = 0;
6188 while (true) {
6189 if (Iteration >= Opts.MaxIterations && !VerifyFixpoint) {
6190 LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations
6191 << " on " << F.getName()
6192 << " reached; stopping without verifying fixpoint\n");
6193 break;
6194 }
6195
6196 ++Iteration;
6197 ++NumWorklistIterations;
6198 LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
6199 << F.getName() << "\n");
6200
6201 InstCombinerImpl IC(Worklist, Builder, F, AA, AC, TLI, TTI, DT, ORE, BFI,
6202 BPI, PSI, DL, RPOT);
6203 IC.MaxArraySizeForCombine = MaxArraySize;
6204 bool MadeChangeInThisIteration = IC.prepareWorklist(F);
6205 MadeChangeInThisIteration |= IC.run();
6206 if (!MadeChangeInThisIteration)
6207 break;
6208
6209 MadeIRChange = true;
6210 if (Iteration > Opts.MaxIterations) {
6211 reportFatalUsageError(
6212 reason: "Instruction Combining on " + Twine(F.getName()) +
6213 " did not reach a fixpoint after " + Twine(Opts.MaxIterations) +
6214 " iterations. " +
6215 "Use 'instcombine<no-verify-fixpoint>' or function attribute "
6216 "'instcombine-no-verify-fixpoint' to suppress this error.");
6217 }
6218 }
6219
6220 if (Iteration == 1)
6221 ++NumOneIteration;
6222 else if (Iteration == 2)
6223 ++NumTwoIterations;
6224 else if (Iteration == 3)
6225 ++NumThreeIterations;
6226 else
6227 ++NumFourOrMoreIterations;
6228
6229 return MadeIRChange;
6230}
6231
/// Construct the pass, storing \p Opts in the Options member.
InstCombinePass::InstCombinePass(InstCombineOptions Opts) : Options(Opts) {}
6233
6234void InstCombinePass::printPipeline(
6235 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
6236 static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline(
6237 OS, MapClassName2PassName);
6238 OS << '<';
6239 OS << "max-iterations=" << Options.MaxIterations << ";";
6240 OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint";
6241 OS << '>';
6242}
6243
// Identity tag for this pass; InstCombinePass::run() passes its address to
// LastRunTrackingAnalysis as the key for this pass.
char InstCombinePass::ID = 0;
6245
6246PreservedAnalyses InstCombinePass::run(Function &F,
6247 FunctionAnalysisManager &AM) {
6248 auto &LRT = AM.getResult<LastRunTrackingAnalysis>(IR&: F);
6249 // No changes since last InstCombine pass, exit early.
6250 if (LRT.shouldSkip(ID: &ID))
6251 return PreservedAnalyses::all();
6252
6253 auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F);
6254 auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
6255 auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F);
6256 auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
6257 auto &TTI = AM.getResult<TargetIRAnalysis>(IR&: F);
6258
6259 auto *AA = &AM.getResult<AAManager>(IR&: F);
6260 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F);
6261 ProfileSummaryInfo *PSI =
6262 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(IR&: *F.getParent());
6263 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
6264 &AM.getResult<BlockFrequencyAnalysis>(IR&: F) : nullptr;
6265 auto *BPI = AM.getCachedResult<BranchProbabilityAnalysis>(IR&: F);
6266
6267 if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
6268 BFI, BPI, PSI, Opts: Options)) {
6269 // No changes, all analyses are preserved.
6270 LRT.update(ID: &ID, /*Changed=*/false);
6271 return PreservedAnalyses::all();
6272 }
6273
6274 // Mark all the analyses that instcombine updates as preserved.
6275 PreservedAnalyses PA;
6276 LRT.update(ID: &ID, /*Changed=*/true);
6277 PA.preserve<LastRunTrackingAnalysis>();
6278 PA.preserveSet<CFGAnalyses>();
6279 return PA;
6280}
6281
6282void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
6283 AU.setPreservesCFG();
6284 AU.addRequired<AAResultsWrapperPass>();
6285 AU.addRequired<AssumptionCacheTracker>();
6286 AU.addRequired<TargetLibraryInfoWrapperPass>();
6287 AU.addRequired<TargetTransformInfoWrapperPass>();
6288 AU.addRequired<DominatorTreeWrapperPass>();
6289 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
6290 AU.addPreserved<DominatorTreeWrapperPass>();
6291 AU.addPreserved<AAResultsWrapperPass>();
6292 AU.addPreserved<BasicAAWrapperPass>();
6293 AU.addPreserved<GlobalsAAWrapperPass>();
6294 AU.addRequired<ProfileSummaryInfoWrapperPass>();
6295 LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
6296}
6297
6298bool InstructionCombiningPass::runOnFunction(Function &F) {
6299 if (skipFunction(F))
6300 return false;
6301
6302 // Required analyses.
6303 auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
6304 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
6305 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
6306 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
6307 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
6308 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
6309
6310 // Optional analyses.
6311 ProfileSummaryInfo *PSI =
6312 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
6313 BlockFrequencyInfo *BFI =
6314 (PSI && PSI->hasProfileSummary()) ?
6315 &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
6316 nullptr;
6317 BranchProbabilityInfo *BPI = nullptr;
6318 if (auto *WrapperPass =
6319 getAnalysisIfAvailable<BranchProbabilityInfoWrapperPass>())
6320 BPI = &WrapperPass->getBPI();
6321
6322 return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
6323 BFI, BPI, PSI, Opts: InstCombineOptions());
6324}
6325
6326char InstructionCombiningPass::ID = 0;
6327
6328InstructionCombiningPass::InstructionCombiningPass() : FunctionPass(ID) {}
6329
6330INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
6331 "Combine redundant instructions", false, false)
6332INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
6333INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
6334INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
6335INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
6336INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
6337INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
6338INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
6339INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
6340INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
6341INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
6342 "Combine redundant instructions", false, false)
6343
6344// Initialization Routines.
6345void llvm::initializeInstCombine(PassRegistry &Registry) {
6346 initializeInstructionCombiningPassPass(Registry);
6347}
6348
6349FunctionPass *llvm::createInstructionCombiningPass() {
6350 return new InstructionCombiningPass();
6351}
6352