1//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// InstructionCombining - Combine instructions to form fewer, simple
10// instructions. This pass does not modify the CFG. This pass is where
11// algebraic simplification happens.
12//
13// This pass combines things like:
14// %Y = add i32 %X, 1
15// %Z = add i32 %Y, 1
16// into:
17// %Z = add i32 %X, 2
18//
19// This is a simple worklist driven algorithm.
20//
21// This pass guarantees that the following canonicalizations are performed on
22// the program:
23// 1. If a binary operator has a constant operand, it is moved to the RHS
24// 2. Bitwise operators with constant operands are always grouped so that
25// shifts are performed first, then or's, then and's, then xor's.
26// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27// 4. All cmp instructions on boolean values are replaced with logical ops
28// 5. add X, X is represented as (X*2) => (X << 1)
29// 6. Multiplies with a power-of-two constant argument are transformed into
30// shifts.
31// ... etc.
32//
33//===----------------------------------------------------------------------===//
34
35#include "InstCombineInternal.h"
36#include "llvm/ADT/APFloat.h"
37#include "llvm/ADT/APInt.h"
38#include "llvm/ADT/ArrayRef.h"
39#include "llvm/ADT/DenseMap.h"
40#include "llvm/ADT/SmallPtrSet.h"
41#include "llvm/ADT/SmallVector.h"
42#include "llvm/ADT/Statistic.h"
43#include "llvm/Analysis/AliasAnalysis.h"
44#include "llvm/Analysis/AssumptionCache.h"
45#include "llvm/Analysis/BasicAliasAnalysis.h"
46#include "llvm/Analysis/BlockFrequencyInfo.h"
47#include "llvm/Analysis/CFG.h"
48#include "llvm/Analysis/ConstantFolding.h"
49#include "llvm/Analysis/GlobalsModRef.h"
50#include "llvm/Analysis/InstructionSimplify.h"
51#include "llvm/Analysis/LastRunTrackingAnalysis.h"
52#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
53#include "llvm/Analysis/MemoryBuiltins.h"
54#include "llvm/Analysis/OptimizationRemarkEmitter.h"
55#include "llvm/Analysis/ProfileSummaryInfo.h"
56#include "llvm/Analysis/TargetFolder.h"
57#include "llvm/Analysis/TargetLibraryInfo.h"
58#include "llvm/Analysis/TargetTransformInfo.h"
59#include "llvm/Analysis/Utils/Local.h"
60#include "llvm/Analysis/ValueTracking.h"
61#include "llvm/Analysis/VectorUtils.h"
62#include "llvm/IR/BasicBlock.h"
63#include "llvm/IR/CFG.h"
64#include "llvm/IR/Constant.h"
65#include "llvm/IR/Constants.h"
66#include "llvm/IR/DIBuilder.h"
67#include "llvm/IR/DataLayout.h"
68#include "llvm/IR/DebugInfo.h"
69#include "llvm/IR/DerivedTypes.h"
70#include "llvm/IR/Dominators.h"
71#include "llvm/IR/EHPersonalities.h"
72#include "llvm/IR/Function.h"
73#include "llvm/IR/GetElementPtrTypeIterator.h"
74#include "llvm/IR/IRBuilder.h"
75#include "llvm/IR/InstrTypes.h"
76#include "llvm/IR/Instruction.h"
77#include "llvm/IR/Instructions.h"
78#include "llvm/IR/IntrinsicInst.h"
79#include "llvm/IR/Intrinsics.h"
80#include "llvm/IR/Metadata.h"
81#include "llvm/IR/Operator.h"
82#include "llvm/IR/PassManager.h"
83#include "llvm/IR/PatternMatch.h"
84#include "llvm/IR/Type.h"
85#include "llvm/IR/Use.h"
86#include "llvm/IR/User.h"
87#include "llvm/IR/Value.h"
88#include "llvm/IR/ValueHandle.h"
89#include "llvm/InitializePasses.h"
90#include "llvm/Support/Casting.h"
91#include "llvm/Support/CommandLine.h"
92#include "llvm/Support/Compiler.h"
93#include "llvm/Support/Debug.h"
94#include "llvm/Support/DebugCounter.h"
95#include "llvm/Support/ErrorHandling.h"
96#include "llvm/Support/KnownBits.h"
97#include "llvm/Support/KnownFPClass.h"
98#include "llvm/Support/raw_ostream.h"
99#include "llvm/Transforms/InstCombine/InstCombine.h"
100#include "llvm/Transforms/Utils/BasicBlockUtils.h"
101#include "llvm/Transforms/Utils/Local.h"
102#include <algorithm>
103#include <cassert>
104#include <cstdint>
105#include <memory>
106#include <optional>
107#include <string>
108#include <utility>
109
110#define DEBUG_TYPE "instcombine"
111#include "llvm/Transforms/Utils/InstructionWorklist.h"
112#include <optional>
113
114using namespace llvm;
115using namespace llvm::PatternMatch;
116
// Pass-wide statistics, reported with -stats builds.
STATISTIC(NumWorklistIterations,
          "Number of instruction combining iterations performed");
STATISTIC(NumOneIteration, "Number of functions with one iteration");
STATISTIC(NumTwoIterations, "Number of functions with two iterations");
STATISTIC(NumThreeIterations, "Number of functions with three iterations");
STATISTIC(NumFourOrMoreIterations,
          "Number of functions with four or more iterations");

STATISTIC(NumCombined , "Number of insts combined");
STATISTIC(NumConstProp, "Number of constant folds");
STATISTIC(NumDeadInst , "Number of dead inst eliminated");
STATISTIC(NumSunkInst , "Number of instructions sunk");
STATISTIC(NumExpand, "Number of expansions");
STATISTIC(NumFactor , "Number of factorizations");
STATISTIC(NumReassoc , "Number of reassociations");
// Debug counter that allows bisecting which instruction visits are permitted
// to fire combines (-debug-counter=instcombine-visit=...).
DEBUG_COUNTER(VisitCounter, "instcombine-visit",
              "Controls which instructions are visited");

// Command-line knobs for this pass.
static cl::opt<bool> EnableCodeSinking("instcombine-code-sinking",
                                       cl::desc("Enable code sinking"),
                                       cl::init(true));

static cl::opt<unsigned> MaxSinkNumUsers(
    "instcombine-max-sink-users", cl::init(32),
    cl::desc("Maximum number of undroppable users for instruction sinking"));

static cl::opt<unsigned>
MaxArraySize("instcombine-maxarray-size", cl::init(1024),
             cl::desc("Maximum array size considered when doing a combine"));

// Flag defined elsewhere in LLVM; declared here so this file can consult it.
namespace llvm {
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
} // end namespace llvm

// FIXME: Remove this flag when it is no longer necessary to convert
// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
// increases variable availability at the cost of accuracy. Variables that
// cannot be promoted by mem2reg or SROA will be described as living in memory
// for their entire lifetime. However, passes like DSE and instcombine can
// delete stores to the alloca, leading to misleading and inaccurate debug
// information. This flag can be removed when those passes are fixed.
static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
                                               cl::Hidden, cl::init(true));
160
161std::optional<Instruction *>
162InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) {
163 // Handle target specific intrinsics
164 if (II.getCalledFunction()->isTargetIntrinsic()) {
165 return TTIForTargetIntrinsicsOnly.instCombineIntrinsic(IC&: *this, II);
166 }
167 return std::nullopt;
168}
169
170std::optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
171 IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
172 bool &KnownBitsComputed) {
173 // Handle target specific intrinsics
174 if (II.getCalledFunction()->isTargetIntrinsic()) {
175 return TTIForTargetIntrinsicsOnly.simplifyDemandedUseBitsIntrinsic(
176 IC&: *this, II, DemandedMask, Known, KnownBitsComputed);
177 }
178 return std::nullopt;
179}
180
181std::optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
182 IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts,
183 APInt &PoisonElts2, APInt &PoisonElts3,
184 std::function<void(Instruction *, unsigned, APInt, APInt &)>
185 SimplifyAndSetOp) {
186 // Handle target specific intrinsics
187 if (II.getCalledFunction()->isTargetIntrinsic()) {
188 return TTIForTargetIntrinsicsOnly.simplifyDemandedVectorEltsIntrinsic(
189 IC&: *this, II, DemandedElts, UndefElts&: PoisonElts, UndefElts2&: PoisonElts2, UndefElts3&: PoisonElts3,
190 SimplifyAndSetOp);
191 }
192 return std::nullopt;
193}
194
bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
  // Approved exception for TTI use: This queries a legality property of the
  // target, not a profitability heuristic. Ideally this should be part of
  // DataLayout instead.
  return TTIForTargetIntrinsicsOnly.isValidAddrSpaceCast(FromAS, ToAS);
}
201
/// Emit the byte offset of \p GEP relative to its base pointer as an integer
/// value. If \p RewriteGEP is set and the GEP is an instruction, additionally
/// rewrite a non-trivial GEP into `getelementptr i8, base, Offset` so the
/// offset arithmetic is not duplicated between the GEP and the returned value.
Value *InstCombinerImpl::EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP) {
  if (!RewriteGEP)
    return llvm::emitGEPOffset(&Builder, DL, GEP);

  // Materialize the offset right before the GEP instruction (if any), so the
  // rewritten GEP can consume it; the guard restores the insertion point.
  IRBuilderBase::InsertPointGuard Guard(Builder);
  auto *Inst = dyn_cast<Instruction>(GEP);
  if (Inst)
    Builder.SetInsertPoint(Inst);

  Value *Offset = EmitGEPOffset(GEP);
  // Rewrite non-trivial GEPs to avoid duplicating the offset arithmetic.
  // A GEP with all-constant indices or an i8 source type is already trivial
  // (no arithmetic to share), so it is left alone.
  if (Inst && !GEP->hasAllConstantIndices() &&
      !GEP->getSourceElementType()->isIntegerTy(8)) {
    replaceInstUsesWith(
        *Inst, Builder.CreateGEP(Builder.getInt8Ty(), GEP->getPointerOperand(),
                                 Offset, "", GEP->getNoWrapFlags()));
    eraseInstFromFunction(*Inst);
  }
  return Offset;
}
222
/// Emit the combined offset of the GEP chain \p GEPs as a single integer of
/// type \p IdxTy. \p NW gives the no-wrap flags applied to the emitted adds.
/// If \p RewriteGEPs is set, multi-use GEP instructions in the chain are
/// rewritten as `ptradd` of the computed offset to avoid duplicating the
/// arithmetic.
Value *InstCombinerImpl::EmitGEPOffsets(ArrayRef<GEPOperator *> GEPs,
                                        GEPNoWrapFlags NW, Type *IdxTy,
                                        bool RewriteGEPs) {
  // Chain offsets together with an add; the first offset starts the chain.
  auto Add = [&](Value *Sum, Value *Offset) -> Value * {
    if (Sum)
      return Builder.CreateAdd(Sum, Offset, "", NW.hasNoUnsignedWrap(),
                               NW.isInBounds());
    else
      return Offset;
  };

  Value *Sum = nullptr;
  // Accumulated offsets/base/flags of a pending run of one-use GEPs; these
  // are folded into the next multi-use GEP instead of rewritten one by one.
  Value *OneUseSum = nullptr;
  Value *OneUseBase = nullptr;
  GEPNoWrapFlags OneUseFlags = GEPNoWrapFlags::all();
  for (GEPOperator *GEP : reverse(GEPs)) {
    Value *Offset;
    {
      // Expand the offset at the point of the previous GEP to enable rewriting.
      // However, use the original insertion point for calculating Sum.
      IRBuilderBase::InsertPointGuard Guard(Builder);
      auto *Inst = dyn_cast<Instruction>(GEP);
      if (RewriteGEPs && Inst)
        Builder.SetInsertPoint(Inst);

      Offset = llvm::emitGEPOffset(&Builder, DL, GEP);
      // Splat scalar offsets when a vector index type was requested.
      if (Offset->getType() != IdxTy)
        Offset = Builder.CreateVectorSplat(
            cast<VectorType>(IdxTy)->getElementCount(), Offset);
      if (GEP->hasOneUse()) {
        // Offsets of one-use GEPs will be merged into the next multi-use GEP.
        OneUseSum = Add(OneUseSum, Offset);
        OneUseFlags = OneUseFlags.intersectForOffsetAdd(GEP->getNoWrapFlags());
        if (!OneUseBase)
          OneUseBase = GEP->getPointerOperand();
        continue;
      }

      if (OneUseSum)
        Offset = Add(OneUseSum, Offset);

      // Rewrite the GEP to reuse the computed offset. This also includes
      // offsets from preceding one-use GEPs. Skip the rewrite when the GEP
      // already is exactly `getelementptr i8, ptr, Offset`.
      if (RewriteGEPs && Inst &&
          !(GEP->getSourceElementType()->isIntegerTy(8) &&
            GEP->getOperand(1) == Offset)) {
        replaceInstUsesWith(
            *Inst,
            Builder.CreatePtrAdd(
                OneUseBase ? OneUseBase : GEP->getPointerOperand(), Offset, "",
                OneUseFlags.intersectForOffsetAdd(GEP->getNoWrapFlags())));
        eraseInstFromFunction(*Inst);
      }
    }

    Sum = Add(Sum, Offset);
    OneUseSum = OneUseBase = nullptr;
    OneUseFlags = GEPNoWrapFlags::all();
  }
  // A trailing run of one-use GEPs still needs its offsets accumulated.
  if (OneUseSum)
    Sum = Add(Sum, OneUseSum);
  if (!Sum)
    return Constant::getNullValue(IdxTy);
  return Sum;
}
288
289/// Legal integers and common types are considered desirable. This is used to
290/// avoid creating instructions with types that may not be supported well by the
291/// the backend.
292/// NOTE: This treats i8, i16 and i32 specially because they are common
293/// types in frontend languages.
294bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
295 switch (BitWidth) {
296 case 8:
297 case 16:
298 case 32:
299 return true;
300 default:
301 return DL.isLegalInteger(Width: BitWidth);
302 }
303}
304
305/// Return true if it is desirable to convert an integer computation from a
306/// given bit width to a new bit width.
307/// We don't want to convert from a legal or desirable type (like i8) to an
308/// illegal type or from a smaller to a larger illegal type. A width of '1'
309/// is always treated as a desirable type because i1 is a fundamental type in
310/// IR, and there are many specialized optimizations for i1 types.
311/// Common/desirable widths are equally treated as legal to convert to, in
312/// order to open up more combining opportunities.
313bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
314 unsigned ToWidth) const {
315 bool FromLegal = FromWidth == 1 || DL.isLegalInteger(Width: FromWidth);
316 bool ToLegal = ToWidth == 1 || DL.isLegalInteger(Width: ToWidth);
317
318 // Convert to desirable widths even if they are not legal types.
319 // Only shrink types, to prevent infinite loops.
320 if (ToWidth < FromWidth && isDesirableIntType(BitWidth: ToWidth))
321 return true;
322
323 // If this is a legal or desiable integer from type, and the result would be
324 // an illegal type, don't do the transformation.
325 if ((FromLegal || isDesirableIntType(BitWidth: FromWidth)) && !ToLegal)
326 return false;
327
328 // Otherwise, if both are illegal, do not increase the size of the result. We
329 // do allow things like i160 -> i64, but not i64 -> i160.
330 if (!FromLegal && !ToLegal && ToWidth > FromWidth)
331 return false;
332
333 return true;
334}
335
336/// Return true if it is desirable to convert a computation from 'From' to 'To'.
337/// We don't want to convert from a legal to an illegal type or from a smaller
338/// to a larger illegal type. i1 is always treated as a legal type because it is
339/// a fundamental type in IR, and there are many specialized optimizations for
340/// i1 types.
341bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
342 // TODO: This could be extended to allow vectors. Datalayout changes might be
343 // needed to properly support that.
344 if (!From->isIntegerTy() || !To->isIntegerTy())
345 return false;
346
347 unsigned FromWidth = From->getPrimitiveSizeInBits();
348 unsigned ToWidth = To->getPrimitiveSizeInBits();
349 return shouldChangeType(FromWidth, ToWidth);
350}
351
352// Return true, if No Signed Wrap should be maintained for I.
353// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
354// where both B and C should be ConstantInts, results in a constant that does
355// not overflow. This function only handles the Add/Sub/Mul opcodes. For
356// all other opcodes, the function conservatively returns false.
357static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
358 auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I);
359 if (!OBO || !OBO->hasNoSignedWrap())
360 return false;
361
362 const APInt *BVal, *CVal;
363 if (!match(V: B, P: m_APInt(Res&: BVal)) || !match(V: C, P: m_APInt(Res&: CVal)))
364 return false;
365
366 // We reason about Add/Sub/Mul Only.
367 bool Overflow = false;
368 switch (I.getOpcode()) {
369 case Instruction::Add:
370 (void)BVal->sadd_ov(RHS: *CVal, Overflow);
371 break;
372 case Instruction::Sub:
373 (void)BVal->ssub_ov(RHS: *CVal, Overflow);
374 break;
375 case Instruction::Mul:
376 (void)BVal->smul_ov(RHS: *CVal, Overflow);
377 break;
378 default:
379 // Conservatively return false for other opcodes.
380 return false;
381 }
382 return !Overflow;
383}
384
385static bool hasNoUnsignedWrap(BinaryOperator &I) {
386 auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I);
387 return OBO && OBO->hasNoUnsignedWrap();
388}
389
390static bool hasNoSignedWrap(BinaryOperator &I) {
391 auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I);
392 return OBO && OBO->hasNoSignedWrap();
393}
394
395/// Conservatively clears subclassOptionalData after a reassociation or
396/// commutation. We preserve fast-math flags when applicable as they can be
397/// preserved.
398static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
399 FPMathOperator *FPMO = dyn_cast<FPMathOperator>(Val: &I);
400 if (!FPMO) {
401 I.clearSubclassOptionalData();
402 return;
403 }
404
405 FastMathFlags FMF = I.getFastMathFlags();
406 I.clearSubclassOptionalData();
407 I.setFastMathFlags(FMF);
408}
409
/// Combine constant operands of associative operations either before or after a
/// cast to eliminate one of the associative operations:
/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
                                   InstCombinerImpl &IC) {
  // The cast must be single-use so removing the inner op actually pays off.
  auto *Cast = dyn_cast<CastInst>(BinOp1->getOperand(0));
  if (!Cast || !Cast->hasOneUse())
    return false;

  // TODO: Enhance logic for other casts and remove this check.
  auto CastOpcode = Cast->getOpcode();
  if (CastOpcode != Instruction::ZExt)
    return false;

  // TODO: Enhance logic for other BinOps and remove this check.
  if (!BinOp1->isBitwiseLogicOp())
    return false;

  // The inner binop must be single-use and have the same opcode as the outer
  // one so the two constant operands can be folded into one.
  auto AssocOpcode = BinOp1->getOpcode();
  auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
  if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
    return false;

  Constant *C1, *C2;
  if (!match(BinOp1->getOperand(1), m_Constant(C1)) ||
      !match(BinOp2->getOperand(1), m_Constant(C2)))
    return false;

  // TODO: This assumes a zext cast.
  // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
  // to the destination type might lose bits.

  // Fold the constants together in the destination type:
  // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
  const DataLayout &DL = IC.getDataLayout();
  Type *DestTy = C1->getType();
  Constant *CastC2 = ConstantFoldCastOperand(CastOpcode, C2, DestTy, DL);
  if (!CastC2)
    return false;
  Constant *FoldedC = ConstantFoldBinaryOpOperands(AssocOpcode, C1, CastC2, DL);
  if (!FoldedC)
    return false;

  // Rewire: the cast now consumes X directly and the outer op uses the folded
  // constant. Poison-generating flags may no longer be valid after moving the
  // operation, so conservatively drop them on both rewritten instructions.
  IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
  IC.replaceOperand(*BinOp1, 1, FoldedC);
  BinOp1->dropPoisonGeneratingFlags();
  Cast->dropPoisonGeneratingFlags();
  return true;
}
460
461// Simplifies IntToPtr/PtrToInt RoundTrip Cast.
462// inttoptr ( ptrtoint (x) ) --> x
463Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
464 auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
465 if (IntToPtr && DL.getTypeSizeInBits(Ty: IntToPtr->getDestTy()) ==
466 DL.getTypeSizeInBits(Ty: IntToPtr->getSrcTy())) {
467 auto *PtrToInt = dyn_cast<PtrToIntInst>(Val: IntToPtr->getOperand(i_nocapture: 0));
468 Type *CastTy = IntToPtr->getDestTy();
469 if (PtrToInt &&
470 CastTy->getPointerAddressSpace() ==
471 PtrToInt->getSrcTy()->getPointerAddressSpace() &&
472 DL.getTypeSizeInBits(Ty: PtrToInt->getSrcTy()) ==
473 DL.getTypeSizeInBits(Ty: PtrToInt->getDestTy()))
474 return PtrToInt->getOperand(i_nocapture: 0);
475 }
476 return nullptr;
477}
478
/// This performs a few simplifications for operators that are associative or
/// commutative:
///
///  Commutative operators:
///
///  1. Order operands such that they are listed from right (least complex) to
///     left (most complex).  This puts constants before unary operators before
///     binary operators.
///
///  Associative operators:
///
///  2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
///  3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
///
///  Associative and commutative operators:
///
///  4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
///  5. Transform: "A op (B op C)" ==> "B op (C op A)"  if "C op A" simplifies.
///  6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
///     if C1 and C2 are constants.
bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
  Instruction::BinaryOps Opcode = I.getOpcode();
  bool Changed = false;

  do {
    // Order operands such that they are listed from right (least complex) to
    // left (most complex).  This puts constants before unary operators before
    // binary operators.
    // Note: swapOperands() reports failure with true, so a successful swap
    // sets Changed.
    if (I.isCommutative() && getComplexity(I.getOperand(0)) <
        getComplexity(I.getOperand(1)))
      Changed = !I.swapOperands();

    // Canonicalize symmetric operand pairs (as recognized by
    // matchSymmetricPair) into their preferred order.
    if (I.isCommutative()) {
      if (auto Pair = matchSymmetricPair(I.getOperand(0), I.getOperand(1))) {
        replaceOperand(I, 0, Pair->first);
        replaceOperand(I, 1, Pair->second);
        Changed = true;
      }
    }

    BinaryOperator *Op0 = dyn_cast<BinaryOperator>(I.getOperand(0));
    BinaryOperator *Op1 = dyn_cast<BinaryOperator>(I.getOperand(1));

    if (I.isAssociative()) {
      // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
      if (Op0 && Op0->getOpcode() == Opcode) {
        Value *A = Op0->getOperand(0);
        Value *B = Op0->getOperand(1);
        Value *C = I.getOperand(1);

        // Does "B op C" simplify?
        if (Value *V = simplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) {
          // It simplifies to V.  Form "A op V".
          replaceOperand(I, 0, A);
          replaceOperand(I, 1, V);
          // Compute flag validity *before* clearing flags below.
          bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
          bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(*Op0);

          // Conservatively clear all optional flags since they may not be
          // preserved by the reassociation. Reset nsw/nuw based on the above
          // analysis.
          ClearSubclassDataAfterReassociation(I);

          // Note: this is only valid because SimplifyBinOp doesn't look at
          // the operands to Op0.
          if (IsNUW)
            I.setHasNoUnsignedWrap(true);

          if (IsNSW)
            I.setHasNoSignedWrap(true);

          Changed = true;
          ++NumReassoc;
          continue;
        }
      }

      // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
      if (Op1 && Op1->getOpcode() == Opcode) {
        Value *A = I.getOperand(0);
        Value *B = Op1->getOperand(0);
        Value *C = Op1->getOperand(1);

        // Does "A op B" simplify?
        if (Value *V = simplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) {
          // It simplifies to V.  Form "V op C".
          replaceOperand(I, 0, V);
          replaceOperand(I, 1, C);
          // Conservatively clear the optional flags, since they may not be
          // preserved by the reassociation.
          ClearSubclassDataAfterReassociation(I);
          Changed = true;
          ++NumReassoc;
          continue;
        }
      }
    }

    if (I.isAssociative() && I.isCommutative()) {
      if (simplifyAssocCastAssoc(&I, *this)) {
        Changed = true;
        ++NumReassoc;
        continue;
      }

      // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
      if (Op0 && Op0->getOpcode() == Opcode) {
        Value *A = Op0->getOperand(0);
        Value *B = Op0->getOperand(1);
        Value *C = I.getOperand(1);

        // Does "C op A" simplify?
        if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
          // It simplifies to V.  Form "V op B".
          replaceOperand(I, 0, V);
          replaceOperand(I, 1, B);
          // Conservatively clear the optional flags, since they may not be
          // preserved by the reassociation.
          ClearSubclassDataAfterReassociation(I);
          Changed = true;
          ++NumReassoc;
          continue;
        }
      }

      // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
      if (Op1 && Op1->getOpcode() == Opcode) {
        Value *A = I.getOperand(0);
        Value *B = Op1->getOperand(0);
        Value *C = Op1->getOperand(1);

        // Does "C op A" simplify?
        if (Value *V = simplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) {
          // It simplifies to V.  Form "B op V".
          replaceOperand(I, 0, B);
          replaceOperand(I, 1, V);
          // Conservatively clear the optional flags, since they may not be
          // preserved by the reassociation.
          ClearSubclassDataAfterReassociation(I);
          Changed = true;
          ++NumReassoc;
          continue;
        }
      }

      // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
      // if C1 and C2 are constants.
      Value *A, *B;
      Constant *C1, *C2, *CRes;
      if (Op0 && Op1 &&
          Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
          match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
          match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2)))) &&
          (CRes = ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL))) {
        // nuw survives only if every participating op had it; nuw add is the
        // only case where the new inner op can keep the flag directly.
        bool IsNUW = hasNoUnsignedWrap(I) &&
                     hasNoUnsignedWrap(*Op0) &&
                     hasNoUnsignedWrap(*Op1);
        BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
                                BinaryOperator::CreateNUW(Opcode, A, B) :
                                BinaryOperator::Create(Opcode, A, B);

        // For FP ops, only the fast-math flags common to all three ops are
        // safe to keep on the new inner op.
        if (isa<FPMathOperator>(NewBO)) {
          FastMathFlags Flags = I.getFastMathFlags() &
                                Op0->getFastMathFlags() &
                                Op1->getFastMathFlags();
          NewBO->setFastMathFlags(Flags);
        }
        InsertNewInstWith(NewBO, I.getIterator());
        NewBO->takeName(Op1);
        replaceOperand(I, 0, NewBO);
        replaceOperand(I, 1, CRes);
        // Conservatively clear the optional flags, since they may not be
        // preserved by the reassociation.
        ClearSubclassDataAfterReassociation(I);
        if (IsNUW)
          I.setHasNoUnsignedWrap(true);

        Changed = true;
        continue;
      }
    }

    // No further simplifications.
    return Changed;
  } while (true);
}
665
666/// Return whether "X LOp (Y ROp Z)" is always equal to
667/// "(X LOp Y) ROp (X LOp Z)".
668static bool leftDistributesOverRight(Instruction::BinaryOps LOp,
669 Instruction::BinaryOps ROp) {
670 // X & (Y | Z) <--> (X & Y) | (X & Z)
671 // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
672 if (LOp == Instruction::And)
673 return ROp == Instruction::Or || ROp == Instruction::Xor;
674
675 // X | (Y & Z) <--> (X | Y) & (X | Z)
676 if (LOp == Instruction::Or)
677 return ROp == Instruction::And;
678
679 // X * (Y + Z) <--> (X * Y) + (X * Z)
680 // X * (Y - Z) <--> (X * Y) - (X * Z)
681 if (LOp == Instruction::Mul)
682 return ROp == Instruction::Add || ROp == Instruction::Sub;
683
684 return false;
685}
686
687/// Return whether "(X LOp Y) ROp Z" is always equal to
688/// "(X ROp Z) LOp (Y ROp Z)".
689static bool rightDistributesOverLeft(Instruction::BinaryOps LOp,
690 Instruction::BinaryOps ROp) {
691 if (Instruction::isCommutative(Opcode: ROp))
692 return leftDistributesOverRight(LOp: ROp, ROp: LOp);
693
694 // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
695 return Instruction::isBitwiseLogicOp(Opcode: LOp) && Instruction::isShift(Opcode: ROp);
696
697 // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
698 // but this requires knowing that the addition does not overflow and other
699 // such subtleties.
700}
701
702/// This function returns identity value for given opcode, which can be used to
703/// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
704static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
705 if (isa<Constant>(Val: V))
706 return nullptr;
707
708 return ConstantExpr::getBinOpIdentity(Opcode, Ty: V->getType());
709}
710
/// This function predicates factorization using distributive laws. By default,
/// it just returns the 'Op' inputs. But for special-cases like
/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
/// allow more factorization opportunities.
static Instruction::BinaryOps
getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
                          Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) {
  assert(Op && "Expected a binary operator");
  LHS = Op->getOperand(0);
  RHS = Op->getOperand(1);
  if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
    Constant *C;
    if (match(Op, m_Shl(m_Value(), m_ImmConstant(C)))) {
      // X << C --> X * (1 << C)
      // Report the shift as a multiply so it can factor with real multiplies;
      // RHS is rewritten to the equivalent multiplier constant.
      RHS = ConstantFoldBinaryInstruction(
          Instruction::Shl, ConstantInt::get(Op->getType(), 1), C);
      assert(RHS && "Constant folding of immediate constants failed");
      return Instruction::Mul;
    }
    // TODO: We can add other conversions e.g. shr => div etc.
  }
  if (Instruction::isBitwiseLogicOp(TopOpcode)) {
    if (OtherOp && OtherOp->getOpcode() == Instruction::AShr &&
        match(Op, m_LShr(m_NonNegative(), m_Value()))) {
      // lshr nneg C, X --> ashr nneg C, X
      // Reporting ashr lets this operand factor with the sibling ashr.
      return Instruction::AShr;
    }
  }
  return Op->getOpcode();
}
742
/// This tries to simplify binary operations by factorizing out common terms
/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
///
/// I has the form "(A op' B) op (C op' D)" where op is I's opcode and op' is
/// \p InnerOpcode. Returns the factored value, or nullptr if no profitable
/// factorization exists.
static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
                               InstCombiner::BuilderTy &Builder,
                               Instruction::BinaryOps InnerOpcode, Value *A,
                               Value *B, Value *C, Value *D) {
  assert(A && B && C && D && "All values must be provided");

  Value *V = nullptr;
  Value *RetVal = nullptr;
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();

  // Does "X op' Y" always equal "Y op' X"?
  bool InnerCommutative = Instruction::isCommutative(InnerOpcode);

  // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
  if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode)) {
    // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
    // commutative case, "(A op' B) op (C op' A)"?
    if (A == C || (InnerCommutative && A == D)) {
      if (A != C)
        std::swap(C, D);
      // Consider forming "A op' (B op D)".
      // If "B op D" simplifies then it can be formed with no cost.
      V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I));

      // If "B op D" doesn't simplify then only go on if one of the existing
      // operations "A op' B" and "C op' D" will be zapped as no longer used.
      if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
        V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
      if (V)
        RetVal = Builder.CreateBinOp(InnerOpcode, A, V);
    }
  }

  // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
  if (!RetVal && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) {
    // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
    // commutative case, "(A op' B) op (B op' D)"?
    if (B == D || (InnerCommutative && B == C)) {
      if (B != D)
        std::swap(C, D);
      // Consider forming "(A op C) op' B".
      // If "A op C" simplifies then it can be formed with no cost.
      V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I));

      // If "A op C" doesn't simplify then only go on if one of the existing
      // operations "A op' B" and "C op' D" will be zapped as no longer used.
      if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
        V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
      if (V)
        RetVal = Builder.CreateBinOp(InnerOpcode, V, B);
    }
  }

  if (!RetVal)
    return nullptr;

  ++NumFactor;
  RetVal->takeName(&I);

  // Try to add no-overflow flags to the final value.
  // A flag may only survive if it was present on I and on both original
  // inner operations; start from I and AND in each operand's flags.
  if (isa<BinaryOperator>(RetVal)) {
    bool HasNSW = false;
    bool HasNUW = false;
    if (isa<OverflowingBinaryOperator>(&I)) {
      HasNSW = I.hasNoSignedWrap();
      HasNUW = I.hasNoUnsignedWrap();
    }
    if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
      HasNSW &= LOBO->hasNoSignedWrap();
      HasNUW &= LOBO->hasNoUnsignedWrap();
    }

    if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
      HasNSW &= ROBO->hasNoSignedWrap();
      HasNUW &= ROBO->hasNoUnsignedWrap();
    }

    if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
      // We can propagate 'nsw' if we know that
      //  %Y = mul nsw i16 %X, C
      //  %Z = add nsw i16 %Y, %X
      // =>
      //  %Z = mul nsw i16 %X, C+1
      //
      // iff C+1 isn't INT_MIN
      const APInt *CInt;
      if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
        cast<Instruction>(RetVal)->setHasNoSignedWrap(HasNSW);

      // nuw can be propagated with any constant or nuw value.
      cast<Instruction>(RetVal)->setHasNoUnsignedWrap(HasNUW);
    }
  }
  return RetVal;
}
841
// If `I` has one Const operand and the other matches `(ctpop (not x))`,
// replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`.
// This is only useful if the new subtract can fold so we only handle the
// following cases:
// 1) (add/sub/disjoint_or C, (ctpop (not x))
//    -> (add/sub/disjoint_or C', (ctpop x))
// 2) (cmp pred C, (ctpop (not x))
//    -> (cmp pred C', (ctpop x))
Instruction *InstCombinerImpl::tryFoldInstWithCtpopWithNot(Instruction *I) {
  unsigned Opc = I->getOpcode();
  // Index of the constant operand; the ctpop is expected at `1 - ConstIdx`.
  unsigned ConstIdx = 1;
  switch (Opc) {
  default:
    return nullptr;
  // (ctpop (not x)) <-> (sub nuw nsw BitWidth(x) - (ctpop x))
  // We can fold the BitWidth(x) with add/sub/icmp as long the other operand
  // is constant.
  case Instruction::Sub:
    // For `sub` the constant must be the LHS (C - ctpop), so that the
    // bitwidth can be folded into the constant below.
    ConstIdx = 0;
    break;
  case Instruction::ICmp:
    // Signed predicates aren't correct in some edge cases like for i2 types, as
    // well since (ctpop x) is known [0, log2(BitWidth(x))] almost all signed
    // comparisons against it are simplified to unsigned.
    if (cast<ICmpInst>(Val: I)->isSigned())
      return nullptr;
    break;
  case Instruction::Or:
    // `or` only behaves like `add` when the operands are disjoint.
    if (!match(V: I, P: m_DisjointOr(L: m_Value(), R: m_Value())))
      return nullptr;
    [[fallthrough]];
  case Instruction::Add:
    break;
  }

  Value *Op;
  // Find ctpop.
  if (!match(V: I->getOperand(i: 1 - ConstIdx),
             P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ctpop>(Op0: m_Value(V&: Op)))))
    return nullptr;

  Constant *C;
  // Check other operand is ImmConstant.
  if (!match(V: I->getOperand(i: ConstIdx), P: m_ImmConstant(C)))
    return nullptr;

  Type *Ty = Op->getType();
  Constant *BitWidthC = ConstantInt::get(Ty, V: Ty->getScalarSizeInBits());
  // Need extra check for icmp. Note if this check is true, it generally means
  // the icmp will simplify to true/false.
  if (Opc == Instruction::ICmp && !cast<ICmpInst>(Val: I)->isEquality()) {
    Constant *Cmp =
        ConstantFoldCompareInstOperands(Predicate: ICmpInst::ICMP_UGT, LHS: C, RHS: BitWidthC, DL);
    if (!Cmp || !Cmp->isNullValue())
      return nullptr;
  }

  // Check we can invert `(not x)` for free.
  bool Consumes = false;
  if (!isFreeToInvert(V: Op, WillInvertAllUses: Op->hasOneUse(), DoesConsume&: Consumes) || !Consumes)
    return nullptr;
  Value *NotOp = getFreelyInverted(V: Op, WillInvertAllUses: Op->hasOneUse(), Builder: &Builder);
  assert(NotOp != nullptr &&
         "Desync between isFreeToInvert and getFreelyInverted");

  Value *CtpopOfNotOp = Builder.CreateIntrinsic(RetTy: Ty, ID: Intrinsic::ctpop, Args: NotOp);

  Value *R = nullptr;

  // Do the transformation here to avoid potentially introducing an infinite
  // loop.
  switch (Opc) {
  case Instruction::Sub:
    // C - (BW - ctpop(x)) == ctpop(x) + (C - BW).
    R = Builder.CreateAdd(LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: C, C2: BitWidthC));
    break;
  case Instruction::Or:
  case Instruction::Add:
    // C + (BW - ctpop(x)) == (C + BW) - ctpop(x).
    R = Builder.CreateSub(LHS: ConstantExpr::getAdd(C1: C, C2: BitWidthC), RHS: CtpopOfNotOp);
    break;
  case Instruction::ICmp:
    // icmp pred (BW - ctpop(x)), C  ->  icmp swapped(pred) ctpop(x), (BW - C).
    R = Builder.CreateICmp(P: cast<ICmpInst>(Val: I)->getSwappedPredicate(),
                           LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: BitWidthC, C2: C));
    break;
  default:
    llvm_unreachable("Unhandled Opcode");
  }
  assert(R != nullptr);
  return replaceInstUsesWith(I&: *I, V: R);
}
931
// (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
// IFF
// 1) the logic_shifts match
// 2) either:
//    - Binop1 is `and`, or
//    - the binop/shift pair distributes completely (anything but
//      `add` + `lshr`) and either Binop2 is `and` or
//      (logic_shift (inv_logic_shift C1, C), C) == C1
//
// -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
//
// (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
// IFF
// 1) the logic_shifts match
// 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`).
//
// -> (BinOp (logic_shift (BinOp X, Y)), Mask)
//
// (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt))
// IFF
// 1) Binop1 is bitwise logical operator `and`, `or` or `xor`
// 2) Binop2 is `not`
//
// -> (arithmetic_shift Binop1((not X), Y), Amt)

Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) {
  const DataLayout &DL = I.getDataLayout();
  // The binops this transform is willing to distribute through a shift.
  auto IsValidBinOpc = [](unsigned Opc) {
    switch (Opc) {
    default:
      return false;
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::Add:
      // Skip Sub as we only match constant masks which will canonicalize to use
      // add.
      return true;
    }
  };

  // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra
  // constraints.
  auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
                                      unsigned ShOpc) {
    assert(ShOpc != Instruction::AShr);
    return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
           ShOpc == Instruction::Shl;
  };

  // Opposite logical shift (shl <-> lshr); ashr is rejected by the assert.
  auto GetInvShift = [](unsigned ShOpc) {
    assert(ShOpc != Instruction::AShr);
    return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
  };

  // Decide whether the mask-shifting transform is valid for the given binop
  // pair, shift opcode, and constant mask/shift amount.
  auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2,
                                 unsigned ShOpc, Constant *CMask,
                                 Constant *CShift) {
    // If the BinOp1 is `and` we don't need to check the mask.
    if (BinOpc1 == Instruction::And)
      return true;

    // For all other possible transfers we need complete distributable
    // binop/shift (anything but `add` + `lshr`).
    if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc))
      return false;

    // If BinOp2 is `and`, any mask works (this only really helps for non-splat
    // vecs, otherwise the mask will be simplified and the following check will
    // handle it).
    if (BinOpc2 == Instruction::And)
      return true;

    // Otherwise, need mask that meets the below requirement.
    // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
    Constant *MaskInvShift =
        ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL);
    return ConstantFoldBinaryOpOperands(Opcode: ShOpc, LHS: MaskInvShift, RHS: CShift, DL) ==
           CMask;
  };

  // `ShOpnum` (0 or 1) selects which operand of `I` is the lone shift; the
  // other operand must be a binop of a matching shift and a mask value.
  auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
    Constant *CMask, *CShift;
    Value *X, *Y, *ShiftedX, *Mask, *Shift;
    if (!match(V: I.getOperand(i_nocapture: ShOpnum),
               P: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: Y), R: m_Value(V&: Shift)))))
      return nullptr;
    if (!match(V: I.getOperand(i_nocapture: 1 - ShOpnum),
               P: m_c_BinOp(L: m_CombineAnd(
                              L: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: X), R: m_Specific(V: Shift))),
                              R: m_Value(V&: ShiftedX)),
                          R: m_Value(V&: Mask))))
      return nullptr;
    // Make sure we are matching instruction shifts and not ConstantExpr
    auto *IY = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: ShOpnum));
    auto *IX = dyn_cast<Instruction>(Val: ShiftedX);
    if (!IY || !IX)
      return nullptr;

    // LHS and RHS need same shift opcode
    unsigned ShOpc = IY->getOpcode();
    if (ShOpc != IX->getOpcode())
      return nullptr;

    // Make sure binop is real instruction and not ConstantExpr
    auto *BO2 = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: 1 - ShOpnum));
    if (!BO2)
      return nullptr;

    unsigned BinOpc = BO2->getOpcode();
    // Make sure we have valid binops.
    if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
      return nullptr;

    if (ShOpc == Instruction::AShr) {
      // Arithmetic shifts are only handled in the `not` form:
      // (logic (ashr X, Amt) ^ -1, (ashr Y, Amt)) -> (ashr (logic (not X), Y), Amt)
      if (Instruction::isBitwiseLogicOp(Opcode: I.getOpcode()) &&
          BinOpc == Instruction::Xor && match(V: Mask, P: m_AllOnes())) {
        Value *NotX = Builder.CreateNot(V: X);
        Value *NewBinOp = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NotX);
        return BinaryOperator::Create(
            Op: static_cast<Instruction::BinaryOps>(ShOpc), S1: NewBinOp, S2: Shift);
      }

      return nullptr;
    }

    // If BinOp1 == BinOp2 and it's bitwise or shl with add, then just
    // distribute to drop the shift irrelevant of constants.
    if (BinOpc == I.getOpcode() &&
        IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) {
      Value *NewBinOp2 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: X, RHS: Y);
      Value *NewBinOp1 = Builder.CreateBinOp(
          Opc: static_cast<Instruction::BinaryOps>(ShOpc), LHS: NewBinOp2, RHS: Shift);
      return BinaryOperator::Create(Op: I.getOpcode(), S1: NewBinOp1, S2: Mask);
    }

    // Otherwise we can only distribute by constant shifting the mask, so
    // ensure we have constants.
    if (!match(V: Shift, P: m_ImmConstant(C&: CShift)))
      return nullptr;
    if (!match(V: Mask, P: m_ImmConstant(C&: CMask)))
      return nullptr;

    // Check if we can distribute the binops.
    if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
      return nullptr;

    Constant *NewCMask =
        ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL);
    Value *NewBinOp2 = Builder.CreateBinOp(
        Opc: static_cast<Instruction::BinaryOps>(BinOpc), LHS: X, RHS: NewCMask);
    Value *NewBinOp1 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NewBinOp2);
    return BinaryOperator::Create(Op: static_cast<Instruction::BinaryOps>(ShOpc),
                                  S1: NewBinOp1, S2: CShift);
  };

  // Try with the lone shift on either side of the binop.
  if (Instruction *R = MatchBinOp(0))
    return R;
  return MatchBinOp(1);
}
1090
// (Binop (zext C), (select C, T, F))
// -> (select C, (binop 1, T), (binop 0, F))
//
// (Binop (sext C), (select C, T, F))
// -> (select C, (binop -1, T), (binop 0, F))
//
// Attempt to simplify binary operations into a select with folded args, when
// one operand of the binop is a select instruction and the other operand is a
// zext/sext extension, whose value is the select condition.
Instruction *
InstCombinerImpl::foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I) {
  // TODO: this simplification may be extended to any speculatable instruction,
  // not just binops, and would possibly be handled better in FoldOpIntoSelect.
  Instruction::BinaryOps Opc = I.getOpcode();
  Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
  Value *A, *CondVal, *TrueVal, *FalseVal;
  Value *CastOp;

  // Matches CastOp = zext/sext of an i1 `A`, SelectOp = a select; binds
  // A/CondVal/TrueVal/FalseVal as a side effect.
  auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) {
    return match(V: CastOp, P: m_ZExtOrSExt(Op: m_Value(V&: A))) &&
           A->getType()->getScalarSizeInBits() == 1 &&
           match(V: SelectOp, P: m_Select(C: m_Value(V&: CondVal), L: m_Value(V&: TrueVal),
                                    R: m_Value(V&: FalseVal)));
  };

  // Make sure one side of the binop is a select instruction, and the other is a
  // zero/sign extension operating on a i1.
  if (MatchSelectAndCast(LHS, RHS))
    CastOp = LHS;
  else if (MatchSelectAndCast(RHS, LHS))
    CastOp = RHS;
  else
    return nullptr;

  // The original select, used as metadata source for the new select (unless
  // the profcheck fixes are disabled).
  SelectInst *SI = ProfcheckDisableMetadataFixes
                       ? nullptr
                       : cast<SelectInst>(Val: CastOp == LHS ? RHS : LHS);

  // Fold the binop of V against the constant the cast contributes in one arm.
  // NOTE: when `IsTrueArm` is true the cast input A is false in that arm, so
  // the cast folds to 0; otherwise it folds to 1 (zext) or -1 (sext). The
  // original operand order of the binop is preserved.
  auto NewFoldedConst = [&](bool IsTrueArm, Value *V) {
    bool IsCastOpRHS = (CastOp == RHS);
    bool IsZExt = isa<ZExtInst>(Val: CastOp);
    Constant *C;

    if (IsTrueArm) {
      C = Constant::getNullValue(Ty: V->getType());
    } else if (IsZExt) {
      unsigned BitWidth = V->getType()->getScalarSizeInBits();
      C = Constant::getIntegerValue(Ty: V->getType(), V: APInt(BitWidth, 1));
    } else {
      C = Constant::getAllOnesValue(Ty: V->getType());
    }

    return IsCastOpRHS ? Builder.CreateBinOp(Opc, LHS: V, RHS: C)
                       : Builder.CreateBinOp(Opc, LHS: C, RHS: V);
  };

  // If the value used in the zext/sext is the select condition, or the negated
  // of the select condition, the binop can be simplified.
  if (CondVal == A) {
    Value *NewTrueVal = NewFoldedConst(false, TrueVal);
    return SelectInst::Create(C: CondVal, S1: NewTrueVal,
                              S2: NewFoldedConst(true, FalseVal), NameStr: "", InsertBefore: nullptr, MDFrom: SI);
  }

  if (match(V: A, P: m_Not(V: m_Specific(V: CondVal)))) {
    Value *NewTrueVal = NewFoldedConst(true, TrueVal);
    return SelectInst::Create(C: CondVal, S1: NewTrueVal,
                              S2: NewFoldedConst(false, FalseVal), NameStr: "", InsertBefore: nullptr, MDFrom: SI);
  }

  return nullptr;
}
1163
/// Try the factorization folds on I: the two-sided form
/// "(A op' B) op (C op' D)" plus the one-sided forms where the bare operand
/// is paired with the identity value of the inner opcode. Returns the
/// simplified value, or null if nothing folded.
Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) {
  Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS);
  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS);
  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
  Value *A, *B, *C, *D;
  // NOTE: LHSOpcode/RHSOpcode are only assigned when Op0/Op1 (respectively)
  // are non-null; every read below is guarded by the same condition.
  Instruction::BinaryOps LHSOpcode, RHSOpcode;

  if (Op0)
    LHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op0, LHS&: A, RHS&: B, OtherOp: Op1);
  if (Op1)
    RHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op1, LHS&: C, RHS&: D, OtherOp: Op0);

  // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
  // a common term.
  if (Op0 && Op1 && LHSOpcode == RHSOpcode)
    if (Value *V = tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C, D))
      return V;

  // The instruction has the form "(A op' B) op (C)". Try to factorize common
  // term.
  if (Op0)
    if (Value *Ident = getIdentityValue(Opcode: LHSOpcode, V: RHS))
      if (Value *V =
              tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C: RHS, D: Ident))
        return V;

  // The instruction has the form "(B) op (C op' D)". Try to factorize common
  // term.
  if (Op1)
    if (Value *Ident = getIdentityValue(Opcode: RHSOpcode, V: LHS))
      if (Value *V =
              tryFactorization(I, SQ, Builder, InnerOpcode: RHSOpcode, A: LHS, B: Ident, C, D))
        return V;

  return nullptr;
}
1201
/// This tries to simplify binary operations which some other binary operation
/// distributes over either by factorizing out common terms
/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
/// Returns the simplified value, or null if it didn't simplify.
Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) {
  Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS);
  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS);
  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();

  // Factorization.
  if (Value *R = tryFactorizationFolds(I))
    return R;

  // Expansion: only profitable when at least one of the expanded halves
  // simplifies away, so each case below checks the simplified results first.
  if (Op0 && rightDistributesOverLeft(LOp: Op0->getOpcode(), ROp: TopLevelOpcode)) {
    // The instruction has the form "(A op' B) op C". See if expanding it out
    // to "(A op C) op' (B op C)" results in simplifications.
    Value *A = Op0->getOperand(i_nocapture: 0), *B = Op0->getOperand(i_nocapture: 1), *C = RHS;
    Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'

    // Disable the use of undef because it's not safe to distribute undef.
    auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef();
    Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive);
    Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: B, RHS: C, Q: SQDistributive);

    // Do "A op C" and "B op C" both simplify?
    if (L && R) {
      // They do! Return "L op' R".
      ++NumExpand;
      C = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R);
      C->takeName(V: &I);
      return C;
    }

    // Does "A op C" simplify to the identity value for the inner opcode?
    if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) {
      // They do! Return "B op C".
      ++NumExpand;
      C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: B, RHS: C);
      C->takeName(V: &I);
      return C;
    }

    // Does "B op C" simplify to the identity value for the inner opcode?
    if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) {
      // They do! Return "A op C".
      ++NumExpand;
      C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C);
      C->takeName(V: &I);
      return C;
    }
  }

  if (Op1 && leftDistributesOverRight(LOp: TopLevelOpcode, ROp: Op1->getOpcode())) {
    // The instruction has the form "A op (B op' C)". See if expanding it out
    // to "(A op B) op' (A op C)" results in simplifications.
    Value *A = LHS, *B = Op1->getOperand(i_nocapture: 0), *C = Op1->getOperand(i_nocapture: 1);
    Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'

    // Disable the use of undef because it's not safe to distribute undef.
    auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef();
    Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: B, Q: SQDistributive);
    Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive);

    // Do "A op B" and "A op C" both simplify?
    if (L && R) {
      // They do! Return "L op' R".
      ++NumExpand;
      A = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R);
      A->takeName(V: &I);
      return A;
    }

    // Does "A op B" simplify to the identity value for the inner opcode?
    if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) {
      // They do! Return "A op C".
      ++NumExpand;
      A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C);
      A->takeName(V: &I);
      return A;
    }

    // Does "A op C" simplify to the identity value for the inner opcode?
    if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) {
      // They do! Return "A op B".
      ++NumExpand;
      A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: B);
      A->takeName(V: &I);
      return A;
    }
  }

  // Fall back to folding selects that feed the binop.
  return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
}
1298
1299static std::optional<std::pair<Value *, Value *>>
1300matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS) {
1301 if (LHS->getParent() != RHS->getParent())
1302 return std::nullopt;
1303
1304 if (LHS->getNumIncomingValues() < 2)
1305 return std::nullopt;
1306
1307 if (!equal(LRange: LHS->blocks(), RRange: RHS->blocks()))
1308 return std::nullopt;
1309
1310 Value *L0 = LHS->getIncomingValue(i: 0);
1311 Value *R0 = RHS->getIncomingValue(i: 0);
1312
1313 for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) {
1314 Value *L1 = LHS->getIncomingValue(i: I);
1315 Value *R1 = RHS->getIncomingValue(i: I);
1316
1317 if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1))
1318 continue;
1319
1320 return std::nullopt;
1321 }
1322
1323 return std::optional(std::pair(L0, R0));
1324}
1325
1326std::optional<std::pair<Value *, Value *>>
1327InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) {
1328 Instruction *LHSInst = dyn_cast<Instruction>(Val: LHS);
1329 Instruction *RHSInst = dyn_cast<Instruction>(Val: RHS);
1330 if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode())
1331 return std::nullopt;
1332 switch (LHSInst->getOpcode()) {
1333 case Instruction::PHI:
1334 return matchSymmetricPhiNodesPair(LHS: cast<PHINode>(Val: LHS), RHS: cast<PHINode>(Val: RHS));
1335 case Instruction::Select: {
1336 Value *Cond = LHSInst->getOperand(i: 0);
1337 Value *TrueVal = LHSInst->getOperand(i: 1);
1338 Value *FalseVal = LHSInst->getOperand(i: 2);
1339 if (Cond == RHSInst->getOperand(i: 0) && TrueVal == RHSInst->getOperand(i: 2) &&
1340 FalseVal == RHSInst->getOperand(i: 1))
1341 return std::pair(TrueVal, FalseVal);
1342 return std::nullopt;
1343 }
1344 case Instruction::Call: {
1345 // Match min(a, b) and max(a, b)
1346 MinMaxIntrinsic *LHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: LHSInst);
1347 MinMaxIntrinsic *RHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: RHSInst);
1348 if (LHSMinMax && RHSMinMax &&
1349 LHSMinMax->getPredicate() ==
1350 ICmpInst::getSwappedPredicate(pred: RHSMinMax->getPredicate()) &&
1351 ((LHSMinMax->getLHS() == RHSMinMax->getLHS() &&
1352 LHSMinMax->getRHS() == RHSMinMax->getRHS()) ||
1353 (LHSMinMax->getLHS() == RHSMinMax->getRHS() &&
1354 LHSMinMax->getRHS() == RHSMinMax->getLHS())))
1355 return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS());
1356 return std::nullopt;
1357 }
1358 default:
1359 return std::nullopt;
1360 }
1361}
1362
/// Try to fold a binop whose operand(s) are selects into a single select by
/// simplifying the binop in each arm. Returns the new select, or null.
Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
                                                        Value *LHS,
                                                        Value *RHS) {
  Value *A, *B, *C, *D, *E, *F;
  bool LHSIsSelect = match(V: LHS, P: m_Select(C: m_Value(V&: A), L: m_Value(V&: B), R: m_Value(V&: C)));
  bool RHSIsSelect = match(V: RHS, P: m_Select(C: m_Value(V&: D), L: m_Value(V&: E), R: m_Value(V&: F)));
  if (!LHSIsSelect && !RHSIsSelect)
    return nullptr;

  // Original select, used as the metadata source for the select(s) built
  // below (unless the profcheck fixes are disabled).
  SelectInst *SI = ProfcheckDisableMetadataFixes
                       ? nullptr
                       : cast<SelectInst>(Val: LHSIsSelect ? LHS : RHS);

  // Propagate the binop's fast-math flags (if it is an FP op) to everything
  // created by the builder in this function; the guard restores them on exit.
  FastMathFlags FMF;
  BuilderTy::FastMathFlagGuard Guard(Builder);
  if (const auto *FPOp = dyn_cast<FPMathOperator>(Val: &I)) {
    FMF = FPOp->getFastMathFlags();
    Builder.setFastMathFlags(FMF);
  }

  Instruction::BinaryOps Opcode = I.getOpcode();
  SimplifyQuery Q = SQ.getWithInstruction(I: &I);

  // True/False are the simplified arms; a null entry means that arm did not
  // simplify. The lambda below reads them by reference.
  Value *Cond, *True = nullptr, *False = nullptr;

  // Special-case for add/negate combination. Replace the zero in the negation
  // with the trailing add operand:
  // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
  // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
  auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
    // We need an 'add' and exactly 1 arm of the select to have been simplified.
    if (Opcode != Instruction::Add || (!True && !False) || (True && False))
      return nullptr;
    Value *N;
    if (True && match(V: FVal, P: m_Neg(V: m_Value(V&: N)))) {
      Value *Sub = Builder.CreateSub(LHS: Z, RHS: N);
      return Builder.CreateSelect(C: Cond, True, False: Sub, Name: I.getName(), MDFrom: SI);
    }
    if (False && match(V: TVal, P: m_Neg(V: m_Value(V&: N)))) {
      Value *Sub = Builder.CreateSub(LHS: Z, RHS: N);
      return Builder.CreateSelect(C: Cond, True: Sub, False, Name: I.getName(), MDFrom: SI);
    }
    return nullptr;
  };

  if (LHSIsSelect && RHSIsSelect && A == D) {
    // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
    Cond = A;
    True = simplifyBinOp(Opcode, LHS: B, RHS: E, FMF, Q);
    False = simplifyBinOp(Opcode, LHS: C, RHS: F, FMF, Q);

    // If only one arm simplified but both selects will die (single use),
    // materialize the other arm explicitly.
    if (LHS->hasOneUse() && RHS->hasOneUse()) {
      if (False && !True)
        True = Builder.CreateBinOp(Opc: Opcode, LHS: B, RHS: E);
      else if (True && !False)
        False = Builder.CreateBinOp(Opc: Opcode, LHS: C, RHS: F);
    }
  } else if (LHSIsSelect && LHS->hasOneUse()) {
    // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
    Cond = A;
    True = simplifyBinOp(Opcode, LHS: B, RHS, FMF, Q);
    False = simplifyBinOp(Opcode, LHS: C, RHS, FMF, Q);
    if (Value *NewSel = foldAddNegate(B, C, RHS))
      return NewSel;
  } else if (RHSIsSelect && RHS->hasOneUse()) {
    // X op (D ? E : F) -> D ? (X op E) : (X op F)
    Cond = D;
    True = simplifyBinOp(Opcode, LHS, RHS: E, FMF, Q);
    False = simplifyBinOp(Opcode, LHS, RHS: F, FMF, Q);
    if (Value *NewSel = foldAddNegate(E, F, LHS))
      return NewSel;
  }

  // Both arms must have simplified (or been materialized above).
  if (!True || !False)
    return nullptr;

  Value *NewSI = Builder.CreateSelect(C: Cond, True, False, Name: I.getName(), MDFrom: SI);
  NewSI->takeName(V: &I);
  return NewSI;
}
1443
/// Freely adapt every user of V as-if V was changed to !V.
/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) {
  assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
  // Iterate with early-inc since replaceInstUsesWith below mutates the use
  // list while we walk it.
  for (User *U : make_early_inc_range(Range: I->users())) {
    if (U == IgnoredUser)
      continue; // Don't consider this user.
    switch (cast<Instruction>(Val: U)->getOpcode()) {
    case Instruction::Select: {
      // select !C, T, F --> select C, F, T (also swap profile weights).
      auto *SI = cast<SelectInst>(Val: U);
      SI->swapValues();
      SI->swapProfMetadata();
      break;
    }
    case Instruction::Br: {
      // br !C, BB1, BB2 --> br C, BB2, BB1.
      BranchInst *BI = cast<BranchInst>(Val: U);
      BI->swapSuccessors(); // swaps prof metadata too
      if (BPI)
        BPI->swapSuccEdgesProbabilities(Src: BI->getParent());
      break;
    }
    case Instruction::Xor:
      // A not-style xor user becomes redundant once I is inverted; forward
      // its uses to I itself.
      replaceInstUsesWith(I&: cast<Instruction>(Val&: *U), V: I);
      // Add to worklist for DCE.
      addToWorklist(I: cast<Instruction>(Val: U));
      break;
    default:
      llvm_unreachable("Got unexpected user - out of sync with "
                       "canFreelyInvertAllUsersOf() ?");
    }
  }

  // Update pre-existing debug value uses.
  SmallVector<DbgVariableRecord *, 4> DbgVariableRecords;
  llvm::findDbgValues(V: I, DbgVariableRecords);

  // Wrap each debug-value reference to I in a DW_OP_not so the recorded
  // expression still describes the original (un-inverted) value.
  for (DbgVariableRecord *DbgVal : DbgVariableRecords) {
    SmallVector<uint64_t, 1> Ops = {dwarf::DW_OP_not};
    for (unsigned Idx = 0, End = DbgVal->getNumVariableLocationOps();
         Idx != End; ++Idx)
      if (DbgVal->getVariableLocationOp(OpIdx: Idx) == I)
        DbgVal->setExpression(
            DIExpression::appendOpsToArg(Expr: DbgVal->getExpression(), Ops, ArgNo: Idx));
  }
}
1489
1490/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
1491/// constant zero (which is the 'negate' form).
1492Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
1493 Value *NegV;
1494 if (match(V, P: m_Neg(V: m_Value(V&: NegV))))
1495 return NegV;
1496
1497 // Constants can be considered to be negated values if they can be folded.
1498 if (ConstantInt *C = dyn_cast<ConstantInt>(Val: V))
1499 return ConstantExpr::getNeg(C);
1500
1501 if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(Val: V))
1502 if (C->getType()->getElementType()->isIntegerTy())
1503 return ConstantExpr::getNeg(C);
1504
1505 if (ConstantVector *CV = dyn_cast<ConstantVector>(Val: V)) {
1506 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1507 Constant *Elt = CV->getAggregateElement(Elt: i);
1508 if (!Elt)
1509 return nullptr;
1510
1511 if (isa<UndefValue>(Val: Elt))
1512 continue;
1513
1514 if (!isa<ConstantInt>(Val: Elt))
1515 return nullptr;
1516 }
1517 return ConstantExpr::getNeg(C: CV);
1518 }
1519
1520 // Negate integer vector splats.
1521 if (auto *CV = dyn_cast<Constant>(Val: V))
1522 if (CV->getType()->isVectorTy() &&
1523 CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
1524 return ConstantExpr::getNeg(C: CV);
1525
1526 return nullptr;
1527}
1528
1529// Try to fold:
1530// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1531// -> ({s|u}itofp (int_binop x, y))
1532// 2) (fp_binop ({s|u}itofp x), FpC)
1533// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1534//
1535// Assuming the sign of the cast for x/y is `OpsFromSigned`.
1536Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
1537 BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps,
1538 Constant *Op1FpC, SmallVectorImpl<WithCache<const Value *>> &OpsKnown) {
1539
1540 Type *FPTy = BO.getType();
1541 Type *IntTy = IntOps[0]->getType();
1542
1543 unsigned IntSz = IntTy->getScalarSizeInBits();
1544 // This is the maximum number of inuse bits by the integer where the int -> fp
1545 // casts are exact.
1546 unsigned MaxRepresentableBits =
1547 APFloat::semanticsPrecision(FPTy->getScalarType()->getFltSemantics());
1548
1549 // Preserve known number of leading bits. This can allow us to trivial nsw/nuw
1550 // checks later on.
1551 unsigned NumUsedLeadingBits[2] = {IntSz, IntSz};
1552
1553 // NB: This only comes up if OpsFromSigned is true, so there is no need to
1554 // cache if between calls to `foldFBinOpOfIntCastsFromSign`.
1555 auto IsNonZero = [&](unsigned OpNo) -> bool {
1556 if (OpsKnown[OpNo].hasKnownBits() &&
1557 OpsKnown[OpNo].getKnownBits(Q: SQ).isNonZero())
1558 return true;
1559 return isKnownNonZero(V: IntOps[OpNo], Q: SQ);
1560 };
1561
1562 auto IsNonNeg = [&](unsigned OpNo) -> bool {
1563 // NB: This matches the impl in ValueTracking, we just try to use cached
1564 // knownbits here. If we ever start supporting WithCache for
1565 // `isKnownNonNegative`, change this to an explicit call.
1566 return OpsKnown[OpNo].getKnownBits(Q: SQ).isNonNegative();
1567 };
1568
1569 // Check if we know for certain that ({s|u}itofp op) is exact.
1570 auto IsValidPromotion = [&](unsigned OpNo) -> bool {
1571 // Can we treat this operand as the desired sign?
1572 if (OpsFromSigned != isa<SIToFPInst>(Val: BO.getOperand(i_nocapture: OpNo)) &&
1573 !IsNonNeg(OpNo))
1574 return false;
1575
1576 // If fp precision >= bitwidth(op) then its exact.
1577 // NB: This is slightly conservative for `sitofp`. For signed conversion, we
1578 // can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be
1579 // handled specially. We can't, however, increase the bound arbitrarily for
1580 // `sitofp` as for larger sizes, it won't sign extend.
1581 if (MaxRepresentableBits < IntSz) {
1582 // Otherwise if its signed cast check that fp precisions >= bitwidth(op) -
1583 // numSignBits(op).
1584 // TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change
1585 // `IntOps[OpNo]` arguments to `KnownOps[OpNo]`.
1586 if (OpsFromSigned)
1587 NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(Op: IntOps[OpNo]);
1588 // Finally for unsigned check that fp precision >= bitwidth(op) -
1589 // numLeadingZeros(op).
1590 else {
1591 NumUsedLeadingBits[OpNo] =
1592 IntSz - OpsKnown[OpNo].getKnownBits(Q: SQ).countMinLeadingZeros();
1593 }
1594 }
1595 // NB: We could also check if op is known to be a power of 2 or zero (which
1596 // will always be representable). Its unlikely, however, that is we are
1597 // unable to bound op in any way we will be able to pass the overflow checks
1598 // later on.
1599
1600 if (MaxRepresentableBits < NumUsedLeadingBits[OpNo])
1601 return false;
1602 // Signed + Mul also requires that op is non-zero to avoid -0 cases.
1603 return !OpsFromSigned || BO.getOpcode() != Instruction::FMul ||
1604 IsNonZero(OpNo);
1605 };
1606
1607 // If we have a constant rhs, see if we can losslessly convert it to an int.
1608 if (Op1FpC != nullptr) {
1609 // Signed + Mul req non-zero
1610 if (OpsFromSigned && BO.getOpcode() == Instruction::FMul &&
1611 !match(V: Op1FpC, P: m_NonZeroFP()))
1612 return nullptr;
1613
1614 Constant *Op1IntC = ConstantFoldCastOperand(
1615 Opcode: OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, C: Op1FpC,
1616 DestTy: IntTy, DL);
1617 if (Op1IntC == nullptr)
1618 return nullptr;
1619 if (ConstantFoldCastOperand(Opcode: OpsFromSigned ? Instruction::SIToFP
1620 : Instruction::UIToFP,
1621 C: Op1IntC, DestTy: FPTy, DL) != Op1FpC)
1622 return nullptr;
1623
1624 // First try to keep sign of cast the same.
1625 IntOps[1] = Op1IntC;
1626 }
1627
1628 // Ensure lhs/rhs integer types match.
1629 if (IntTy != IntOps[1]->getType())
1630 return nullptr;
1631
1632 if (Op1FpC == nullptr) {
1633 if (!IsValidPromotion(1))
1634 return nullptr;
1635 }
1636 if (!IsValidPromotion(0))
1637 return nullptr;
1638
1639 // Final we check if the integer version of the binop will not overflow.
1640 BinaryOperator::BinaryOps IntOpc;
1641 // Because of the precision check, we can often rule out overflows.
1642 bool NeedsOverflowCheck = true;
1643 // Try to conservatively rule out overflow based on the already done precision
1644 // checks.
1645 unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1;
1646 unsigned OverflowMaxCurBits =
1647 std::max(a: NumUsedLeadingBits[0], b: NumUsedLeadingBits[1]);
1648 bool OutputSigned = OpsFromSigned;
1649 switch (BO.getOpcode()) {
1650 case Instruction::FAdd:
1651 IntOpc = Instruction::Add;
1652 OverflowMaxOutputBits += OverflowMaxCurBits;
1653 break;
1654 case Instruction::FSub:
1655 IntOpc = Instruction::Sub;
1656 OverflowMaxOutputBits += OverflowMaxCurBits;
1657 break;
1658 case Instruction::FMul:
1659 IntOpc = Instruction::Mul;
1660 OverflowMaxOutputBits += OverflowMaxCurBits * 2;
1661 break;
1662 default:
1663 llvm_unreachable("Unsupported binop");
1664 }
1665 // The precision check may have already ruled out overflow.
1666 if (OverflowMaxOutputBits < IntSz) {
1667 NeedsOverflowCheck = false;
1668 // We can bound unsigned overflow from sub to in range signed value (this is
1669 // what allows us to avoid the overflow check for sub).
1670 if (IntOpc == Instruction::Sub)
1671 OutputSigned = true;
1672 }
1673
1674 // Precision check did not rule out overflow, so need to check.
1675 // TODO: If we add support for `WithCache` in `willNotOverflow`, change
1676 // `IntOps[...]` arguments to `KnownOps[...]`.
1677 if (NeedsOverflowCheck &&
1678 !willNotOverflow(Opcode: IntOpc, LHS: IntOps[0], RHS: IntOps[1], CxtI: BO, IsSigned: OutputSigned))
1679 return nullptr;
1680
1681 Value *IntBinOp = Builder.CreateBinOp(Opc: IntOpc, LHS: IntOps[0], RHS: IntOps[1]);
1682 if (auto *IntBO = dyn_cast<BinaryOperator>(Val: IntBinOp)) {
1683 IntBO->setHasNoSignedWrap(OutputSigned);
1684 IntBO->setHasNoUnsignedWrap(!OutputSigned);
1685 }
1686 if (OutputSigned)
1687 return new SIToFPInst(IntBinOp, FPTy);
1688 return new UIToFPInst(IntBinOp, FPTy);
1689}
1690
// Try to fold:
// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
//    -> ({s|u}itofp (int_binop x, y))
// 2) (fp_binop ({s|u}itofp x), FpC)
//    -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
// The precision/overflow legality checks are done per cast sign in
// foldFBinOpOfIntCastsFromSign; this wrapper only matches the operand shapes
// and tries both sign interpretations of the casts.
Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
  // Don't perform the fold on vectors, as the integer operation may be much
  // more expensive than the float operation in that case.
  if (BO.getType()->isVectorTy())
    return nullptr;

  // IntOps[i] holds the integer value feeding the i'th cast operand. When the
  // RHS is a floating-point constant, Op1FpC is set instead of IntOps[1].
  std::array<Value *, 2> IntOps = {nullptr, nullptr};
  Constant *Op1FpC = nullptr;
  // Check for:
  // 1) (binop ({s|u}itofp x), ({s|u}itofp y))
  // 2) (binop ({s|u}itofp x), FpC)
  if (!match(V: BO.getOperand(i_nocapture: 0), P: m_SIToFP(Op: m_Value(V&: IntOps[0]))) &&
      !match(V: BO.getOperand(i_nocapture: 0), P: m_UIToFP(Op: m_Value(V&: IntOps[0]))))
    return nullptr;

  if (!match(V: BO.getOperand(i_nocapture: 1), P: m_Constant(C&: Op1FpC)) &&
      !match(V: BO.getOperand(i_nocapture: 1), P: m_SIToFP(Op: m_Value(V&: IntOps[1]))) &&
      !match(V: BO.getOperand(i_nocapture: 1), P: m_UIToFP(Op: m_Value(V&: IntOps[1]))))
    return nullptr;

  // Cache KnownBits a bit to potentially save some analysis.
  SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]};

  // Try treating x/y as coming from both `uitofp` and `sitofp`. There are
  // different constraints depending on the sign of the cast.
  // NB: `(uitofp nneg X)` == `(sitofp nneg X)`.
  if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false,
                                                    IntOps, Op1FpC, OpsKnown))
    return R;
  return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps,
                                      Op1FpC, OpsKnown);
}
1728
1729/// A binop with a constant operand and a sign-extended boolean operand may be
1730/// converted into a select of constants by applying the binary operation to
1731/// the constant with the two possible values of the extended boolean (0 or -1).
1732Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
1733 // TODO: Handle non-commutative binop (constant is operand 0).
1734 // TODO: Handle zext.
1735 // TODO: Peek through 'not' of cast.
1736 Value *BO0 = BO.getOperand(i_nocapture: 0);
1737 Value *BO1 = BO.getOperand(i_nocapture: 1);
1738 Value *X;
1739 Constant *C;
1740 if (!match(V: BO0, P: m_SExt(Op: m_Value(V&: X))) || !match(V: BO1, P: m_ImmConstant(C)) ||
1741 !X->getType()->isIntOrIntVectorTy(BitWidth: 1))
1742 return nullptr;
1743
1744 // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
1745 Constant *Ones = ConstantInt::getAllOnesValue(Ty: BO.getType());
1746 Constant *Zero = ConstantInt::getNullValue(Ty: BO.getType());
1747 Value *TVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Ones, RHS: C);
1748 Value *FVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Zero, RHS: C);
1749 return createSelectInstWithUnknownProfile(C: X, S1: TVal, S2: FVal);
1750}
1751
/// Try to simplify instruction \p I under the assumption that select \p SI
/// (one of I's operands) takes the arm chosen by \p IsTrueArm. Each operand of
/// \p I is replaced by the value it is known to have within that arm, and the
/// result is simplified. Returns the simplified value, or null.
static Value *simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
                                                 bool IsTrueArm) {
  SmallVector<Value *> Ops;
  for (Value *Op : I.operands()) {
    Value *V = nullptr;
    if (Op == SI) {
      // The select itself becomes the chosen arm's value.
      V = IsTrueArm ? SI->getTrueValue() : SI->getFalseValue();
    } else if (match(V: SI->getCondition(),
                     P: m_SpecificICmp(MatchPred: IsTrueArm ? ICmpInst::ICMP_EQ
                                                    : ICmpInst::ICMP_NE,
                                     L: m_Specific(V: Op), R: m_Value(V))) &&
               isGuaranteedNotToBeUndefOrPoison(V)) {
      // In this arm the select condition pins Op to equal V: the condition is
      // `Op == V` in the true arm or `Op != V` in the false arm. V was already
      // captured by the m_Value above, so nothing more to do here.
      // Pass
    } else if (match(V: Op, P: m_ZExt(Op: m_Specific(V: SI->getCondition())))) {
      // zext of the select condition is 1 in the true arm, 0 in the false arm.
      V = IsTrueArm ? ConstantInt::get(Ty: Op->getType(), V: 1)
                    : ConstantInt::getNullValue(Ty: Op->getType());
    } else {
      // Operand unrelated to the select; keep it unchanged.
      V = Op;
    }
    Ops.push_back(Elt: V);
  }

  return simplifyInstructionWithOperands(I: &I, NewOps: Ops, Q: I.getDataLayout());
}
1776
1777static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
1778 Value *NewOp, InstCombiner &IC) {
1779 Instruction *Clone = I.clone();
1780 Clone->replaceUsesOfWith(From: SI, To: NewOp);
1781 Clone->dropUBImplyingAttrsAndMetadata();
1782 IC.InsertNewInstBefore(New: Clone, Old: I.getIterator());
1783 return Clone;
1784}
1785
/// Try to fold instruction \p Op into select \p SI (one of Op's operands) by
/// applying \p Op to both arms and selecting between the results:
///   op (select C, T, F), X --> select C, (op T, X), (op F, X)
/// At least one arm must simplify (both if \p SimplifyBothArms is set); a
/// non-simplifying arm is materialized by cloning \p Op. Returns the new
/// select, or null if the transform does not apply.
Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
                                                bool FoldWithMultiUse,
                                                bool SimplifyBothArms) {
  // Don't modify shared select instructions unless set FoldWithMultiUse
  if (!SI->hasOneUser() && !FoldWithMultiUse)
    return nullptr;

  Value *TV = SI->getTrueValue();
  Value *FV = SI->getFalseValue();

  // Bool selects with constant operands can be folded to logical ops.
  if (SI->getType()->isIntOrIntVectorTy(BitWidth: 1))
    return nullptr;

  // Avoid breaking min/max reduction pattern,
  // which is necessary for vectorization later.
  if (isa<MinMaxIntrinsic>(Val: &Op))
    for (Value *IntrinOp : Op.operands())
      if (auto *PN = dyn_cast<PHINode>(Val: IntrinOp))
        for (Value *PhiOp : PN->operands())
          if (PhiOp == &Op)
            return nullptr;

  // Test if a FCmpInst instruction is used exclusively by a select as
  // part of a minimum or maximum operation. If so, refrain from doing
  // any other folding. This helps out other analyses which understand
  // non-obfuscated minimum and maximum idioms. And in this case, at
  // least one of the comparison operands has at least one user besides
  // the compare (the select), which would often largely negate the
  // benefit of folding anyway.
  if (auto *CI = dyn_cast<FCmpInst>(Val: SI->getCondition())) {
    if (CI->hasOneUse()) {
      Value *Op0 = CI->getOperand(i_nocapture: 0), *Op1 = CI->getOperand(i_nocapture: 1);
      if (((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1)) &&
          !CI->isCommutative())
        return nullptr;
    }
  }

  // Make sure that one of the select arms folds successfully.
  Value *NewTV = simplifyOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm=*/true);
  Value *NewFV =
      simplifyOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm=*/false);
  if (!NewTV && !NewFV)
    return nullptr;

  // Callers may require both arms to simplify so that no clone of Op is ever
  // created (see foldBinOpIntoSelectOrPhi with a non-constant RHS).
  if (SimplifyBothArms && !(NewTV && NewFV))
    return nullptr;

  // Create an instruction for the arm that did not fold.
  if (!NewTV)
    NewTV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: TV, IC&: *this);
  if (!NewFV)
    NewFV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: FV, IC&: *this);
  return SelectInst::Create(C: SI->getCondition(), S1: NewTV, S2: NewFV, NameStr: "", InsertBefore: nullptr, MDFrom: SI);
}
1842
/// Try to simplify \p I with its phi operand \p PN replaced by the incoming
/// value \p InValue; all other operands are phi-translated into the incoming
/// block \p InBB. If plain simplification fails and \p I is an icmp, also try
/// to decide the compare from the condition of \p InBB's conditional branch.
/// Returns a value valid at the end of \p InBB, or null.
static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN,
                                         Value *InValue, BasicBlock *InBB,
                                         const DataLayout &DL,
                                         const SimplifyQuery SQ) {
  // NB: It is a precondition of this transform that the operands be
  // phi translatable!
  SmallVector<Value *> Ops;
  for (Value *Op : I.operands()) {
    if (Op == PN)
      Ops.push_back(Elt: InValue);
    else
      Ops.push_back(Elt: Op->DoPHITranslation(CurBB: PN->getParent(), PredBB: InBB));
  }

  // Don't consider the simplification successful if we get back a constant
  // expression. That's just an instruction in hiding.
  // Also reject the case where we simplify back to the phi node. We wouldn't
  // be able to remove it in that case.
  Value *NewVal = simplifyInstructionWithOperands(
      I: &I, NewOps: Ops, Q: SQ.getWithInstruction(I: InBB->getTerminator()));
  if (NewVal && NewVal != PN && !match(V: NewVal, P: m_ConstantExpr()))
    return NewVal;

  // Check if incoming PHI value can be replaced with constant
  // based on implied condition.
  BranchInst *TerminatorBI = dyn_cast<BranchInst>(Val: InBB->getTerminator());
  const ICmpInst *ICmp = dyn_cast<ICmpInst>(Val: &I);
  if (TerminatorBI && TerminatorBI->isConditional() &&
      TerminatorBI->getSuccessor(i: 0) != TerminatorBI->getSuccessor(i: 1) && ICmp) {
    // On the edge into the phi block, the branch condition is known true if
    // we came via successor 0 and known false via successor 1.
    bool LHSIsTrue = TerminatorBI->getSuccessor(i: 0) == PN->getParent();
    std::optional<bool> ImpliedCond = isImpliedCondition(
        LHS: TerminatorBI->getCondition(), RHSPred: ICmp->getCmpPredicate(), RHSOp0: Ops[0], RHSOp1: Ops[1],
        DL, LHSIsTrue);
    if (ImpliedCond)
      return ConstantInt::getBool(Ty: I.getType(), V: ImpliedCond.value());
  }

  return nullptr;
}
1882
/// In some cases it is beneficial to fold a select into a binary operator.
/// For example:
/// %1 = or %in, 4
/// %2 = select %cond, %1, %in
/// %3 = or %2, 1
/// =>
/// %1 = select i1 %cond, 5, 1
/// %2 = or %1, %in
Instruction *InstCombinerImpl::foldBinOpSelectBinOp(BinaryOperator &Op) {
  assert(Op.isAssociative() && "The operation must be associative!");

  SelectInst *SI = dyn_cast<SelectInst>(Val: Op.getOperand(i_nocapture: 0));

  // Match (binop (select ...), ImmConst) with both the select and the binop
  // having a single use.
  Constant *Const;
  if (!SI || !match(V: Op.getOperand(i_nocapture: 1), P: m_ImmConstant(C&: Const)) ||
      !Op.hasOneUse() || !SI->hasOneUse())
    return nullptr;

  Value *TV = SI->getTrueValue();
  Value *FV = SI->getFalseValue();
  Value *Input, *NewTV, *NewFV;
  Constant *Const2;

  // One select arm must be (binop other-arm, Const2) with the same opcode.
  // Associativity then lets the two constants fold together: the arm that had
  // the inner binop selects (Const binop Const2), the other arm selects Const,
  // and the shared input is applied once outside the select.
  if (TV->hasOneUse() && match(V: TV, P: m_BinOp(Opcode: Op.getOpcode(), L: m_Specific(V: FV),
                                             R: m_ImmConstant(C&: Const2)))) {
    NewTV = ConstantFoldBinaryInstruction(Opcode: Op.getOpcode(), V1: Const, V2: Const2);
    NewFV = Const;
    Input = FV;
  } else if (FV->hasOneUse() &&
             match(V: FV, P: m_BinOp(Opcode: Op.getOpcode(), L: m_Specific(V: TV),
                                  R: m_ImmConstant(C&: Const2)))) {
    NewTV = Const;
    NewFV = ConstantFoldBinaryInstruction(Opcode: Op.getOpcode(), V1: Const, V2: Const2);
    Input = TV;
  } else
    return nullptr;

  // Constant folding can fail; bail out rather than emit a partial transform.
  if (!NewTV || !NewFV)
    return nullptr;

  Value *NewSI =
      Builder.CreateSelect(C: SI->getCondition(), True: NewTV, False: NewFV, Name: "",
                           MDFrom: ProfcheckDisableMetadataFixes ? nullptr : SI);
  return BinaryOperator::Create(Op: Op.getOpcode(), S1: NewSI, S2: Input);
}
1928
/// Fold instruction \p I into its phi operand \p PN by computing \p I
/// separately for each incoming value:
///   op (phi [V1, BB1], [V2, BB2]) --> phi [(op V1), BB1], [(op V2), BB2]
/// Each per-edge value must either simplify, be known to fold with its
/// incoming value on a later InstCombine iteration, or (for at most one edge)
/// be cloned into an unconditionally-branching predecessor.
Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN,
                                             bool AllowMultipleUses) {
  unsigned NumPHIValues = PN->getNumIncomingValues();
  if (NumPHIValues == 0)
    return nullptr;

  // We normally only transform phis with a single use. However, if a PHI has
  // multiple uses and they are all the same operation, we can fold *all* of the
  // uses into the PHI.
  bool OneUse = PN->hasOneUse();
  bool IdenticalUsers = false;
  if (!AllowMultipleUses && !OneUse) {
    // Walk the use list for the instruction, comparing them to I.
    for (User *U : PN->users()) {
      Instruction *UI = cast<Instruction>(Val: U);
      if (UI != &I && !I.isIdenticalTo(I: UI))
        return nullptr;
    }
    // Otherwise, we can replace *all* users with the new PHI we form.
    IdenticalUsers = true;
  }

  // Check that all operands are phi-translatable.
  for (Value *Op : I.operands()) {
    if (Op == PN)
      continue;

    // Non-instructions never require phi-translation.
    auto *I = dyn_cast<Instruction>(Val: Op);
    if (!I)
      continue;

    // Phi-translate can handle phi nodes in the same block.
    if (isa<PHINode>(Val: I))
      if (I->getParent() == PN->getParent())
        continue;

    // Operand dominates the block, no phi-translation necessary.
    if (DT.dominates(Def: I, BB: PN->getParent()))
      continue;

    // Not phi-translatable, bail out.
    return nullptr;
  }

  // Check to see whether the instruction can be folded into each phi operand.
  // If there is one operand that does not fold, remember the BB it is in.
  // NewPhiValues holds one entry per incoming edge; null entries are filled in
  // later by cloning I into the corresponding predecessor.
  SmallVector<Value *> NewPhiValues;
  SmallVector<unsigned int> OpsToMoveUseToIncomingBB;
  bool SeenNonSimplifiedInVal = false;
  for (unsigned i = 0; i != NumPHIValues; ++i) {
    Value *InVal = PN->getIncomingValue(i);
    BasicBlock *InBB = PN->getIncomingBlock(i);

    if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InValue: InVal, InBB, DL, SQ)) {
      NewPhiValues.push_back(Elt: NewVal);
      continue;
    }

    // Handle some cases that can't be fully simplified, but where we know that
    // the two instructions will fold into one.
    auto WillFold = [&]() {
      if (!InVal->hasUseList() || !InVal->hasOneUser())
        return false;

      // icmp of ucmp/scmp with constant will fold to icmp.
      const APInt *Ignored;
      if (isa<CmpIntrinsic>(Val: InVal) &&
          match(V: &I, P: m_ICmp(L: m_Specific(V: PN), R: m_APInt(Res&: Ignored))))
        return true;

      // icmp eq zext(bool), 0 will fold to !bool.
      if (isa<ZExtInst>(Val: InVal) &&
          cast<ZExtInst>(Val: InVal)->getSrcTy()->isIntOrIntVectorTy(BitWidth: 1) &&
          match(V: &I,
                P: m_SpecificICmp(MatchPred: ICmpInst::ICMP_EQ, L: m_Specific(V: PN), R: m_Zero())))
        return true;

      return false;
    };

    if (WillFold()) {
      OpsToMoveUseToIncomingBB.push_back(Elt: i);
      NewPhiValues.push_back(Elt: nullptr);
      continue;
    }

    if (!OneUse && !IdenticalUsers)
      return nullptr;

    if (SeenNonSimplifiedInVal)
      return nullptr; // More than one non-simplified value.
    SeenNonSimplifiedInVal = true;

    // If there is exactly one non-simplified value, we can insert a copy of the
    // operation in that block. However, if this is a critical edge, we would
    // be inserting the computation on some other paths (e.g. inside a loop).
    // Only do this if the pred block is unconditionally branching into the phi
    // block. Also, make sure that the pred block is not dead code.
    BranchInst *BI = dyn_cast<BranchInst>(Val: InBB->getTerminator());
    if (!BI || !BI->isUnconditional() || !DT.isReachableFromEntry(A: InBB))
      return nullptr;

    NewPhiValues.push_back(Elt: nullptr);
    OpsToMoveUseToIncomingBB.push_back(Elt: i);

    // Do not push the operation across a loop backedge. This could result in
    // an infinite combine loop, and is generally non-profitable (especially
    // if the operation was originally outside the loop).
    if (isBackEdge(From: InBB, To: PN->getParent()))
      return nullptr;
  }

  // Clone the instruction that uses the phi node and move it into the incoming
  // BB because we know that the next iteration of InstCombine will simplify it.
  // One clone per predecessor block is enough even if several edges come from
  // the same block (Clones caches per-block clones).
  SmallDenseMap<BasicBlock *, Instruction *> Clones;
  for (auto OpIndex : OpsToMoveUseToIncomingBB) {
    Value *Op = PN->getIncomingValue(i: OpIndex);
    BasicBlock *OpBB = PN->getIncomingBlock(i: OpIndex);

    Instruction *Clone = Clones.lookup(Val: OpBB);
    if (!Clone) {
      Clone = I.clone();
      for (Use &U : Clone->operands()) {
        if (U == PN)
          U = Op;
        else
          U = U->DoPHITranslation(CurBB: PN->getParent(), PredBB: OpBB);
      }
      Clone = InsertNewInstBefore(New: Clone, Old: OpBB->getTerminator()->getIterator());
      Clones.insert(KV: {OpBB, Clone});
      // We may have speculated the instruction.
      Clone->dropUBImplyingAttrsAndMetadata();
    }

    NewPhiValues[OpIndex] = Clone;
  }

  // Okay, we can do the transformation: create the new PHI node.
  PHINode *NewPN = PHINode::Create(Ty: I.getType(), NumReservedValues: PN->getNumIncomingValues());
  InsertNewInstBefore(New: NewPN, Old: PN->getIterator());
  NewPN->takeName(V: PN);
  NewPN->setDebugLoc(PN->getDebugLoc());

  for (unsigned i = 0; i != NumPHIValues; ++i)
    NewPN->addIncoming(V: NewPhiValues[i], BB: PN->getIncomingBlock(i));

  if (IdenticalUsers) {
    // Collect and deduplicate users up-front to avoid iterator invalidation.
    SmallSetVector<Instruction *, 4> ToReplace;
    for (User *U : PN->users()) {
      Instruction *User = cast<Instruction>(Val: U);
      if (User == &I)
        continue;
      ToReplace.insert(X: User);
    }
    for (Instruction *I : ToReplace) {
      replaceInstUsesWith(I&: *I, V: NewPN);
      eraseInstFromFunction(I&: *I);
    }
    // All identical users other than I were erased above, so only I itself
    // still uses PN.
    OneUse = true;
  }

  if (OneUse) {
    replaceAllDbgUsesWith(From&: *PN, To&: *NewPN, DomPoint&: *PN, DT);
  }
  return replaceInstUsesWith(I, V: NewPN);
}
2097
/// Fold a binop of two interleaved constant-step recurrences into a single
/// recurrence:
///   %pn0 = phi [%init0, %pre], [%bo0, %loop]   ; %bo0 = binop %pn0, %c0
///   %pn1 = phi [%init1, %pre], [%bo1, %loop]   ; %bo1 = binop %pn1, %c1
///   %bo  = binop %bo0, %bo1
/// -->
///   %pn  = phi [(init0 binop init1), %pre], [%bo, %loop]
///   %bo  = binop %pn, (c0 binop c1)
/// Requires an associative (and, per the assert below, commutative) opcode and
/// constant starts/steps.
Instruction *InstCombinerImpl::foldBinopWithRecurrence(BinaryOperator &BO) {
  if (!BO.isAssociative())
    return nullptr;

  // Find the interleaved binary ops. Each must have exactly two uses and live
  // in the same block, so the whole pattern can be replaced wholesale.
  auto Opc = BO.getOpcode();
  auto *BO0 = dyn_cast<BinaryOperator>(Val: BO.getOperand(i_nocapture: 0));
  auto *BO1 = dyn_cast<BinaryOperator>(Val: BO.getOperand(i_nocapture: 1));
  if (!BO0 || !BO1 || !BO0->hasNUses(N: 2) || !BO1->hasNUses(N: 2) ||
      BO0->getOpcode() != Opc || BO1->getOpcode() != Opc ||
      !BO0->isAssociative() || !BO1->isAssociative() ||
      BO0->getParent() != BO1->getParent())
    return nullptr;

  assert(BO.isCommutative() && BO0->isCommutative() && BO1->isCommutative() &&
         "Expected commutative instructions!");

  // Find the matching phis, forming the recurrences.
  PHINode *PN0, *PN1;
  Value *Start0, *Step0, *Start1, *Step1;
  if (!matchSimpleRecurrence(I: BO0, P&: PN0, Start&: Start0, Step&: Step0) || !PN0->hasOneUse() ||
      !matchSimpleRecurrence(I: BO1, P&: PN1, Start&: Start1, Step&: Step1) || !PN1->hasOneUse() ||
      PN0->getParent() != PN1->getParent())
    return nullptr;

  assert(PN0->getNumIncomingValues() == 2 && PN1->getNumIncomingValues() == 2 &&
         "Expected PHIs with two incoming values!");

  // Convert the start and step values to constants.
  auto *Init0 = dyn_cast<Constant>(Val: Start0);
  auto *Init1 = dyn_cast<Constant>(Val: Start1);
  auto *C0 = dyn_cast<Constant>(Val: Step0);
  auto *C1 = dyn_cast<Constant>(Val: Step1);
  if (!Init0 || !Init1 || !C0 || !C1)
    return nullptr;

  // Fold the recurrence constants.
  auto *Init = ConstantFoldBinaryInstruction(Opcode: Opc, V1: Init0, V2: Init1);
  auto *C = ConstantFoldBinaryInstruction(Opcode: Opc, V1: C0, V2: C1);
  if (!Init || !C)
    return nullptr;

  // Create the reduced PHI.
  auto *NewPN = PHINode::Create(Ty: PN0->getType(), NumReservedValues: PN0->getNumIncomingValues(),
                                NameStr: "reduced.phi");

  // Create the new binary op.
  auto *NewBO = BinaryOperator::Create(Op: Opc, S1: NewPN, S2: C);
  if (Opc == Instruction::FAdd || Opc == Instruction::FMul) {
    // Intersect FMF flags for FADD and FMUL.
    FastMathFlags Intersect = BO0->getFastMathFlags() &
                              BO1->getFastMathFlags() & BO.getFastMathFlags();
    NewBO->setFastMathFlags(Intersect);
  } else {
    // Merge nuw/nsw-style wrap flags conservatively from all three binops.
    OverflowTracking Flags;
    Flags.AllKnownNonNegative = false;
    Flags.AllKnownNonZero = false;
    Flags.mergeFlags(I&: *BO0);
    Flags.mergeFlags(I&: *BO1);
    Flags.mergeFlags(I&: BO);
    Flags.applyFlags(I&: *NewBO);
  }
  NewBO->takeName(V: &BO);

  // Wire up the new phi: the start edge gets the folded Init constant, the
  // backedge gets the new binop. Both old phis must agree on the edge layout.
  for (unsigned I = 0, E = PN0->getNumIncomingValues(); I != E; ++I) {
    auto *V = PN0->getIncomingValue(i: I);
    auto *BB = PN0->getIncomingBlock(i: I);
    if (V == Init0) {
      assert(((PN1->getIncomingValue(0) == Init1 &&
               PN1->getIncomingBlock(0) == BB) ||
              (PN1->getIncomingValue(1) == Init1 &&
               PN1->getIncomingBlock(1) == BB)) &&
             "Invalid incoming block!");
      NewPN->addIncoming(V: Init, BB);
    } else if (V == BO0) {
      assert(((PN1->getIncomingValue(0) == BO1 &&
               PN1->getIncomingBlock(0) == BB) ||
              (PN1->getIncomingValue(1) == BO1 &&
               PN1->getIncomingBlock(1) == BB)) &&
             "Invalid incoming block!");
      NewPN->addIncoming(V: NewBO, BB);
    } else
      llvm_unreachable("Unexpected incoming value!");
  }

  LLVM_DEBUG(dbgs() << "    Combined " << *PN0 << "\n           " << *BO0
                    << "\n    with   " << *PN1 << "\n           " << *BO1
                    << '\n');

  // Insert the new recurrence and remove the old (dead) ones.
  InsertNewInstWith(New: NewPN, Old: PN0->getIterator());
  InsertNewInstWith(New: NewBO, Old: BO0->getIterator());

  eraseInstFromFunction(
      I&: *replaceInstUsesWith(I&: *BO0, V: PoisonValue::get(T: BO0->getType())));
  eraseInstFromFunction(
      I&: *replaceInstUsesWith(I&: *BO1, V: PoisonValue::get(T: BO1->getType())));
  eraseInstFromFunction(I&: *PN0);
  eraseInstFromFunction(I&: *PN1);

  return replaceInstUsesWith(I&: BO, V: NewBO);
}
2200
/// Fold a binop whose two operands are phis. Tries, in order:
/// 1) a binop of interleaved recurrences (foldBinopWithRecurrence);
/// 2) per-edge identity constants: if on every edge one phi supplies the
///    binop's identity, the binop folds to a phi of the other phi's values;
/// 3) hoisting: if one predecessor supplies immediate constants to both phis,
///    fold those and move the binop into the other (unconditional) predecessor.
Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
  // Attempt to fold binary operators whose operands are simple recurrences.
  if (auto *NewBO = foldBinopWithRecurrence(BO))
    return NewBO;

  // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
  // we are guarding against replicating the binop in >1 predecessor.
  // This could miss matching a phi with 2 constant incoming values.
  auto *Phi0 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 0));
  auto *Phi1 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 1));
  if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
      Phi0->getNumOperands() != Phi1->getNumOperands())
    return nullptr;

  // TODO: Remove the restriction for binop being in the same block as the phis.
  if (BO.getParent() != Phi0->getParent() ||
      BO.getParent() != Phi1->getParent())
    return nullptr;

  // Fold if there is at least one specific constant value in phi0 or phi1's
  // incoming values that comes from the same block and this specific constant
  // value can be used to do optimization for specific binary operator.
  // For example:
  // %phi0 = phi i32 [0, %bb0], [%i, %bb1]
  // %phi1 = phi i32 [%j, %bb0], [0, %bb1]
  // %add = add i32 %phi0, %phi1
  // ==>
  // %add = phi i32 [%j, %bb0], [%i, %bb1]
  Constant *C = ConstantExpr::getBinOpIdentity(Opcode: BO.getOpcode(), Ty: BO.getType(),
                                               /*AllowRHSConstant*/ false);
  if (C) {
    SmallVector<Value *, 4> NewIncomingValues;
    // For one pair of same-block incoming values, require one side to be the
    // identity constant and keep the other side as the folded result.
    auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) {
      auto &Phi0Use = std::get<0>(t&: T);
      auto &Phi1Use = std::get<1>(t&: T);
      if (Phi0->getIncomingBlock(U: Phi0Use) != Phi1->getIncomingBlock(U: Phi1Use))
        return false;
      Value *Phi0UseV = Phi0Use.get();
      Value *Phi1UseV = Phi1Use.get();
      if (Phi0UseV == C)
        NewIncomingValues.push_back(Elt: Phi1UseV);
      else if (Phi1UseV == C)
        NewIncomingValues.push_back(Elt: Phi0UseV);
      else
        return false;
      return true;
    };

    if (all_of(Range: zip(t: Phi0->operands(), u: Phi1->operands()),
               P: CanFoldIncomingValuePair)) {
      PHINode *NewPhi =
          PHINode::Create(Ty: Phi0->getType(), NumReservedValues: Phi0->getNumOperands());
      assert(NewIncomingValues.size() == Phi0->getNumOperands() &&
             "The number of collected incoming values should equal the number "
             "of the original PHINode operands!");
      for (unsigned I = 0; I < Phi0->getNumOperands(); I++)
        NewPhi->addIncoming(V: NewIncomingValues[I], BB: Phi0->getIncomingBlock(i: I));
      return NewPhi;
    }
  }

  // The hoisting strategy below only handles two-predecessor phis.
  if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
    return nullptr;

  // Match a pair of incoming constants for one of the predecessor blocks.
  BasicBlock *ConstBB, *OtherBB;
  Constant *C0, *C1;
  if (match(V: Phi0->getIncomingValue(i: 0), P: m_ImmConstant(C&: C0))) {
    ConstBB = Phi0->getIncomingBlock(i: 0);
    OtherBB = Phi0->getIncomingBlock(i: 1);
  } else if (match(V: Phi0->getIncomingValue(i: 1), P: m_ImmConstant(C&: C0))) {
    ConstBB = Phi0->getIncomingBlock(i: 1);
    OtherBB = Phi0->getIncomingBlock(i: 0);
  } else {
    return nullptr;
  }
  if (!match(V: Phi1->getIncomingValueForBlock(BB: ConstBB), P: m_ImmConstant(C&: C1)))
    return nullptr;

  // The block that we are hoisting to must reach here unconditionally.
  // Otherwise, we could be speculatively executing an expensive or
  // non-speculative op.
  auto *PredBlockBranch = dyn_cast<BranchInst>(Val: OtherBB->getTerminator());
  if (!PredBlockBranch || PredBlockBranch->isConditional() ||
      !DT.isReachableFromEntry(A: OtherBB))
    return nullptr;

  // TODO: This check could be tightened to only apply to binops (div/rem) that
  // are not safe to speculatively execute. But that could allow hoisting
  // potentially expensive instructions (fdiv for example).
  for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
    if (!isGuaranteedToTransferExecutionToSuccessor(I: &*BBIter))
      return nullptr;

  // Fold constants for the predecessor block with constant incoming values.
  Constant *NewC = ConstantFoldBinaryOpOperands(Opcode: BO.getOpcode(), LHS: C0, RHS: C1, DL);
  if (!NewC)
    return nullptr;

  // Make a new binop in the predecessor block with the non-constant incoming
  // values.
  Builder.SetInsertPoint(PredBlockBranch);
  Value *NewBO = Builder.CreateBinOp(Opc: BO.getOpcode(),
                                     LHS: Phi0->getIncomingValueForBlock(BB: OtherBB),
                                     RHS: Phi1->getIncomingValueForBlock(BB: OtherBB));
  if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(Val: NewBO))
    NotFoldedNewBO->copyIRFlags(V: &BO);

  // Replace the binop with a phi of the new values. The old phis are dead.
  PHINode *NewPhi = PHINode::Create(Ty: BO.getType(), NumReservedValues: 2);
  NewPhi->addIncoming(V: NewBO, BB: OtherBB);
  NewPhi->addIncoming(V: NewC, BB: ConstBB);
  return NewPhi;
}
2315
2316Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
2317 bool IsOtherParamConst = isa<Constant>(Val: I.getOperand(i_nocapture: 1));
2318
2319 if (auto *Sel = dyn_cast<SelectInst>(Val: I.getOperand(i_nocapture: 0))) {
2320 if (Instruction *NewSel =
2321 FoldOpIntoSelect(Op&: I, SI: Sel, FoldWithMultiUse: false, SimplifyBothArms: !IsOtherParamConst))
2322 return NewSel;
2323 } else if (auto *PN = dyn_cast<PHINode>(Val: I.getOperand(i_nocapture: 0))) {
2324 if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
2325 return NewPhi;
2326 }
2327 return nullptr;
2328}
2329
2330static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
2331 // If this GEP has only 0 indices, it is the same pointer as
2332 // Src. If Src is not a trivial GEP too, don't combine
2333 // the indices.
2334 if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
2335 !Src.hasOneUse())
2336 return false;
2337 return true;
2338}
2339
/// Find a constant NewC that has property:
///   shuffle(NewC, ShMask) = C
/// Returns nullptr if such a constant does not exist e.g. ShMask=<0,0> C=<1,2>
///
/// A 1-to-1 mapping is not required. Example:
/// ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <poison,5,6,poison>
Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
                                              VectorType *NewCTy) {
  // Scalable vectors have no per-element representation, so the inverse
  // mapping is only possible when C is a splat.
  if (isa<ScalableVectorType>(Val: NewCTy)) {
    Constant *Splat = C->getSplatValue();
    if (!Splat)
      return nullptr;
    return ConstantVector::getSplat(EC: NewCTy->getElementCount(), Elt: Splat);
  }

  // A narrowing shuffle (NewC wider than C) is not handled here.
  if (cast<FixedVectorType>(Val: NewCTy)->getNumElements() >
      cast<FixedVectorType>(Val: C->getType())->getNumElements())
    return nullptr;

  // Start from an all-poison NewC and fill in each element that the mask
  // demands; a slot may be written more than once only with the same value.
  unsigned NewCNumElts = cast<FixedVectorType>(Val: NewCTy)->getNumElements();
  PoisonValue *PoisonScalar = PoisonValue::get(T: C->getType()->getScalarType());
  SmallVector<Constant *, 16> NewVecC(NewCNumElts, PoisonScalar);
  unsigned NumElts = cast<FixedVectorType>(Val: C->getType())->getNumElements();
  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *CElt = C->getAggregateElement(Elt: I);
    if (ShMask[I] >= 0) {
      assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
      Constant *NewCElt = NewVecC[ShMask[I]];
      // Bail out if:
      // 1. The constant vector contains a constant expression.
      // 2. The shuffle needs an element of the constant vector that can't
      //    be mapped to a new constant vector.
      // 3. This is a widening shuffle that copies elements of V1 into the
      //    extended elements (extending with poison is allowed).
      if (!CElt || (!isa<PoisonValue>(Val: NewCElt) && NewCElt != CElt) ||
          I >= NewCNumElts)
        return nullptr;
      NewVecC[ShMask[I]] = CElt;
    }
  }
  return ConstantVector::get(V: NewVecC);
}
2382
2383// Get the result of `Vector Op Splat` (or Splat Op Vector if \p SplatLHS).
2384static Constant *constantFoldBinOpWithSplat(unsigned Opcode, Constant *Vector,
2385 Constant *Splat, bool SplatLHS,
2386 const DataLayout &DL) {
2387 ElementCount EC = cast<VectorType>(Val: Vector->getType())->getElementCount();
2388 Constant *LHS = ConstantVector::getSplat(EC, Elt: Splat);
2389 Constant *RHS = Vector;
2390 if (!SplatLHS)
2391 std::swap(a&: LHS, b&: RHS);
2392 return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
2393}
2394
/// Push a binary op through vector splice intrinsics (the template parameter
/// selects which splice intrinsic ID to match/create):
///   Op(splice(V1, poison, off), splice(V2, poison, off))
///     -> splice(Op(V1, V2), poison, off)
/// plus the splat variants documented inline. Use-count checks below ensure
/// the transform does not increase the instruction count.
template <Intrinsic::ID SpliceID>
static Instruction *foldSpliceBinOp(BinaryOperator &Inst,
                                    InstCombiner::BuilderTy &Builder) {
  Value *LHS = Inst.getOperand(i_nocapture: 0), *RHS = Inst.getOperand(i_nocapture: 1);
  // Build Op(X, Y) (propagating Inst's IR flags) wrapped in a fresh splice
  // with a poison second operand and the given offset.
  auto CreateBinOpSplice = [&](Value *X, Value *Y, Value *Offset) {
    Value *V = Builder.CreateBinOp(Opc: Inst.getOpcode(), LHS: X, RHS: Y, Name: Inst.getName());
    if (auto *BO = dyn_cast<BinaryOperator>(Val: V))
      BO->copyIRFlags(V: &Inst);
    Module *M = Inst.getModule();
    Function *F = Intrinsic::getOrInsertDeclaration(M, id: SpliceID, Tys: V->getType());
    return CallInst::Create(Func: F, Args: {V, PoisonValue::get(T: V->getType()), Offset});
  };
  Value *V1, *V2, *Offset;
  if (match(LHS,
            m_Intrinsic<SpliceID>(m_Value(V&: V1), m_Poison(), m_Value(V&: Offset)))) {
    // Op(splice(V1, poison, offset), splice(V2, poison, offset))
    // -> splice(Op(V1, V2), poison, offset)
    if (match(RHS, m_Intrinsic<SpliceID>(m_Value(V&: V2), m_Poison(),
                                         m_Specific(V: Offset))) &&
        (LHS->hasOneUse() || RHS->hasOneUse() ||
         (LHS == RHS && LHS->hasNUses(N: 2))))
      return CreateBinOpSplice(V1, V2, Offset);

    // Op(splice(V1, poison, offset), RHSSplat)
    // -> splice(Op(V1, RHSSplat), poison, offset)
    if (LHS->hasOneUse() && isSplatValue(V: RHS))
      return CreateBinOpSplice(V1, RHS, Offset);
  }
  // Op(LHSSplat, splice(V2, poison, offset))
  // -> splice(Op(LHSSplat, V2), poison, offset)
  else if (isSplatValue(V: LHS) &&
           match(RHS, m_OneUse(m_Intrinsic<SpliceID>(m_Value(V&: V2), m_Poison(),
                                                     m_Value(V&: Offset)))))
    return CreateBinOpSplice(LHS, V2, Offset);

  // TODO: Fold binops of the form
  // Op(splice(poison, V1, offset), splice(poison, V2, offset))
  // -> splice(poison, Op(V1, V2), offset)

  return nullptr;
}
2436
/// Try a series of folds for a binary operator with vector type. The common
/// theme is moving shuffle-like operations (shuffles, reverses, splices,
/// constant subvector inserts, splats) past the arithmetic so that they can
/// combine with neighboring shuffles/binops or be constant folded.
Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
  if (!isa<VectorType>(Val: Inst.getType()))
    return nullptr;

  BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
  Value *LHS = Inst.getOperand(i_nocapture: 0), *RHS = Inst.getOperand(i_nocapture: 1);
  // Both operands must have the same element count as the result.
  assert(cast<VectorType>(LHS->getType())->getElementCount() ==
         cast<VectorType>(Inst.getType())->getElementCount());
  assert(cast<VectorType>(RHS->getType())->getElementCount() ==
         cast<VectorType>(Inst.getType())->getElementCount());

  // Fold the splat constant into both the destination and the inserted
  // subvector of a constant `vector.insert`; bail out if either side fails to
  // constant-fold.
  auto foldConstantsThroughSubVectorInsertSplat =
      [&](Value *MaybeSubVector, Value *MaybeSplat,
          bool SplatLHS) -> Instruction * {
    Value *Idx;
    Constant *Splat, *SubVector, *Dest;
    if (!match(V: MaybeSplat, P: m_ConstantSplat(SubPattern: m_Constant(C&: Splat))) ||
        !match(V: MaybeSubVector,
               P: m_VectorInsert(Op0: m_Constant(C&: Dest), Op1: m_Constant(C&: SubVector),
                                Op2: m_Value(V&: Idx))))
      return nullptr;
    SubVector =
        constantFoldBinOpWithSplat(Opcode, Vector: SubVector, Splat, SplatLHS, DL);
    Dest = constantFoldBinOpWithSplat(Opcode, Vector: Dest, Splat, SplatLHS, DL);
    if (!SubVector || !Dest)
      return nullptr;
    auto *InsertVector =
        Builder.CreateInsertVector(DstType: Dest->getType(), SrcVec: Dest, SubVec: SubVector, Idx);
    return replaceInstUsesWith(I&: Inst, V: InsertVector);
  };

  // If one operand is a constant splat and the other operand is a
  // `vector.insert` where both the destination and subvector are constant,
  // apply the operation to both the destination and subvector, returning a new
  // constant `vector.insert`. This helps constant folding for scalable vectors.
  if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
          /*MaybeSubVector=*/LHS, /*MaybeSplat=*/RHS, /*SplatLHS=*/false))
    return Folded;
  if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
          /*MaybeSubVector=*/RHS, /*MaybeSplat=*/LHS, /*SplatLHS=*/true))
    return Folded;

  // If both operands of the binop are vector concatenations, then perform the
  // narrow binop on each pair of the source operands followed by concatenation
  // of the results.
  Value *L0, *L1, *R0, *R1;
  ArrayRef<int> Mask;
  if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: L0), v2: m_Value(V&: L1), mask: m_Mask(Mask))) &&
      match(V: RHS, P: m_Shuffle(v1: m_Value(V&: R0), v2: m_Value(V&: R1), mask: m_SpecificMask(Mask))) &&
      LHS->hasOneUse() && RHS->hasOneUse() &&
      cast<ShuffleVectorInst>(Val: LHS)->isConcat() &&
      cast<ShuffleVectorInst>(Val: RHS)->isConcat()) {
    // This transform does not have the speculative execution constraint as
    // below because the shuffle is a concatenation. The new binops are
    // operating on exactly the same elements as the existing binop.
    // TODO: We could ease the mask requirement to allow different undef lanes,
    //       but that requires an analysis of the binop-with-undef output value.
    Value *NewBO0 = Builder.CreateBinOp(Opc: Opcode, LHS: L0, RHS: R0);
    if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO0))
      BO->copyIRFlags(V: &Inst);
    Value *NewBO1 = Builder.CreateBinOp(Opc: Opcode, LHS: L1, RHS: R1);
    if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO1))
      BO->copyIRFlags(V: &Inst);
    return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
  }

  // Build Op(X, Y) (with the original IR flags) wrapped in a vector.reverse.
  auto createBinOpReverse = [&](Value *X, Value *Y) {
    Value *V = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y, Name: Inst.getName());
    if (auto *BO = dyn_cast<BinaryOperator>(Val: V))
      BO->copyIRFlags(V: &Inst);
    Module *M = Inst.getModule();
    Function *F = Intrinsic::getOrInsertDeclaration(
        M, id: Intrinsic::vector_reverse, Tys: V->getType());
    return CallInst::Create(Func: F, Args: V);
  };

  // NOTE: Reverse shuffles don't require the speculative execution protection
  // below because they don't affect which lanes take part in the computation.

  Value *V1, *V2;
  if (match(V: LHS, P: m_VecReverse(Op0: m_Value(V&: V1)))) {
    // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
    // The use checks ensure at least one reverse dies, so the instruction
    // count does not increase.
    if (match(V: RHS, P: m_VecReverse(Op0: m_Value(V&: V2))) &&
        (LHS->hasOneUse() || RHS->hasOneUse() ||
         (LHS == RHS && LHS->hasNUses(N: 2))))
      return createBinOpReverse(V1, V2);

    // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
    if (LHS->hasOneUse() && isSplatValue(V: RHS))
      return createBinOpReverse(V1, RHS);
  }
  // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
  else if (isSplatValue(V: LHS) && match(V: RHS, P: m_OneUse(SubPattern: m_VecReverse(Op0: m_Value(V&: V2)))))
    return createBinOpReverse(LHS, V2);

  // Same as createBinOpReverse, but for the EVL-predicated vp.reverse with an
  // all-true mask.
  auto createBinOpVPReverse = [&](Value *X, Value *Y, Value *EVL) {
    Value *V = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y, Name: Inst.getName());
    if (auto *BO = dyn_cast<BinaryOperator>(Val: V))
      BO->copyIRFlags(V: &Inst);

    ElementCount EC = cast<VectorType>(Val: V->getType())->getElementCount();
    Value *AllTrueMask = Builder.CreateVectorSplat(EC, V: Builder.getTrue());
    Module *M = Inst.getModule();
    Function *F = Intrinsic::getOrInsertDeclaration(
        M, id: Intrinsic::experimental_vp_reverse, Tys: V->getType());
    return CallInst::Create(Func: F, Args: {V, AllTrueMask, EVL});
  };

  Value *EVL;
  if (match(V: LHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>(
                   Op0: m_Value(V&: V1), Op1: m_AllOnes(), Op2: m_Value(V&: EVL)))) {
    // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
    // Both reverses must use the same EVL for the lanes to line up.
    if (match(V: RHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>(
                     Op0: m_Value(V&: V2), Op1: m_AllOnes(), Op2: m_Specific(V: EVL))) &&
        (LHS->hasOneUse() || RHS->hasOneUse() ||
         (LHS == RHS && LHS->hasNUses(N: 2))))
      return createBinOpVPReverse(V1, V2, EVL);

    // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
    if (LHS->hasOneUse() && isSplatValue(V: RHS))
      return createBinOpVPReverse(V1, RHS, EVL);
  }
  // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
  else if (isSplatValue(V: LHS) &&
           match(V: RHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>(
                        Op0: m_Value(V&: V2), Op1: m_AllOnes(), Op2: m_Value(V&: EVL))))
    return createBinOpVPReverse(LHS, V2, EVL);

  if (Instruction *Folded =
          foldSpliceBinOp<Intrinsic::vector_splice_left>(Inst, Builder))
    return Folded;
  if (Instruction *Folded =
          foldSpliceBinOp<Intrinsic::vector_splice_right>(Inst, Builder))
    return Folded;

  // It may not be safe to reorder shuffles and things like div, urem, etc.
  // because we may trap when executing those ops on unknown vector elements.
  // See PR20059.
  if (!isSafeToSpeculativelyExecuteWithVariableReplaced(I: &Inst))
    return nullptr;

  // Build Op(X, Y) (with the original IR flags) followed by a single-source
  // shuffle with mask M.
  auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
    Value *XY = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y);
    if (auto *BO = dyn_cast<BinaryOperator>(Val: XY))
      BO->copyIRFlags(V: &Inst);
    return new ShuffleVectorInst(XY, M);
  };

  // If both arguments of the binary operation are shuffles that use the same
  // mask and shuffle within a single vector, move the shuffle after the binop.
  if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(), mask: m_Mask(Mask))) &&
      match(V: RHS, P: m_Shuffle(v1: m_Value(V&: V2), v2: m_Poison(), mask: m_SpecificMask(Mask))) &&
      V1->getType() == V2->getType() &&
      (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
    // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
    return createBinOpShuffle(V1, V2, Mask);
  }

  // If both arguments of a commutative binop are select-shuffles that use the
  // same mask with commuted operands, the shuffles are unnecessary.
  if (Inst.isCommutative() &&
      match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Value(V&: V2), mask: m_Mask(Mask))) &&
      match(V: RHS,
            P: m_Shuffle(v1: m_Specific(V: V2), v2: m_Specific(V: V1), mask: m_SpecificMask(Mask)))) {
    auto *LShuf = cast<ShuffleVectorInst>(Val: LHS);
    auto *RShuf = cast<ShuffleVectorInst>(Val: RHS);
    // TODO: Allow shuffles that contain undefs in the mask?
    //       That is legal, but it reduces undef knowledge.
    // TODO: Allow arbitrary shuffles by shuffling after binop?
    //       That might be legal, but we have to deal with poison.
    if (LShuf->isSelect() &&
        !is_contained(Range: LShuf->getShuffleMask(), Element: PoisonMaskElem) &&
        RShuf->isSelect() &&
        !is_contained(Range: RShuf->getShuffleMask(), Element: PoisonMaskElem)) {
      // Example:
      // LHS = shuffle V1, V2, <0, 5, 6, 3>
      // RHS = shuffle V2, V1, <0, 5, 6, 3>
      // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
      Instruction *NewBO = BinaryOperator::Create(Op: Opcode, S1: V1, S2: V2);
      NewBO->copyIRFlags(V: &Inst);
      return NewBO;
    }
  }

  // If one argument is a shuffle within one vector and the other is a constant,
  // try moving the shuffle after the binary operation. This canonicalization
  // intends to move shuffles closer to other shuffles and binops closer to
  // other binops, so they can be folded. It may also enable demanded elements
  // transforms.
  Constant *C;
  if (match(V: &Inst, P: m_c_BinOp(L: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(),
                                                   mask: m_Mask(Mask))),
                             R: m_ImmConstant(C)))) {
    assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() &&
           "Shuffle should not change scalar type");

    // Remember which side held the constant so the operand order of the new
    // binop matches the original (matters for non-commutative opcodes).
    bool ConstOp1 = isa<Constant>(Val: RHS);
    if (Constant *NewC =
            unshuffleConstant(ShMask: Mask, C, NewCTy: cast<VectorType>(Val: V1->getType()))) {
      // For fixed vectors, lanes of NewC not used by the shuffle will be poison
      // which will cause UB for div/rem. Mask them with a safe constant.
      if (isa<FixedVectorType>(Val: V1->getType()) && Inst.isIntDivRem())
        NewC = getSafeVectorConstantForBinop(Opcode, In: NewC, IsRHSConstant: ConstOp1);

      // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
      // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
      Value *NewLHS = ConstOp1 ? V1 : NewC;
      Value *NewRHS = ConstOp1 ? NewC : V1;
      return createBinOpShuffle(NewLHS, NewRHS, Mask);
    }
  }

  // Try to reassociate to sink a splat shuffle after a binary operation.
  if (Inst.isAssociative() && Inst.isCommutative()) {
    // Canonicalize shuffle operand as LHS.
    if (isa<ShuffleVectorInst>(Val: RHS))
      std::swap(a&: LHS, b&: RHS);

    Value *X;
    ArrayRef<int> MaskC;
    int SplatIndex;
    Value *Y, *OtherOp;
    if (!match(V: LHS,
               P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_Mask(MaskC)))) ||
        !match(Mask: MaskC, P: m_SplatOrPoisonMask(SplatIndex)) ||
        X->getType() != Inst.getType() ||
        !match(V: RHS, P: m_OneUse(SubPattern: m_BinOp(Opcode, L: m_Value(V&: Y), R: m_Value(V&: OtherOp)))))
      return nullptr;

    // FIXME: This may not be safe if the analysis allows undef elements. By
    //        moving 'Y' before the splat shuffle, we are implicitly assuming
    //        that it is not undef/poison at the splat index.
    if (isSplatValue(V: OtherOp, Index: SplatIndex)) {
      std::swap(a&: Y, b&: OtherOp);
    } else if (!isSplatValue(V: Y, Index: SplatIndex)) {
      return nullptr;
    }

    // X and Y are splatted values, so perform the binary operation on those
    // values followed by a splat followed by the 2nd binary operation:
    // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
    Value *NewBO = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y);
    SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
    Value *NewSplat = Builder.CreateShuffleVector(V: NewBO, Mask: NewMask);
    Instruction *R = BinaryOperator::Create(Op: Opcode, S1: NewSplat, S2: OtherOp);

    // Intersect FMF on both new binops. Other (poison-generating) flags are
    // dropped to be safe.
    if (isa<FPMathOperator>(Val: R)) {
      R->copyFastMathFlags(I: &Inst);
      R->andIRFlags(V: RHS);
    }
    if (auto *NewInstBO = dyn_cast<BinaryOperator>(Val: NewBO))
      NewInstBO->copyIRFlags(V: R);
    return R;
  }

  return nullptr;
}
2696
/// Try to narrow the width of a binop if at least 1 operand is an extend of
/// a value. This requires a potentially expensive known bits check to make
/// sure the narrow op does not overflow.
Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
  // We need at least one extended operand.
  Value *Op0 = BO.getOperand(i_nocapture: 0), *Op1 = BO.getOperand(i_nocapture: 1);

  // If this is a sub, we swap the operands since we always want an extension
  // on the RHS. The LHS can be an extension or a constant.
  if (BO.getOpcode() == Instruction::Sub)
    std::swap(a&: Op0, b&: Op1);

  Value *X;
  bool IsSext = match(V: Op0, P: m_SExt(Op: m_Value(V&: X)));
  if (!IsSext && !match(V: Op0, P: m_ZExt(Op: m_Value(V&: X))))
    return nullptr;

  // If both operands are the same extension from the same source type and we
  // can eliminate at least one (hasOneUse), this might work.
  CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
  Value *Y;
  if (!(match(V: Op1, P: m_ZExtOrSExt(Op: m_Value(V&: Y))) && X->getType() == Y->getType() &&
        cast<Operator>(Val: Op1)->getOpcode() == CastOpc &&
        (Op0->hasOneUse() || Op1->hasOneUse()))) {
    // If that did not match, see if we have a suitable constant operand.
    // Truncating and extending must produce the same constant.
    Constant *WideC;
    if (!Op0->hasOneUse() || !match(V: Op1, P: m_Constant(C&: WideC)))
      return nullptr;
    // The narrow constant must round-trip through the cast losslessly.
    Constant *NarrowC = getLosslessInvCast(C: WideC, InvCastTo: X->getType(), CastOp: CastOpc, DL);
    if (!NarrowC)
      return nullptr;
    Y = NarrowC;
  }

  // Swap back now that we found our operands.
  if (BO.getOpcode() == Instruction::Sub)
    std::swap(a&: X, b&: Y);

  // Both operands have narrow versions. Last step: the math must not overflow
  // in the narrow width.
  if (!willNotOverflow(Opcode: BO.getOpcode(), LHS: X, RHS: Y, CxtI: BO, IsSigned: IsSext))
    return nullptr;

  // bo (ext X), (ext Y) --> ext (bo X, Y)
  // bo (ext X), C --> ext (bo X, C')
  // We just proved no overflow, so the narrow op can carry a nowrap flag
  // matching the extension kind.
  Value *NarrowBO = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: X, RHS: Y, Name: "narrow");
  if (auto *NewBinOp = dyn_cast<BinaryOperator>(Val: NarrowBO)) {
    if (IsSext)
      NewBinOp->setHasNoSignedWrap();
    else
      NewBinOp->setHasNoUnsignedWrap();
  }
  return CastInst::Create(CastOpc, S: NarrowBO, Ty: BO.getType());
}
2752
2753/// Determine nowrap flags for (gep (gep p, x), y) to (gep p, (x + y))
2754/// transform.
2755static GEPNoWrapFlags getMergedGEPNoWrapFlags(GEPOperator &GEP1,
2756 GEPOperator &GEP2) {
2757 return GEP1.getNoWrapFlags().intersectForOffsetAdd(Other: GEP2.getNoWrapFlags());
2758}
2759
/// Thread a GEP operation with constant indices through the constant true/false
/// arms of a select:
///   gep (select Cond, TrueC, FalseC), IndexC
///     --> select Cond, (gep TrueC, IndexC), (gep FalseC, IndexC)
static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
                                  InstCombiner::BuilderTy &Builder) {
  if (!GEP.hasAllConstantIndices())
    return nullptr;

  // The pointer operand must be a select instruction whose arms are both
  // constants.
  Instruction *Sel;
  Value *Cond;
  Constant *TrueC, *FalseC;
  if (!match(V: GEP.getPointerOperand(), P: m_Instruction(I&: Sel)) ||
      !match(V: Sel,
             P: m_Select(C: m_Value(V&: Cond), L: m_Constant(C&: TrueC), R: m_Constant(C&: FalseC))))
    return nullptr;

  // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
  // Propagate 'inbounds' and metadata from existing instructions.
  // Note: using IRBuilder to create the constants for efficiency.
  SmallVector<Value *, 4> IndexC(GEP.indices());
  GEPNoWrapFlags NW = GEP.getNoWrapFlags();
  Type *Ty = GEP.getSourceElementType();
  Value *NewTrueC = Builder.CreateGEP(Ty, Ptr: TrueC, IdxList: IndexC, Name: "", NW);
  Value *NewFalseC = Builder.CreateGEP(Ty, Ptr: FalseC, IdxList: IndexC, Name: "", NW);
  // Metadata (MDFrom) is copied from the original select.
  return SelectInst::Create(C: Cond, S1: NewTrueC, S2: NewFalseC, NameStr: "", InsertBefore: nullptr, MDFrom: Sel);
}
2785
// Canonicalization:
// gep T, (gep i8, base, C1), (Index + C2) into
// gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index
// This folds the two constant offsets into the inner byte-offset GEP, leaving
// the variable index alone on the outer GEP.
static Instruction *canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP,
                                                GEPOperator *Src,
                                                InstCombinerImpl &IC) {
  if (GEP.getNumIndices() != 1)
    return nullptr;
  auto &DL = IC.getDataLayout();
  Value *Base;
  const APInt *C1;
  // The source must be a constant-offset ptradd: (gep i8, base, C1).
  if (!match(V: Src, P: m_PtrAdd(PointerOp: m_Value(V&: Base), OffsetOp: m_APInt(Res&: C1))))
    return nullptr;
  Value *VarIndex;
  const APInt *C2;
  Type *PtrTy = Src->getType()->getScalarType();
  unsigned IndexSizeInBits = DL.getIndexTypeSizeInBits(Ty: PtrTy);
  // The outer index must be of the form (Index + C2) (add or equivalent).
  if (!match(V: GEP.getOperand(i_nocapture: 1), P: m_AddLike(L: m_Value(V&: VarIndex), R: m_APInt(Res&: C2))))
    return nullptr;
  // Both constants must already be in the index width to combine them.
  if (C1->getBitWidth() != IndexSizeInBits ||
      C2->getBitWidth() != IndexSizeInBits)
    return nullptr;
  Type *BaseType = GEP.getSourceElementType();
  // Scalable types have no compile-time constant alloc size.
  if (isa<ScalableVectorType>(Val: BaseType))
    return nullptr;
  APInt TypeSize(IndexSizeInBits, DL.getTypeAllocSize(Ty: BaseType));
  APInt NewOffset = TypeSize * *C2 + *C1;
  // Profitability: either the combined offset vanishes entirely, or both the
  // inner GEP and the add die, so no extra instructions remain.
  if (NewOffset.isZero() ||
      (Src->hasOneUse() && GEP.getOperand(i_nocapture: 1)->hasOneUse())) {
    GEPNoWrapFlags Flags = GEPNoWrapFlags::none();
    // nuw (and inbounds) only survive if every component of the reassociated
    // computation is itself non-wrapping.
    if (GEP.hasNoUnsignedWrap() &&
        cast<GEPOperator>(Val: Src)->hasNoUnsignedWrap() &&
        match(V: GEP.getOperand(i_nocapture: 1), P: m_NUWAddLike(L: m_Value(), R: m_Value()))) {
      Flags |= GEPNoWrapFlags::noUnsignedWrap();
      if (GEP.isInBounds() && cast<GEPOperator>(Val: Src)->isInBounds())
        Flags |= GEPNoWrapFlags::inBounds();
    }

    Value *GEPConst =
        IC.Builder.CreatePtrAdd(Ptr: Base, Offset: IC.Builder.getInt(AI: NewOffset), Name: "", NW: Flags);
    return GetElementPtrInst::Create(PointeeType: BaseType, Ptr: GEPConst, IdxList: VarIndex, NW: Flags);
  }

  return nullptr;
}
2831
/// Combine constant offsets separated by variable offsets.
/// ptradd (ptradd (ptradd p, C1), x), C2 -> ptradd (ptradd p, x), C1+C2
/// Walks down a chain of GEPs, skipping over variable-index GEPs, until it
/// finds a second all-constant GEP to merge the outer constant offset into.
static Instruction *combineConstantOffsets(GetElementPtrInst &GEP,
                                           InstCombinerImpl &IC) {
  if (!GEP.hasAllConstantIndices())
    return nullptr;

  // Start from all flags and intersect with each GEP in the chain, since the
  // reassociation is only valid with flags common to every step.
  GEPNoWrapFlags NW = GEPNoWrapFlags::all();
  SmallVector<GetElementPtrInst *> Skipped;
  auto *InnerGEP = dyn_cast<GetElementPtrInst>(Val: GEP.getPointerOperand());
  while (true) {
    if (!InnerGEP)
      return nullptr;

    NW = NW.intersectForReassociate(Other: InnerGEP->getNoWrapFlags());
    if (InnerGEP->hasAllConstantIndices())
      break;

    // Only rewrite GEPs that will have no other users after the transform.
    if (!InnerGEP->hasOneUse())
      return nullptr;

    Skipped.push_back(Elt: InnerGEP);
    InnerGEP = dyn_cast<GetElementPtrInst>(Val: InnerGEP->getPointerOperand());
  }

  // The two constant offset GEPs are directly adjacent: Let normal offset
  // merging handle it.
  if (Skipped.empty())
    return nullptr;

  // FIXME: This one-use check is not strictly necessary. Consider relaxing it
  //        if profitable.
  if (!InnerGEP->hasOneUse())
    return nullptr;

  // Don't bother with vector splats.
  Type *Ty = GEP.getType();
  if (InnerGEP->getType() != Ty)
    return nullptr;

  // Accumulate the total constant offset of the outer and inner GEPs.
  const DataLayout &DL = IC.getDataLayout();
  APInt Offset(DL.getIndexTypeSizeInBits(Ty), 0);
  if (!GEP.accumulateConstantOffset(DL, Offset) ||
      !InnerGEP->accumulateConstantOffset(DL, Offset))
    return nullptr;

  // Splice the inner constant GEP out of the chain and re-apply the combined
  // constant offset at the end, with conservatively merged nowrap flags.
  IC.replaceOperand(I&: *Skipped.back(), OpNum: 0, V: InnerGEP->getPointerOperand());
  for (GetElementPtrInst *SkippedGEP : Skipped)
    SkippedGEP->setNoWrapFlags(NW);

  return IC.replaceInstUsesWith(
      I&: GEP,
      V: IC.Builder.CreatePtrAdd(Ptr: Skipped.front(), Offset: IC.Builder.getInt(AI: Offset), Name: "",
                               NW: NW.intersectForOffsetAdd(Other: GEP.getNoWrapFlags())));
}
2887
/// Fold a GEP whose pointer operand is itself a GEP (\p Src), merging the two
/// into a single GEP (or a GEP of a folded constant base) where profitable.
Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
                                             GEPOperator *Src) {
  // Combine Indices - If the source pointer to this getelementptr instruction
  // is a getelementptr instruction with matching element type, combine the
  // indices of the two getelementptr instructions into a single instruction.
  if (!shouldMergeGEPs(GEP&: *cast<GEPOperator>(Val: &GEP), Src&: *Src))
    return nullptr;

  if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, IC&: *this))
    return I;

  if (auto *I = combineConstantOffsets(GEP, IC&: *this))
    return I;

  // The remaining folds require the element types of the two GEPs to line up.
  if (Src->getResultElementType() != GEP.getSourceElementType())
    return nullptr;

  // Fold chained GEP with constant base into single GEP:
  // gep i8, (gep i8, %base, C1), (select Cond, C2, C3)
  // -> gep i8, %base, (select Cond, C1+C2, C1+C3)
  if (Src->hasOneUse() && GEP.getNumIndices() == 1 &&
      Src->getNumIndices() == 1) {
    Value *SrcIdx = *Src->idx_begin();
    Value *GEPIdx = *GEP.idx_begin();
    const APInt *ConstOffset, *TrueVal, *FalseVal;
    Value *Cond;

    // The constant and the select may appear on either GEP.
    if ((match(V: SrcIdx, P: m_APInt(Res&: ConstOffset)) &&
         match(V: GEPIdx,
               P: m_Select(C: m_Value(V&: Cond), L: m_APInt(Res&: TrueVal), R: m_APInt(Res&: FalseVal)))) ||
        (match(V: GEPIdx, P: m_APInt(Res&: ConstOffset)) &&
         match(V: SrcIdx,
               P: m_Select(C: m_Value(V&: Cond), L: m_APInt(Res&: TrueVal), R: m_APInt(Res&: FalseVal))))) {
      auto *Select = isa<SelectInst>(Val: GEPIdx) ? cast<SelectInst>(Val: GEPIdx)
                                              : cast<SelectInst>(Val: SrcIdx);

      // Make sure the select has only one use.
      if (!Select->hasOneUse())
        return nullptr;

      // All three constants must share a bit width for the adds below.
      if (TrueVal->getBitWidth() != ConstOffset->getBitWidth() ||
          FalseVal->getBitWidth() != ConstOffset->getBitWidth())
        return nullptr;

      APInt NewTrueVal = *ConstOffset + *TrueVal;
      APInt NewFalseVal = *ConstOffset + *FalseVal;
      Constant *NewTrue = ConstantInt::get(Ty: Select->getType(), V: NewTrueVal);
      Constant *NewFalse = ConstantInt::get(Ty: Select->getType(), V: NewFalseVal);
      Value *NewSelect = Builder.CreateSelect(
          C: Cond, True: NewTrue, False: NewFalse, /*Name=*/"",
          /*MDFrom=*/(ProfcheckDisableMetadataFixes ? nullptr : Select));
      GEPNoWrapFlags Flags =
          getMergedGEPNoWrapFlags(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP));
      return replaceInstUsesWith(I&: GEP,
                                 V: Builder.CreateGEP(Ty: GEP.getResultElementType(),
                                                    Ptr: Src->getPointerOperand(),
                                                    IdxList: NewSelect, Name: "", NW: Flags));
    }
  }

  // Find out whether the last index in the source GEP is a sequential idx.
  bool EndsWithSequential = false;
  for (gep_type_iterator I = gep_type_begin(GEP: *Src), E = gep_type_end(GEP: *Src);
       I != E; ++I)
    EndsWithSequential = I.isSequential();
  if (!EndsWithSequential)
    return nullptr;

  // Replace: gep (gep %P, long B), long A, ...
  // With:    T = long A+B; gep %P, T, ...
  Value *SO1 = Src->getOperand(i_nocapture: Src->getNumOperands() - 1);
  Value *GO1 = GEP.getOperand(i_nocapture: 1);

  // If they aren't the same type, then the input hasn't been processed
  // by the loop above yet (which canonicalizes sequential index types to
  // intptr_t). Just avoid transforming this until the input has been
  // normalized.
  if (SO1->getType() != GO1->getType())
    return nullptr;

  Value *Sum =
      simplifyAddInst(LHS: GO1, RHS: SO1, IsNSW: false, IsNUW: false, Q: SQ.getWithInstruction(I: &GEP));
  // Only do the combine when we are sure the cost after the
  // merge is never more than that before the merge.
  if (Sum == nullptr)
    return nullptr;

  // Splice the merged index into the combined index list: inner indices, then
  // the folded sum, then the remaining outer indices.
  SmallVector<Value *, 8> Indices;
  Indices.append(in_start: Src->op_begin() + 1, in_end: Src->op_end() - 1);
  Indices.push_back(Elt: Sum);
  Indices.append(in_start: GEP.op_begin() + 2, in_end: GEP.op_end());

  // Don't create GEPs with more than one non-zero index.
  unsigned NumNonZeroIndices = count_if(Range&: Indices, P: [](Value *Idx) {
    auto *C = dyn_cast<Constant>(Val: Idx);
    return !C || !C->isNullValue();
  });
  if (NumNonZeroIndices > 1)
    return nullptr;

  return replaceInstUsesWith(
      I&: GEP, V: Builder.CreateGEP(
                 Ty: Src->getSourceElementType(), Ptr: Src->getOperand(i_nocapture: 0), IdxList: Indices, Name: "",
                 NW: getMergedGEPNoWrapFlags(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP))));
}
2993
2994Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
2995 BuilderTy *Builder,
2996 bool &DoesConsume, unsigned Depth) {
2997 static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1));
2998 // ~(~(X)) -> X.
2999 Value *A, *B;
3000 if (match(V, P: m_Not(V: m_Value(V&: A)))) {
3001 DoesConsume = true;
3002 return A;
3003 }
3004
3005 Constant *C;
3006 // Constants can be considered to be not'ed values.
3007 if (match(V, P: m_ImmConstant(C)))
3008 return ConstantExpr::getNot(C);
3009
3010 if (Depth++ >= MaxAnalysisRecursionDepth)
3011 return nullptr;
3012
3013 // The rest of the cases require that we invert all uses so don't bother
3014 // doing the analysis if we know we can't use the result.
3015 if (!WillInvertAllUses)
3016 return nullptr;
3017
3018 // Compares can be inverted if all of their uses are being modified to use
3019 // the ~V.
3020 if (auto *I = dyn_cast<CmpInst>(Val: V)) {
3021 if (Builder != nullptr)
3022 return Builder->CreateCmp(Pred: I->getInversePredicate(), LHS: I->getOperand(i_nocapture: 0),
3023 RHS: I->getOperand(i_nocapture: 1));
3024 return NonNull;
3025 }
3026
3027 // If `V` is of the form `A + B` then `-1 - V` can be folded into
3028 // `(-1 - B) - A` if we are willing to invert all of the uses.
3029 if (match(V, P: m_Add(L: m_Value(V&: A), R: m_Value(V&: B)))) {
3030 if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
3031 DoesConsume, Depth))
3032 return Builder ? Builder->CreateSub(LHS: BV, RHS: A) : NonNull;
3033 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3034 DoesConsume, Depth))
3035 return Builder ? Builder->CreateSub(LHS: AV, RHS: B) : NonNull;
3036 return nullptr;
3037 }
3038
3039 // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded
3040 // into `A ^ B` if we are willing to invert all of the uses.
3041 if (match(V, P: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B)))) {
3042 if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
3043 DoesConsume, Depth))
3044 return Builder ? Builder->CreateXor(LHS: A, RHS: BV) : NonNull;
3045 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3046 DoesConsume, Depth))
3047 return Builder ? Builder->CreateXor(LHS: AV, RHS: B) : NonNull;
3048 return nullptr;
3049 }
3050
3051 // If `V` is of the form `B - A` then `-1 - V` can be folded into
3052 // `A + (-1 - B)` if we are willing to invert all of the uses.
3053 if (match(V, P: m_Sub(L: m_Value(V&: A), R: m_Value(V&: B)))) {
3054 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3055 DoesConsume, Depth))
3056 return Builder ? Builder->CreateAdd(LHS: AV, RHS: B) : NonNull;
3057 return nullptr;
3058 }
3059
3060 // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded
3061 // into `A s>> B` if we are willing to invert all of the uses.
3062 if (match(V, P: m_AShr(L: m_Value(V&: A), R: m_Value(V&: B)))) {
3063 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3064 DoesConsume, Depth))
3065 return Builder ? Builder->CreateAShr(LHS: AV, RHS: B) : NonNull;
3066 return nullptr;
3067 }
3068
3069 Value *Cond;
3070 // LogicOps are special in that we canonicalize them at the cost of an
3071 // instruction.
3072 bool IsSelect = match(V, P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: A), R: m_Value(V&: B))) &&
3073 !shouldAvoidAbsorbingNotIntoSelect(SI: *cast<SelectInst>(Val: V));
3074 // Selects/min/max with invertible operands are freely invertible
3075 if (IsSelect || match(V, P: m_MaxOrMin(L: m_Value(V&: A), R: m_Value(V&: B)))) {
3076 bool LocalDoesConsume = DoesConsume;
3077 if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder*/ nullptr,
3078 DoesConsume&: LocalDoesConsume, Depth))
3079 return nullptr;
3080 if (Value *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3081 DoesConsume&: LocalDoesConsume, Depth)) {
3082 DoesConsume = LocalDoesConsume;
3083 if (Builder != nullptr) {
3084 Value *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
3085 DoesConsume, Depth);
3086 assert(NotB != nullptr &&
3087 "Unable to build inverted value for known freely invertable op");
3088 if (auto *II = dyn_cast<IntrinsicInst>(Val: V))
3089 return Builder->CreateBinaryIntrinsic(
3090 ID: getInverseMinMaxIntrinsic(MinMaxID: II->getIntrinsicID()), LHS: NotA, RHS: NotB);
3091 return Builder->CreateSelect(
3092 C: Cond, True: NotA, False: NotB, Name: "",
3093 MDFrom: ProfcheckDisableMetadataFixes ? nullptr : cast<Instruction>(Val: V));
3094 }
3095 return NonNull;
3096 }
3097 }
3098
3099 if (PHINode *PN = dyn_cast<PHINode>(Val: V)) {
3100 bool LocalDoesConsume = DoesConsume;
3101 SmallVector<std::pair<Value *, BasicBlock *>, 8> IncomingValues;
3102 for (Use &U : PN->operands()) {
3103 BasicBlock *IncomingBlock = PN->getIncomingBlock(U);
3104 Value *NewIncomingVal = getFreelyInvertedImpl(
3105 V: U.get(), /*WillInvertAllUses=*/false,
3106 /*Builder=*/nullptr, DoesConsume&: LocalDoesConsume, Depth: MaxAnalysisRecursionDepth - 1);
3107 if (NewIncomingVal == nullptr)
3108 return nullptr;
3109 // Make sure that we can safely erase the original PHI node.
3110 if (NewIncomingVal == V)
3111 return nullptr;
3112 if (Builder != nullptr)
3113 IncomingValues.emplace_back(Args&: NewIncomingVal, Args&: IncomingBlock);
3114 }
3115
3116 DoesConsume = LocalDoesConsume;
3117 if (Builder != nullptr) {
3118 IRBuilderBase::InsertPointGuard Guard(*Builder);
3119 Builder->SetInsertPoint(PN);
3120 PHINode *NewPN =
3121 Builder->CreatePHI(Ty: PN->getType(), NumReservedValues: PN->getNumIncomingValues());
3122 for (auto [Val, Pred] : IncomingValues)
3123 NewPN->addIncoming(V: Val, BB: Pred);
3124 return NewPN;
3125 }
3126 return NonNull;
3127 }
3128
3129 if (match(V, P: m_SExtLike(Op: m_Value(V&: A)))) {
3130 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3131 DoesConsume, Depth))
3132 return Builder ? Builder->CreateSExt(V: AV, DestTy: V->getType()) : NonNull;
3133 return nullptr;
3134 }
3135
3136 if (match(V, P: m_Trunc(Op: m_Value(V&: A)))) {
3137 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3138 DoesConsume, Depth))
3139 return Builder ? Builder->CreateTrunc(V: AV, DestTy: V->getType()) : NonNull;
3140 return nullptr;
3141 }
3142
3143 // De Morgan's Laws:
3144 // (~(A | B)) -> (~A & ~B)
3145 // (~(A & B)) -> (~A | ~B)
3146 auto TryInvertAndOrUsingDeMorgan = [&](Instruction::BinaryOps Opcode,
3147 bool IsLogical, Value *A,
3148 Value *B) -> Value * {
3149 bool LocalDoesConsume = DoesConsume;
3150 if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder=*/nullptr,
3151 DoesConsume&: LocalDoesConsume, Depth))
3152 return nullptr;
3153 if (auto *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3154 DoesConsume&: LocalDoesConsume, Depth)) {
3155 auto *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
3156 DoesConsume&: LocalDoesConsume, Depth);
3157 DoesConsume = LocalDoesConsume;
3158 if (IsLogical)
3159 return Builder ? Builder->CreateLogicalOp(Opc: Opcode, Cond1: NotA, Cond2: NotB) : NonNull;
3160 return Builder ? Builder->CreateBinOp(Opc: Opcode, LHS: NotA, RHS: NotB) : NonNull;
3161 }
3162
3163 return nullptr;
3164 };
3165
3166 if (match(V, P: m_Or(L: m_Value(V&: A), R: m_Value(V&: B))))
3167 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/false, A,
3168 B);
3169
3170 if (match(V, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B))))
3171 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/false, A,
3172 B);
3173
3174 if (match(V, P: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B))))
3175 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/true, A,
3176 B);
3177
3178 if (match(V, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B))))
3179 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/true, A,
3180 B);
3181
3182 return nullptr;
3183}
3184
3185/// Return true if we should canonicalize the gep to an i8 ptradd.
3186static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
3187 Value *PtrOp = GEP.getOperand(i_nocapture: 0);
3188 Type *GEPEltType = GEP.getSourceElementType();
3189 if (GEPEltType->isIntegerTy(Bitwidth: 8))
3190 return false;
3191
3192 // Canonicalize scalable GEPs to an explicit offset using the llvm.vscale
3193 // intrinsic. This has better support in BasicAA.
3194 if (GEPEltType->isScalableTy())
3195 return true;
3196
3197 // gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two multiplies
3198 // together.
3199 if (GEP.getNumIndices() == 1 &&
3200 match(V: GEP.getOperand(i_nocapture: 1),
3201 P: m_OneUse(SubPattern: m_CombineOr(L: m_Mul(L: m_Value(), R: m_ConstantInt()),
3202 R: m_Shl(L: m_Value(), R: m_ConstantInt())))))
3203 return true;
3204
3205 // gep (gep %p, C1), %x, C2 is expanded so the two constants can
3206 // possibly be merged together.
3207 auto PtrOpGep = dyn_cast<GEPOperator>(Val: PtrOp);
3208 return PtrOpGep && PtrOpGep->hasAllConstantIndices() &&
3209 any_of(Range: GEP.indices(), P: [](Value *V) {
3210 const APInt *C;
3211 return match(V, P: m_APInt(Res&: C)) && !C->isZero();
3212 });
3213}
3214
/// Try to fold `gep(phi(gep1, gep2, ...), indices)` by hoisting a single
/// cloned GEP above the PHI.
///
/// All incoming values of \p PN must be GEPs with the same source element
/// type and operand count. They may differ in at most ONE operand position
/// (tracked in \p DI below); in that case a new PHI is built over just that
/// differing operand. Returns the cloned GEP (inserted into GEP's block) to
/// be used as GEP's new pointer operand, or nullptr if the fold does not
/// apply.
static Instruction *foldGEPOfPhi(GetElementPtrInst &GEP, PHINode *PN,
                                 IRBuilderBase &Builder) {
  auto *Op1 = dyn_cast<GetElementPtrInst>(Val: PN->getOperand(i_nocapture: 0));
  if (!Op1)
    return nullptr;

  // Don't fold a GEP into itself through a PHI node. This can only happen
  // through the back-edge of a loop. Folding a GEP into itself means that
  // the value of the previous iteration needs to be stored in the meantime,
  // thus requiring an additional register variable to be live, but not
  // actually achieving anything (the GEP still needs to be executed once per
  // loop iteration).
  if (Op1 == &GEP)
    return nullptr;
  // Running intersection of the no-wrap flags of all incoming GEPs; the
  // hoisted clone may only carry flags common to every incoming GEP.
  GEPNoWrapFlags NW = Op1->getNoWrapFlags();

  // Index of the single operand position where the incoming GEPs differ,
  // or -1 if all incoming GEPs are identical so far.
  int DI = -1;

  for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
    auto *Op2 = dyn_cast<GetElementPtrInst>(Val&: *I);
    if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
        Op1->getSourceElementType() != Op2->getSourceElementType())
      return nullptr;

    // As for Op1 above, don't try to fold a GEP into itself.
    if (Op2 == &GEP)
      return nullptr;

    // Keep track of the type as we walk the GEP.
    Type *CurTy = nullptr;

    for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
      if (Op1->getOperand(i_nocapture: J)->getType() != Op2->getOperand(i_nocapture: J)->getType())
        return nullptr;

      if (Op1->getOperand(i_nocapture: J) != Op2->getOperand(i_nocapture: J)) {
        if (DI == -1) {
          // We have not seen any differences yet in the GEPs feeding the
          // PHI yet, so we record this one if it is allowed to be a
          // variable.

          // The first two arguments can vary for any GEP, the rest have to be
          // static for struct slots
          if (J > 1) {
            assert(CurTy && "No current type?");
            if (CurTy->isStructTy())
              return nullptr;
          }

          DI = J;
        } else {
          // The GEP is different by more than one input. While this could be
          // extended to support GEPs that vary by more than one variable it
          // doesn't make sense since it greatly increases the complexity and
          // would result in an R+R+R addressing mode which no backend
          // directly supports and would need to be broken into several
          // simpler instructions anyway.
          return nullptr;
        }
      }

      // Sink down a layer of the type for the next iteration.
      if (J > 0) {
        if (J == 1) {
          CurTy = Op1->getSourceElementType();
        } else {
          CurTy =
              GetElementPtrInst::getTypeAtIndex(Ty: CurTy, Idx: Op1->getOperand(i_nocapture: J));
        }
      }
    }

    NW &= Op2->getNoWrapFlags();
  }

  // If not all GEPs are identical we'll have to create a new PHI node.
  // Check that the old PHI node has only one use so that it will get
  // removed.
  if (DI != -1 && !PN->hasOneUse())
    return nullptr;

  auto *NewGEP = cast<GetElementPtrInst>(Val: Op1->clone());
  NewGEP->setNoWrapFlags(NW);

  if (DI == -1) {
    // All the GEPs feeding the PHI are identical. Clone one down into our
    // BB so that it can be merged with the current GEP.
  } else {
    // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
    // into the current block so it can be merged, and create a new PHI to
    // set that index.
    PHINode *NewPN;
    {
      IRBuilderBase::InsertPointGuard Guard(Builder);
      Builder.SetInsertPoint(PN);
      NewPN = Builder.CreatePHI(Ty: Op1->getOperand(i_nocapture: DI)->getType(),
                                NumReservedValues: PN->getNumOperands());
    }

    for (auto &I : PN->operands())
      NewPN->addIncoming(V: cast<GEPOperator>(Val&: I)->getOperand(i_nocapture: DI),
                         BB: PN->getIncomingBlock(U: I));

    NewGEP->setOperand(i_nocapture: DI, Val_nocapture: NewPN);
  }

  NewGEP->insertBefore(BB&: *GEP.getParent(), InsertPos: GEP.getParent()->getFirstInsertionPt());
  return NewGEP;
}
3324
/// Simplify and canonicalize a getelementptr instruction.
///
/// This is a driver that applies an ordered sequence of GEP canonicalizations
/// (simplification, index-type normalization, ptradd canonicalization, zero
/// index stripping, splat scalarization, GEP splitting, and various nowrap /
/// index-pattern rewrites). Each transform returns immediately so the
/// worklist re-visits the result; returning nullptr means no change.
Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
  Value *PtrOp = GEP.getOperand(i_nocapture: 0);
  SmallVector<Value *, 8> Indices(GEP.indices());
  Type *GEPType = GEP.getType();
  Type *GEPEltType = GEP.getSourceElementType();
  // First try to fold the whole GEP away via InstSimplify.
  if (Value *V =
          simplifyGEPInst(SrcTy: GEPEltType, Ptr: PtrOp, Indices, NW: GEP.getNoWrapFlags(),
                          Q: SQ.getWithInstruction(I: &GEP)))
    return replaceInstUsesWith(I&: GEP, V);

  // For vector geps, use the generic demanded vector support.
  // Skip if GEP return type is scalable. The number of elements is unknown at
  // compile-time.
  if (auto *GEPFVTy = dyn_cast<FixedVectorType>(Val: GEPType)) {
    auto VWidth = GEPFVTy->getNumElements();
    APInt PoisonElts(VWidth, 0);
    APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth));
    if (Value *V = SimplifyDemandedVectorElts(V: &GEP, DemandedElts: AllOnesEltMask,
                                              PoisonElts)) {
      if (V != &GEP)
        return replaceInstUsesWith(I&: GEP, V);
      return &GEP;
    }
  }

  // Eliminate unneeded casts for indices, and replace indices which displace
  // by multiples of a zero size type with zero.
  bool MadeChange = false;

  // Index width may not be the same width as pointer width.
  // Data layout chooses the right type based on supported integer types.
  Type *NewScalarIndexTy =
      DL.getIndexType(PtrTy: GEP.getPointerOperandType()->getScalarType());

  gep_type_iterator GTI = gep_type_begin(GEP);
  for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
       ++I, ++GTI) {
    // Skip indices into struct types.
    if (GTI.isStruct())
      continue;

    Type *IndexTy = (*I)->getType();
    Type *NewIndexType =
        IndexTy->isVectorTy()
            ? VectorType::get(ElementType: NewScalarIndexTy,
                              EC: cast<VectorType>(Val: IndexTy)->getElementCount())
            : NewScalarIndexTy;

    // If the element type has zero size then any index over it is equivalent
    // to an index of zero, so replace it with zero if it is not zero already.
    Type *EltTy = GTI.getIndexedType();
    if (EltTy->isSized() && DL.getTypeAllocSize(Ty: EltTy).isZero())
      if (!isa<Constant>(Val: *I) || !match(V: I->get(), P: m_Zero())) {
        *I = Constant::getNullValue(Ty: NewIndexType);
        MadeChange = true;
      }

    if (IndexTy != NewIndexType) {
      // If we are using a wider index than needed for this platform, shrink
      // it to what we need. If narrower, sign-extend it to what we need.
      // This explicit cast can make subsequent optimizations more obvious.
      if (IndexTy->getScalarSizeInBits() <
          NewIndexType->getScalarSizeInBits()) {
        // With nusw+nuw the index is known non-negative, so a zext (tagged
        // nneg) is preferable to sext.
        if (GEP.hasNoUnsignedWrap() && GEP.hasNoUnsignedSignedWrap())
          *I = Builder.CreateZExt(V: *I, DestTy: NewIndexType, Name: "", /*IsNonNeg=*/true);
        else
          *I = Builder.CreateSExt(V: *I, DestTy: NewIndexType);
      } else {
        *I = Builder.CreateTrunc(V: *I, DestTy: NewIndexType, Name: "", IsNUW: GEP.hasNoUnsignedWrap(),
                                 IsNSW: GEP.hasNoUnsignedSignedWrap());
      }
      MadeChange = true;
    }
  }
  if (MadeChange)
    return &GEP;

  // Canonicalize constant GEPs to i8 type.
  if (!GEPEltType->isIntegerTy(Bitwidth: 8) && GEP.hasAllConstantIndices()) {
    APInt Offset(DL.getIndexTypeSizeInBits(Ty: GEPType), 0);
    if (GEP.accumulateConstantOffset(DL, Offset))
      return replaceInstUsesWith(
          I&: GEP, V: Builder.CreatePtrAdd(Ptr: PtrOp, Offset: Builder.getInt(AI: Offset), Name: "",
                                    NW: GEP.getNoWrapFlags()));
  }

  // Canonicalize profitable non-constant GEPs to i8 ptradd form as well.
  if (shouldCanonicalizeGEPToPtrAdd(GEP)) {
    Value *Offset = EmitGEPOffset(GEP: cast<GEPOperator>(Val: &GEP));
    Value *NewGEP =
        Builder.CreatePtrAdd(Ptr: PtrOp, Offset, Name: "", NW: GEP.getNoWrapFlags());
    return replaceInstUsesWith(I&: GEP, V: NewGEP);
  }

  // Strip trailing zero indices.
  auto *LastIdx = dyn_cast<Constant>(Val: Indices.back());
  if (LastIdx && LastIdx->isNullValue() && !LastIdx->getType()->isVectorTy()) {
    return replaceInstUsesWith(
        I&: GEP, V: Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: PtrOp,
                               IdxList: drop_end(RangeOrContainer&: Indices), Name: "", NW: GEP.getNoWrapFlags()));
  }

  // Strip leading zero indices.
  auto *FirstIdx = dyn_cast<Constant>(Val: Indices.front());
  if (FirstIdx && FirstIdx->isNullValue() &&
      !FirstIdx->getType()->isVectorTy()) {
    gep_type_iterator GTI = gep_type_begin(GEP);
    ++GTI;
    if (!GTI.isStruct())
      return replaceInstUsesWith(I&: GEP, V: Builder.CreateGEP(Ty: GTI.getIndexedType(),
                                                     Ptr: GEP.getPointerOperand(),
                                                     IdxList: drop_begin(RangeOrContainer&: Indices), Name: "",
                                                     NW: GEP.getNoWrapFlags()));
  }

  // Scalarize vector operands; prefer splat-of-gep as canonical form.
  // Note that this loses information about undef lanes; we run it after
  // demanded bits to partially mitigate that loss.
  if (GEPType->isVectorTy() && llvm::any_of(Range: GEP.operands(), P: [](Value *Op) {
        return Op->getType()->isVectorTy() && getSplatValue(V: Op);
      })) {
    SmallVector<Value *> NewOps;
    for (auto &Op : GEP.operands()) {
      if (Op->getType()->isVectorTy())
        if (Value *Scalar = getSplatValue(V: Op)) {
          NewOps.push_back(Elt: Scalar);
          continue;
        }
      NewOps.push_back(Elt: Op);
    }

    Value *Res = Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: NewOps[0],
                                   IdxList: ArrayRef(NewOps).drop_front(), Name: GEP.getName(),
                                   NW: GEP.getNoWrapFlags());
    if (!Res->getType()->isVectorTy()) {
      // Every operand was a splat: rebuild the vector result from the
      // scalar GEP.
      ElementCount EC = cast<VectorType>(Val: GEPType)->getElementCount();
      Res = Builder.CreateVectorSplat(EC, V: Res);
    }
    return replaceInstUsesWith(I&: GEP, V: Res);
  }

  // Split a GEP with more than one non-zero index (ignoring one leading
  // zero) into two chained GEPs so constants can later be merged.
  bool SeenNonZeroIndex = false;
  for (auto [IdxNum, Idx] : enumerate(First&: Indices)) {
    // Ignore one leading zero index.
    auto *C = dyn_cast<Constant>(Val: Idx);
    if (C && C->isNullValue() && IdxNum == 0)
      continue;

    if (!SeenNonZeroIndex) {
      SeenNonZeroIndex = true;
      continue;
    }

    // GEP has multiple non-zero indices: Split it.
    ArrayRef<Value *> FrontIndices = ArrayRef(Indices).take_front(N: IdxNum);
    Value *FrontGEP =
        Builder.CreateGEP(Ty: GEPEltType, Ptr: PtrOp, IdxList: FrontIndices,
                          Name: GEP.getName() + ".split", NW: GEP.getNoWrapFlags());

    SmallVector<Value *> BackIndices;
    BackIndices.push_back(Elt: Constant::getNullValue(Ty: NewScalarIndexTy));
    append_range(C&: BackIndices, R: drop_begin(RangeOrContainer&: Indices, N: IdxNum));
    return GetElementPtrInst::Create(
        PointeeType: GetElementPtrInst::getIndexedType(Ty: GEPEltType, IdxList: FrontIndices), Ptr: FrontGEP,
        IdxList: BackIndices, NW: GEP.getNoWrapFlags());
  }

  // Check to see if the inputs to the PHI node are getelementptr instructions.
  if (auto *PN = dyn_cast<PHINode>(Val: PtrOp)) {
    if (Value *NewPtrOp = foldGEPOfPhi(GEP, PN, Builder))
      return replaceOperand(I&: GEP, OpNum: 0, V: NewPtrOp);
  }

  if (auto *Src = dyn_cast<GEPOperator>(Val: PtrOp))
    if (Instruction *I = visitGEPOfGEP(GEP, Src))
      return I;

  if (GEP.getNumIndices() == 1) {
    unsigned AS = GEP.getPointerAddressSpace();
    if (GEP.getOperand(i_nocapture: 1)->getType()->getScalarSizeInBits() ==
        DL.getIndexSizeInBits(AS)) {
      uint64_t TyAllocSize = DL.getTypeAllocSize(Ty: GEPEltType).getFixedValue();

      if (TyAllocSize == 1) {
        // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
        // but only if the result pointer is only used as if it were an integer.
        // (The case where the underlying object is the same is handled by
        // InstSimplify.)
        Value *X = GEP.getPointerOperand();
        Value *Y;
        if (match(V: GEP.getOperand(i_nocapture: 1), P: m_Sub(L: m_PtrToIntOrAddr(Op: m_Value(V&: Y)),
                                       R: m_PtrToIntOrAddr(Op: m_Specific(V: X)))) &&
            GEPType == Y->getType()) {
          bool HasNonAddressBits =
              DL.getAddressSizeInBits(AS) != DL.getPointerSizeInBits(AS);
          bool Changed = false;
          // Only replace uses that observe the address: ptrtoaddr/icmp
          // always, ptrtoint only when pointers carry no non-address bits.
          GEP.replaceUsesWithIf(New: Y, ShouldReplace: [&](Use &U) {
            bool ShouldReplace =
                isa<PtrToAddrInst, ICmpInst>(Val: U.getUser()) ||
                (!HasNonAddressBits && isa<PtrToIntInst>(Val: U.getUser()));
            Changed |= ShouldReplace;
            return ShouldReplace;
          });
          return Changed ? &GEP : nullptr;
        }
      } else if (auto *ExactIns =
                     dyn_cast<PossiblyExactOperator>(Val: GEP.getOperand(i_nocapture: 1))) {
        // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
        Value *V;
        if (ExactIns->isExact()) {
          if ((has_single_bit(Value: TyAllocSize) &&
               match(V: GEP.getOperand(i_nocapture: 1),
                     P: m_Shr(L: m_Value(V),
                           R: m_SpecificInt(V: countr_zero(Val: TyAllocSize))))) ||
              match(V: GEP.getOperand(i_nocapture: 1),
                    P: m_IDiv(L: m_Value(V), R: m_SpecificInt(V: TyAllocSize)))) {
            return GetElementPtrInst::Create(PointeeType: Builder.getInt8Ty(),
                                             Ptr: GEP.getPointerOperand(), IdxList: V,
                                             NW: GEP.getNoWrapFlags());
          }
        }
        if (ExactIns->isExact() && ExactIns->hasOneUse()) {
          // Try to canonicalize non-i8 element type to i8 if the index is an
          // exact instruction. If the index is an exact instruction (div/shr)
          // with a constant RHS, we can fold the non-i8 element scale into the
          // div/shr (similar to the mul case, just inverted).
          const APInt *C;
          std::optional<APInt> NewC;
          if (has_single_bit(Value: TyAllocSize) &&
              match(V: ExactIns, P: m_Shr(L: m_Value(V), R: m_APInt(Res&: C))) &&
              C->uge(RHS: countr_zero(Val: TyAllocSize)))
            NewC = *C - countr_zero(Val: TyAllocSize);
          else if (match(V: ExactIns, P: m_UDiv(L: m_Value(V), R: m_APInt(Res&: C)))) {
            APInt Quot;
            uint64_t Rem;
            APInt::udivrem(LHS: *C, RHS: TyAllocSize, Quotient&: Quot, Remainder&: Rem);
            // Divisor must be an exact multiple of the element size.
            if (Rem == 0)
              NewC = Quot;
          } else if (match(V: ExactIns, P: m_SDiv(L: m_Value(V), R: m_APInt(Res&: C)))) {
            APInt Quot;
            int64_t Rem;
            APInt::sdivrem(LHS: *C, RHS: TyAllocSize, Quotient&: Quot, Remainder&: Rem);
            // For sdiv we need to make sure we aren't creating INT_MIN / -1.
            if (!Quot.isAllOnes() && Rem == 0)
              NewC = Quot;
          }

          if (NewC.has_value()) {
            Value *NewOp = Builder.CreateBinOp(
                Opc: static_cast<Instruction::BinaryOps>(ExactIns->getOpcode()), LHS: V,
                RHS: ConstantInt::get(Ty: V->getType(), V: *NewC));
            cast<BinaryOperator>(Val: NewOp)->setIsExact();
            return GetElementPtrInst::Create(PointeeType: Builder.getInt8Ty(),
                                             Ptr: GEP.getPointerOperand(), IdxList: NewOp,
                                             NW: GEP.getNoWrapFlags());
          }
        }
      }
    }
  }
  // We do not handle pointer-vector geps here.
  if (GEPType->isVectorTy())
    return nullptr;

  // Try to upgrade a plain GEP to inbounds when the whole constant offset
  // provably stays inside a known-dereferenceable (non-null, non-freed)
  // allocation.
  if (!GEP.isInBounds()) {
    unsigned IdxWidth =
        DL.getIndexSizeInBits(AS: PtrOp->getType()->getPointerAddressSpace());
    APInt BasePtrOffset(IdxWidth, 0);
    Value *UnderlyingPtrOp =
        PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, Offset&: BasePtrOffset);
    bool CanBeNull, CanBeFreed;
    uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes(
        DL, CanBeNull, CanBeFreed);
    if (!CanBeNull && !CanBeFreed && DerefBytes != 0) {
      if (GEP.accumulateConstantOffset(DL, Offset&: BasePtrOffset) &&
          BasePtrOffset.isNonNegative()) {
        APInt AllocSize(IdxWidth, DerefBytes);
        if (BasePtrOffset.ule(RHS: AllocSize)) {
          return GetElementPtrInst::CreateInBounds(
              PointeeType: GEP.getSourceElementType(), Ptr: PtrOp, IdxList: Indices, NameStr: GEP.getName());
        }
      }
    }
  }

  // nusw + nneg -> nuw
  if (GEP.hasNoUnsignedSignedWrap() && !GEP.hasNoUnsignedWrap() &&
      all_of(Range: GEP.indices(), P: [&](Value *Idx) {
        return isKnownNonNegative(V: Idx, SQ: SQ.getWithInstruction(I: &GEP));
      })) {
    GEP.setNoWrapFlags(GEP.getNoWrapFlags() | GEPNoWrapFlags::noUnsignedWrap());
    return &GEP;
  }

  // These rewrites are trying to preserve inbounds/nuw attributes. So we want
  // to do this after having tried to derive "nuw" above.
  if (GEP.getNumIndices() == 1) {
    // Given (gep p, x+y) we want to determine the common nowrap flags for both
    // geps if transforming into (gep (gep p, x), y).
    auto GetPreservedNoWrapFlags = [&](bool AddIsNUW) {
      // We can preserve both "inbounds nuw", "nusw nuw" and "nuw" if we know
      // that x + y does not have unsigned wrap.
      if (GEP.hasNoUnsignedWrap() && AddIsNUW)
        return GEP.getNoWrapFlags();
      return GEPNoWrapFlags::none();
    };

    // Try to replace ADD + GEP with GEP + GEP.
    Value *Idx1, *Idx2;
    if (match(V: GEP.getOperand(i_nocapture: 1),
              P: m_OneUse(SubPattern: m_AddLike(L: m_Value(V&: Idx1), R: m_Value(V&: Idx2))))) {
      // %idx = add i64 %idx1, %idx2
      // %gep = getelementptr i32, ptr %ptr, i64 %idx
      // as:
      // %newptr = getelementptr i32, ptr %ptr, i64 %idx1
      // %newgep = getelementptr i32, ptr %newptr, i64 %idx2
      bool NUW = match(V: GEP.getOperand(i_nocapture: 1), P: m_NUWAddLike(L: m_Value(), R: m_Value()));
      GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW);
      auto *NewPtr =
          Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: GEP.getPointerOperand(),
                            IdxList: Idx1, Name: "", NW: NWFlags);
      return replaceInstUsesWith(I&: GEP,
                                 V: Builder.CreateGEP(Ty: GEP.getSourceElementType(),
                                                 Ptr: NewPtr, IdxList: Idx2, Name: "", NW: NWFlags));
    }
    ConstantInt *C;
    if (match(V: GEP.getOperand(i_nocapture: 1), P: m_OneUse(SubPattern: m_SExtLike(Op: m_OneUse(SubPattern: m_NSWAddLike(
                                   L: m_Value(V&: Idx1), R: m_ConstantInt(CI&: C))))))) {
      // %add = add nsw i32 %idx1, idx2
      // %sidx = sext i32 %add to i64
      // %gep = getelementptr i32, ptr %ptr, i64 %sidx
      // as:
      // %newptr = getelementptr i32, ptr %ptr, i32 %idx1
      // %newgep = getelementptr i32, ptr %newptr, i32 idx2
      bool NUW = match(V: GEP.getOperand(i_nocapture: 1),
                       P: m_NNegZExt(Op: m_NUWAddLike(L: m_Value(), R: m_Value())));
      GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW);
      auto *NewPtr = Builder.CreateGEP(
          Ty: GEP.getSourceElementType(), Ptr: GEP.getPointerOperand(),
          IdxList: Builder.CreateSExt(V: Idx1, DestTy: GEP.getOperand(i_nocapture: 1)->getType()), Name: "", NW: NWFlags);
      return replaceInstUsesWith(
          I&: GEP,
          V: Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: NewPtr,
                          IdxList: Builder.CreateSExt(V: C, DestTy: GEP.getOperand(i_nocapture: 1)->getType()),
                          Name: "", NW: NWFlags));
    }
  }

  if (Instruction *R = foldSelectGEP(GEP, Builder))
    return R;

  // srem -> (and/urem) for inbounds+nuw GEP
  if (Indices.size() == 1 && GEP.isInBounds() && GEP.hasNoUnsignedWrap()) {
    Value *X, *Y;

    // Match: idx = srem X, Y -- where Y is a power-of-two value.
    if (match(V: Indices[0], P: m_OneUse(SubPattern: m_SRem(L: m_Value(V&: X), R: m_Value(V&: Y)))) &&
        isKnownToBeAPowerOfTwo(V: Y, /*OrZero=*/true, CxtI: &GEP)) {
      // If GEP is inbounds+nuw, the offset cannot be negative
      // -> srem by power-of-two can be treated as urem,
      // and urem by power-of-two folds to 'and' later.
      // OrZero=true is fine here because division by zero is UB.
      Instruction *OldIdxI = cast<Instruction>(Val: Indices[0]);
      Value *NewIdx = Builder.CreateURem(LHS: X, RHS: Y, Name: OldIdxI->getName());

      return GetElementPtrInst::Create(PointeeType: GEPEltType, Ptr: PtrOp, IdxList: {NewIdx},
                                       NW: GEP.getNoWrapFlags());
    }
  }

  return nullptr;
}
3696
3697static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
3698 Instruction *AI) {
3699 if (isa<ConstantPointerNull>(Val: V))
3700 return true;
3701 if (auto *LI = dyn_cast<LoadInst>(Val: V))
3702 return isa<GlobalVariable>(Val: LI->getPointerOperand());
3703 // Two distinct allocations will never be equal.
3704 return isAllocLikeFn(V, TLI: &TLI) && V != AI;
3705}
3706
3707/// Given a call CB which uses an address UsedV, return true if we can prove the
3708/// call's only possible effect is storing to V.
3709static bool isRemovableWrite(CallBase &CB, Value *UsedV,
3710 const TargetLibraryInfo &TLI) {
3711 if (!CB.use_empty())
3712 // TODO: add recursion if returned attribute is present
3713 return false;
3714
3715 if (CB.isTerminator())
3716 // TODO: remove implementation restriction
3717 return false;
3718
3719 if (!CB.willReturn() || !CB.doesNotThrow())
3720 return false;
3721
3722 // If the only possible side effect of the call is writing to the alloca,
3723 // and the result isn't used, we can safely remove any reads implied by the
3724 // call including those which might read the alloca itself.
3725 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: &CB, TLI);
3726 return Dest && Dest->Ptr == UsedV;
3727}
3728
/// Determine whether the allocation site \p AI has only removable users.
///
/// Walks all transitive users of \p AI via a worklist. On success, returns
/// the combined kind of access (Mod/Ref/NoModRef) the users make of the
/// allocation and appends every instruction that must be erased to
/// \p Users; returns std::nullopt if any user prevents removal.
/// \p KnowInit is true when the initial contents of the allocation are
/// known (zero/undef), which lets reads before any write be accepted
/// (Access starts at NoModRef rather than Mod).
static std::optional<ModRefInfo>
isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakTrackingVH> &Users,
                     const TargetLibraryInfo &TLI, bool KnowInit) {
  SmallVector<Instruction*, 4> Worklist;
  const std::optional<StringRef> Family = getAllocationFamily(I: AI, TLI: &TLI);
  Worklist.push_back(Elt: AI);
  // Access lattice: starts "unknown contents" (Mod) unless the initial
  // value is known; once both Mod and Ref would be needed we must give up
  // (checked at each Load/Store/mem-intrinsic below).
  ModRefInfo Access = KnowInit ? ModRefInfo::NoModRef : ModRefInfo::Mod;

  do {
    Instruction *PI = Worklist.pop_back_val();
    for (User *U : PI->users()) {
      Instruction *I = cast<Instruction>(Val: U);
      switch (I->getOpcode()) {
      default:
        // Give up the moment we see something we can't handle.
        return std::nullopt;

      case Instruction::AddrSpaceCast:
      case Instruction::BitCast:
      case Instruction::GetElementPtr:
        // Pointer-deriving instructions: removable themselves, and their
        // users must be examined too.
        Users.emplace_back(Args&: I);
        Worklist.push_back(Elt: I);
        continue;

      case Instruction::ICmp: {
        ICmpInst *ICI = cast<ICmpInst>(Val: I);
        // We can fold eq/ne comparisons with null to false/true, respectively.
        // We also fold comparisons in some conditions provided the alloc has
        // not escaped (see isNeverEqualToUnescapedAlloc).
        if (!ICI->isEquality())
          return std::nullopt;
        unsigned OtherIndex = (ICI->getOperand(i_nocapture: 0) == PI) ? 1 : 0;
        if (!isNeverEqualToUnescapedAlloc(V: ICI->getOperand(i_nocapture: OtherIndex), TLI, AI))
          return std::nullopt;

        // Do not fold compares to aligned_alloc calls, as they may have to
        // return null in case the required alignment cannot be satisfied,
        // unless we can prove that both alignment and size are valid.
        auto AlignmentAndSizeKnownValid = [](CallBase *CB) {
          // Check if alignment and size of a call to aligned_alloc is valid,
          // that is alignment is a power-of-2 and the size is a multiple of the
          // alignment.
          const APInt *Alignment;
          const APInt *Size;
          return match(V: CB->getArgOperand(i: 0), P: m_APInt(Res&: Alignment)) &&
                 match(V: CB->getArgOperand(i: 1), P: m_APInt(Res&: Size)) &&
                 Alignment->isPowerOf2() && Size->urem(RHS: *Alignment).isZero();
        };
        auto *CB = dyn_cast<CallBase>(Val: AI);
        LibFunc TheLibFunc;
        if (CB && TLI.getLibFunc(FDecl: *CB->getCalledFunction(), F&: TheLibFunc) &&
            TLI.has(F: TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc &&
            !AlignmentAndSizeKnownValid(CB))
          return std::nullopt;
        Users.emplace_back(Args&: I);
        continue;
      }

      case Instruction::Call:
        // Ignore no-op and store intrinsics.
        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
          switch (II->getIntrinsicID()) {
          default:
            return std::nullopt;

          case Intrinsic::memmove:
          case Intrinsic::memcpy:
          case Intrinsic::memset: {
            MemIntrinsic *MI = cast<MemIntrinsic>(Val: II);
            if (MI->isVolatile())
              return std::nullopt;
            // Note: this could also be ModRef, but we can still interpret that
            // as just Mod in that case.
            ModRefInfo NewAccess =
                MI->getRawDest() == PI ? ModRefInfo::Mod : ModRefInfo::Ref;
            // Reject if this access conflicts with what we've already seen
            // (e.g. a read after unknown-content writes).
            if ((Access & ~NewAccess) != ModRefInfo::NoModRef)
              return std::nullopt;
            Access |= NewAccess;
            // Deliberate fallthrough: the mem intrinsic itself is removable
            // like the no-op intrinsics below.
            [[fallthrough]];
          }
          case Intrinsic::assume:
          case Intrinsic::invariant_start:
          case Intrinsic::invariant_end:
          case Intrinsic::lifetime_start:
          case Intrinsic::lifetime_end:
          case Intrinsic::objectsize:
            Users.emplace_back(Args&: I);
            continue;
          case Intrinsic::launder_invariant_group:
          case Intrinsic::strip_invariant_group:
            // These return a pointer derived from the alloc; follow users.
            Users.emplace_back(Args&: I);
            Worklist.push_back(Elt: I);
            continue;
          }
        }

        // A matching-family free of this pointer is removable.
        if (Family && getFreedOperand(CB: cast<CallBase>(Val: I), TLI: &TLI) == PI &&
            getAllocationFamily(I, TLI: &TLI) == Family) {
          Users.emplace_back(Args&: I);
          continue;
        }

        // A matching-family realloc yields a derived pointer; follow users.
        if (Family && getReallocatedOperand(CB: cast<CallBase>(Val: I)) == PI &&
            getAllocationFamily(I, TLI: &TLI) == Family) {
          Users.emplace_back(Args&: I);
          Worklist.push_back(Elt: I);
          continue;
        }

        // A call whose only effect is an unread write to the alloc.
        if (!isRefSet(MRI: Access) &&
            isRemovableWrite(CB&: *cast<CallBase>(Val: I), UsedV: PI, TLI)) {
          Access |= ModRefInfo::Mod;
          Users.emplace_back(Args&: I);
          continue;
        }

        return std::nullopt;

      case Instruction::Store: {
        StoreInst *SI = cast<StoreInst>(Val: I);
        // Only non-volatile stores *to* the pointer (not of it) are ok,
        // and only while no read of the alloc has been seen.
        if (SI->isVolatile() || SI->getPointerOperand() != PI)
          return std::nullopt;
        if (isRefSet(MRI: Access))
          return std::nullopt;
        Access |= ModRefInfo::Mod;
        Users.emplace_back(Args&: I);
        continue;
      }

      case Instruction::Load: {
        LoadInst *LI = cast<LoadInst>(Val: I);
        // Loads are only ok while the contents are known (no Mod seen).
        if (LI->isVolatile() || LI->getPointerOperand() != PI)
          return std::nullopt;
        if (isModSet(MRI: Access))
          return std::nullopt;
        Access |= ModRefInfo::Ref;
        Users.emplace_back(Args&: I);
        continue;
      }
      }
      llvm_unreachable("missing a return?");
    }
  } while (!Worklist.empty());

  assert(Access != ModRefInfo::ModRef);
  return Access;
}
3876
Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
  // Try to delete an entire removable allocation site (an alloca, or a
  // removable allocation call such as malloc) together with all of its
  // "harmless" users, as collected by isAllocSiteRemovable. Returns the
  // erased instruction sentinel on success, nullptr when nothing was done.
  assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI));

  // If we have a malloc call which is only used in any amount of comparisons to
  // null and free calls, delete the calls and replace the comparisons with true
  // or false as appropriate.

  // This is based on the principle that we can substitute our own allocation
  // function (which will never return null) rather than knowledge of the
  // specific function being called. In some sense this can change the permitted
  // outputs of a program (when we convert a malloc to an alloca, the fact that
  // the allocation is now on the stack is potentially visible, for example),
  // but we believe in a permissible manner.
  SmallVector<WeakTrackingVH, 64> Users;

  // If we are removing an alloca with a dbg.declare, insert dbg.value calls
  // before each store.
  SmallVector<DbgVariableRecord *, 8> DVRs;
  std::unique_ptr<DIBuilder> DIB;
  if (isa<AllocaInst>(Val: MI)) {
    findDbgUsers(V: &MI, DbgVariableRecords&: DVRs);
    DIB.reset(p: new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
  }

  // Determine what getInitialValueOfAllocation would return without actually
  // allocating the result.
  bool KnowInitUndef = false;
  bool KnowInitZero = false;
  Constant *Init =
      getInitialValueOfAllocation(V: &MI, TLI: &TLI, Ty: Type::getInt8Ty(C&: MI.getContext()));
  if (Init) {
    if (isa<UndefValue>(Val: Init))
      KnowInitUndef = true;
    else if (Init->isNullValue())
      KnowInitZero = true;
  }
  // The various sanitizers don't actually return undef memory, but rather
  // memory initialized with special forms of runtime poison
  auto &F = *MI.getFunction();
  if (F.hasFnAttribute(Kind: Attribute::SanitizeMemory) ||
      F.hasFnAttribute(Kind: Attribute::SanitizeAddress))
    KnowInitUndef = false;

  auto Removable =
      isAllocSiteRemovable(AI: &MI, Users, TLI, KnowInit: KnowInitZero | KnowInitUndef);
  if (Removable) {
    // First pass: lower @llvm.objectsize calls and rewrite memory transfers
    // that read the allocation. This must happen before the erasing pass
    // below, since these users may reference the allocation indirectly.
    for (WeakTrackingVH &User : Users) {
      // Lowering all @llvm.objectsize and MTI calls first because they may use
      // a bitcast/GEP of the alloca we are removing.
      if (!User)
        continue;

      Instruction *I = cast<Instruction>(Val: &*User);

      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
        if (II->getIntrinsicID() == Intrinsic::objectsize) {
          // Replace the objectsize query with a concrete value and queue any
          // helper instructions the lowering created.
          SmallVector<Instruction *> InsertedInstructions;
          Value *Result = lowerObjectSizeCall(
              ObjectSize: II, DL, TLI: &TLI, AA, /*MustSucceed=*/true, InsertedInstructions: &InsertedInstructions);
          for (Instruction *Inserted : InsertedInstructions)
            Worklist.add(I: Inserted);
          replaceInstUsesWith(I&: *I, V: Result);
          eraseInstFromFunction(I&: *I);
          User = nullptr; // Skip examining in the next loop.
          continue;
        }
        if (auto *MTI = dyn_cast<MemTransferInst>(Val: I)) {
          // A transfer that reads the allocation, when the allocation is
          // known zero-initialized, is equivalent to a memset of zero; emit
          // that replacement so the read of the allocation disappears.
          if (KnowInitZero && isRefSet(MRI: *Removable)) {
            IRBuilderBase::InsertPointGuard Guard(Builder);
            Builder.SetInsertPoint(MTI);
            auto *M = Builder.CreateMemSet(
                Ptr: MTI->getRawDest(),
                Val: ConstantInt::get(Ty: Type::getInt8Ty(C&: MI.getContext()), V: 0),
                Size: MTI->getLength(), Align: MTI->getDestAlign());
            M->copyMetadata(SrcInst: *MTI);
          }
        }
      }
    }
    // Second pass: replace and erase every remaining user.
    for (WeakTrackingVH &User : Users) {
      if (!User)
        continue;

      Instruction *I = cast<Instruction>(Val: &*User);

      if (ICmpInst *C = dyn_cast<ICmpInst>(Val: I)) {
        // Comparisons against null fold to a constant, since our replacement
        // allocation is never null.
        replaceInstUsesWith(I&: *C,
                            V: ConstantInt::get(Ty: Type::getInt1Ty(C&: C->getContext()),
                                              V: C->isFalseWhenEqual()));
      } else if (auto *SI = dyn_cast<StoreInst>(Val: I)) {
        // Keep debug info alive: turn the dbg.declare of the alloca into
        // dbg.values at each store.
        for (auto *DVR : DVRs)
          if (DVR->isAddressOfVariable())
            ConvertDebugDeclareToDebugValue(DVR, SI, Builder&: *DIB);
      } else {
        // Casts, GEP, or anything else: we're about to delete this instruction,
        // so it can not have any valid uses.
        Constant *Replace;
        if (isa<LoadInst>(Val: I)) {
          // Loads see the allocation's initial contents: undef or zero.
          assert(KnowInitZero || KnowInitUndef);
          Replace = KnowInitUndef ? UndefValue::get(T: I->getType())
                                  : Constant::getNullValue(Ty: I->getType());
        } else
          Replace = PoisonValue::get(T: I->getType());
        replaceInstUsesWith(I&: *I, V: Replace);
      }
      eraseInstFromFunction(I&: *I);
    }

    if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &MI)) {
      // Replace invoke with a NOP intrinsic to maintain the original CFG
      Module *M = II->getModule();
      Function *F = Intrinsic::getOrInsertDeclaration(M, id: Intrinsic::donothing);
      auto *NewII = InvokeInst::Create(
          Func: F, IfNormal: II->getNormalDest(), IfException: II->getUnwindDest(), Args: {}, NameStr: "", InsertBefore: II->getParent());
      NewII->setDebugLoc(II->getDebugLoc());
    }

    // Remove debug intrinsics which describe the value contained within the
    // alloca. In addition to removing dbg.{declare,addr} which simply point to
    // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
    //
    // ```
    // define void @foo(i32 %0) {
    //   %a = alloca i32              ; Deleted.
    //   store i32 %0, i32* %a
    //   dbg.value(i32 %0, "arg0")    ; Not deleted.
    //   dbg.value(i32* %a, "arg0", DW_OP_deref)  ; Deleted.
    //   call void @trivially_inlinable_no_op(i32* %a)
    //   ret void
    // }
    // ```
    //
    // This may not be required if we stop describing the contents of allocas
    // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
    // the LowerDbgDeclare utility.
    //
    // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
    // "arg0" dbg.value may be stale after the call. However, failing to remove
    // the DW_OP_deref dbg.value causes large gaps in location coverage.
    //
    // FIXME: the Assignment Tracking project has now likely made this
    // redundant (and it's sometimes harmful).
    for (auto *DVR : DVRs)
      if (DVR->isAddressOfVariable() || DVR->getExpression()->startsWithDeref())
        DVR->eraseFromParent();

    return eraseInstFromFunction(I&: MI);
  }
  return nullptr;
}
4027
4028/// Move the call to free before a NULL test.
4029///
4030/// Check if this free is accessed after its argument has been test
4031/// against NULL (property 0).
4032/// If yes, it is legal to move this call in its predecessor block.
4033///
4034/// The move is performed only if the block containing the call to free
4035/// will be removed, i.e.:
4036/// 1. it has only one predecessor P, and P has two successors
4037/// 2. it contains the call, noops, and an unconditional branch
4038/// 3. its successor is the same as its predecessor's successor
4039///
4040/// The profitability is out-of concern here and this function should
4041/// be called only if the caller knows this transformation would be
4042/// profitable (e.g., for code size).
static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
                                                const DataLayout &DL) {
  Value *Op = FI.getArgOperand(i: 0);
  BasicBlock *FreeInstrBB = FI.getParent();
  BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();

  // Validate part of constraint #1: Only one predecessor
  // FIXME: We can extend the number of predecessor, but in that case, we
  //        would duplicate the call to free in each predecessor and it may
  //        not be profitable even for code size.
  if (!PredBB)
    return nullptr;

  // Validate constraint #2: Does this block contains only the call to
  // free, noops, and an unconditional branch?
  BasicBlock *SuccBB;
  Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
  if (!match(V: FreeInstrBBTerminator, P: m_UnconditionalBr(Succ&: SuccBB)))
    return nullptr;

  // If there are only 2 instructions in the block, at this point,
  // this is the call to free and unconditional.
  // If there are more than 2 instructions, check that they are noops
  // i.e., they won't hurt the performance of the generated code.
  if (FreeInstrBB->size() != 2) {
    for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) {
      if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
        continue;
      // Only no-op casts (which will move along with the free) are tolerated.
      auto *Cast = dyn_cast<CastInst>(Val: &Inst);
      if (!Cast || !Cast->isNoopCast(DL))
        return nullptr;
    }
  }
  // Validate the rest of constraint #1 by matching on the pred branch.
  // The predecessor must end in `br (icmp eq/ne Op, null), TrueBB, FalseBB`,
  // comparing either Op itself or Op stripped of pointer casts.
  Instruction *TI = PredBB->getTerminator();
  BasicBlock *TrueBB, *FalseBB;
  CmpPredicate Pred;
  if (!match(V: TI, P: m_Br(C: m_ICmp(Pred,
                                L: m_CombineOr(L: m_Specific(V: Op),
                                             R: m_Specific(V: Op->stripPointerCasts())),
                                R: m_Zero()),
                        T&: TrueBB, F&: FalseBB)))
    return nullptr;
  if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
    return nullptr;

  // Validate constraint #3: Ensure the null case just falls through.
  if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
    return nullptr;
  assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
         "Broken CFG: missing edge from predecessor to successor");

  // At this point, we know that everything in FreeInstrBB can be moved
  // before TI.
  for (Instruction &Instr : llvm::make_early_inc_range(Range&: *FreeInstrBB)) {
    if (&Instr == FreeInstrBBTerminator)
      break;
    Instr.moveBeforePreserving(MovePos: TI->getIterator());
  }
  assert(FreeInstrBB->size() == 1 &&
         "Only the branch instruction should remain");

  // Now that we've moved the call to free before the NULL check, we have to
  // remove any attributes on its parameter that imply it's non-null, because
  // those attributes might have only been valid because of the NULL check, and
  // we can get miscompiles if we keep them. This is conservative if non-null is
  // also implied by something other than the NULL check, but it's guaranteed to
  // be correct, and the conservativeness won't matter in practice, since the
  // attributes are irrelevant for the call to free itself and the pointer
  // shouldn't be used after the call.
  AttributeList Attrs = FI.getAttributes();
  Attrs = Attrs.removeParamAttribute(C&: FI.getContext(), ArgNo: 0, Kind: Attribute::NonNull);
  Attribute Dereferenceable = Attrs.getParamAttr(ArgNo: 0, Kind: Attribute::Dereferenceable);
  if (Dereferenceable.isValid()) {
    // Downgrade dereferenceable to dereferenceable_or_null, since the pointer
    // may now be null when the call executes.
    uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
    Attrs = Attrs.removeParamAttribute(C&: FI.getContext(), ArgNo: 0,
                                       Kind: Attribute::Dereferenceable);
    Attrs = Attrs.addDereferenceableOrNullParamAttr(C&: FI.getContext(), ArgNo: 0, Bytes);
  }
  FI.setAttributes(Attrs);

  return &FI;
}
4126
4127Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
4128 // free undef -> unreachable.
4129 if (isa<UndefValue>(Val: Op)) {
4130 // Leave a marker since we can't modify the CFG here.
4131 CreateNonTerminatorUnreachable(InsertAt: &FI);
4132 return eraseInstFromFunction(I&: FI);
4133 }
4134
4135 // If we have 'free null' delete the instruction. This can happen in stl code
4136 // when lots of inlining happens.
4137 if (isa<ConstantPointerNull>(Val: Op))
4138 return eraseInstFromFunction(I&: FI);
4139
4140 // If we had free(realloc(...)) with no intervening uses, then eliminate the
4141 // realloc() entirely.
4142 CallInst *CI = dyn_cast<CallInst>(Val: Op);
4143 if (CI && CI->hasOneUse())
4144 if (Value *ReallocatedOp = getReallocatedOperand(CB: CI))
4145 return eraseInstFromFunction(I&: *replaceInstUsesWith(I&: *CI, V: ReallocatedOp));
4146
4147 // If we optimize for code size, try to move the call to free before the null
4148 // test so that simplify cfg can remove the empty block and dead code
4149 // elimination the branch. I.e., helps to turn something like:
4150 // if (foo) free(foo);
4151 // into
4152 // free(foo);
4153 //
4154 // Note that we can only do this for 'free' and not for any flavor of
4155 // 'operator delete'; there is no 'operator delete' symbol for which we are
4156 // permitted to invent a call, even if we're passing in a null pointer.
4157 if (MinimizeSize) {
4158 LibFunc Func;
4159 if (TLI.getLibFunc(CB: FI, F&: Func) && TLI.has(F: Func) && Func == LibFunc_free)
4160 if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
4161 return I;
4162 }
4163
4164 return nullptr;
4165}
4166
4167Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
4168 Value *RetVal = RI.getReturnValue();
4169 if (!RetVal)
4170 return nullptr;
4171
4172 Function *F = RI.getFunction();
4173 Type *RetTy = RetVal->getType();
4174 if (RetTy->isPointerTy()) {
4175 bool HasDereferenceable =
4176 F->getAttributes().getRetDereferenceableBytes() > 0;
4177 if (F->hasRetAttribute(Kind: Attribute::NonNull) ||
4178 (HasDereferenceable &&
4179 !NullPointerIsDefined(F, AS: RetTy->getPointerAddressSpace()))) {
4180 if (Value *V = simplifyNonNullOperand(V: RetVal, HasDereferenceable))
4181 return replaceOperand(I&: RI, OpNum: 0, V);
4182 }
4183 }
4184
4185 if (!AttributeFuncs::isNoFPClassCompatibleType(Ty: RetTy))
4186 return nullptr;
4187
4188 FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass();
4189 if (ReturnClass == fcNone)
4190 return nullptr;
4191
4192 KnownFPClass KnownClass;
4193 if (SimplifyDemandedFPClass(I: &RI, Op: 0, DemandedMask: ~ReturnClass, Known&: KnownClass))
4194 return &RI;
4195
4196 return nullptr;
4197}
4198
4199// WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
4200bool InstCombinerImpl::removeInstructionsBeforeUnreachable(Instruction &I) {
4201 // Try to remove the previous instruction if it must lead to unreachable.
4202 // This includes instructions like stores and "llvm.assume" that may not get
4203 // removed by simple dead code elimination.
4204 bool Changed = false;
4205 while (Instruction *Prev = I.getPrevNode()) {
4206 // While we theoretically can erase EH, that would result in a block that
4207 // used to start with an EH no longer starting with EH, which is invalid.
4208 // To make it valid, we'd need to fixup predecessors to no longer refer to
4209 // this block, but that changes CFG, which is not allowed in InstCombine.
4210 if (Prev->isEHPad())
4211 break; // Can not drop any more instructions. We're done here.
4212
4213 if (!isGuaranteedToTransferExecutionToSuccessor(I: Prev))
4214 break; // Can not drop any more instructions. We're done here.
4215 // Otherwise, this instruction can be freely erased,
4216 // even if it is not side-effect free.
4217
4218 // A value may still have uses before we process it here (for example, in
4219 // another unreachable block), so convert those to poison.
4220 replaceInstUsesWith(I&: *Prev, V: PoisonValue::get(T: Prev->getType()));
4221 eraseInstFromFunction(I&: *Prev);
4222 Changed = true;
4223 }
4224 return Changed;
4225}
4226
4227Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
4228 removeInstructionsBeforeUnreachable(I);
4229 return nullptr;
4230}
4231
4232Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
4233 assert(BI.isUnconditional() && "Only for unconditional branches.");
4234
4235 // If this store is the second-to-last instruction in the basic block
4236 // (excluding debug info) and if the block ends with
4237 // an unconditional branch, try to move the store to the successor block.
4238
4239 auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
4240 BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
4241 do {
4242 if (BBI != FirstInstr)
4243 --BBI;
4244 } while (BBI != FirstInstr && BBI->isDebugOrPseudoInst());
4245
4246 return dyn_cast<StoreInst>(Val&: BBI);
4247 };
4248
4249 if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
4250 if (mergeStoreIntoSuccessor(SI&: *SI))
4251 return &BI;
4252
4253 return nullptr;
4254}
4255
4256void InstCombinerImpl::addDeadEdge(BasicBlock *From, BasicBlock *To,
4257 SmallVectorImpl<BasicBlock *> &Worklist) {
4258 if (!DeadEdges.insert(V: {From, To}).second)
4259 return;
4260
4261 // Replace phi node operands in successor with poison.
4262 for (PHINode &PN : To->phis())
4263 for (Use &U : PN.incoming_values())
4264 if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(Val: U)) {
4265 replaceUse(U, NewValue: PoisonValue::get(T: PN.getType()));
4266 addToWorklist(I: &PN);
4267 MadeIRChange = true;
4268 }
4269
4270 Worklist.push_back(Elt: To);
4271}
4272
4273// Under the assumption that I is unreachable, remove it and following
4274// instructions. Changes are reported directly to MadeIRChange.
void InstCombinerImpl::handleUnreachableFrom(
    Instruction *I, SmallVectorImpl<BasicBlock *> &Worklist) {
  BasicBlock *BB = I->getParent();
  // Erase the range [I, terminator) in reverse program order: start at the
  // instruction just before the terminator and walk back down to I.
  for (Instruction &Inst : make_early_inc_range(
           Range: make_range(x: std::next(x: BB->getTerminator()->getReverseIterator()),
                            y: std::next(x: I->getReverseIterator())))) {
    if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
      // The value may still be used elsewhere (e.g. from other unreachable
      // code); those uses become poison.
      replaceInstUsesWith(I&: Inst, V: PoisonValue::get(T: Inst.getType()));
      MadeIRChange = true;
    }
    // Keep EH pads and token producers so the IR stays well-formed.
    if (Inst.isEHPad() || Inst.getType()->isTokenTy())
      continue;
    // RemoveDIs: erase debug-info on this instruction manually.
    Inst.dropDbgRecords();
    eraseInstFromFunction(I&: Inst);
    MadeIRChange = true;
  }

  // Simplify the (unreachable) terminator itself; requeue any values it
  // poisoned so their users get revisited.
  SmallVector<Value *> Changed;
  if (handleUnreachableTerminator(I: BB->getTerminator(), PoisonedValues&: Changed)) {
    MadeIRChange = true;
    for (Value *V : Changed)
      addToWorklist(I: cast<Instruction>(Val: V));
  }

  // Handle potentially dead successors.
  for (BasicBlock *Succ : successors(BB))
    addDeadEdge(From: BB, To: Succ, Worklist);
}
4304
4305void InstCombinerImpl::handlePotentiallyDeadBlocks(
4306 SmallVectorImpl<BasicBlock *> &Worklist) {
4307 while (!Worklist.empty()) {
4308 BasicBlock *BB = Worklist.pop_back_val();
4309 if (!all_of(Range: predecessors(BB), P: [&](BasicBlock *Pred) {
4310 return DeadEdges.contains(V: {Pred, BB}) || DT.dominates(A: BB, B: Pred);
4311 }))
4312 continue;
4313
4314 handleUnreachableFrom(I: &BB->front(), Worklist);
4315 }
4316}
4317
4318void InstCombinerImpl::handlePotentiallyDeadSuccessors(BasicBlock *BB,
4319 BasicBlock *LiveSucc) {
4320 SmallVector<BasicBlock *> Worklist;
4321 for (BasicBlock *Succ : successors(BB)) {
4322 // The live successor isn't dead.
4323 if (Succ == LiveSucc)
4324 continue;
4325
4326 addDeadEdge(From: BB, To: Succ, Worklist);
4327 }
4328
4329 handlePotentiallyDeadBlocks(Worklist);
4330}
4331
Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
  if (BI.isUnconditional())
    return visitUnconditionalBranchInst(BI);

  // Change br (not X), label True, label False to: br X, label False, True
  Value *Cond = BI.getCondition();
  Value *X;
  if (match(V: Cond, P: m_Not(V: m_Value(V&: X))) && !isa<Constant>(Val: X)) {
    // Swap Destinations and condition...
    BI.swapSuccessors();
    if (BPI)
      BPI->swapSuccEdgesProbabilities(Src: BI.getParent());
    return replaceOperand(I&: BI, OpNum: 0, V: X);
  }

  // Canonicalize logical-and-with-invert as logical-or-with-invert.
  // This is done by inverting the condition and swapping successors:
  // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
  Value *Y;
  if (isa<SelectInst>(Val: Cond) &&
      match(V: Cond,
            P: m_OneUse(SubPattern: m_LogicalAnd(L: m_Value(V&: X), R: m_OneUse(SubPattern: m_Not(V: m_Value(V&: Y))))))) {
    Value *NotX = Builder.CreateNot(V: X, Name: "not." + X->getName());
    Value *Or = Builder.CreateLogicalOr(Cond1: NotX, Cond2: Y);

    // Set weights for the new OR select instruction too.
    if (!ProfcheckDisableMetadataFixes) {
      if (auto *OrInst = dyn_cast<Instruction>(Val: Or)) {
        if (auto *CondInst = dyn_cast<Instruction>(Val: Cond)) {
          SmallVector<uint32_t> Weights;
          if (extractBranchWeights(I: *CondInst, Weights)) {
            assert(Weights.size() == 2 &&
                   "Unexpected number of branch weights!");
            // The overall condition was inverted, so the weight order
            // inverts as well.
            std::swap(a&: Weights[0], b&: Weights[1]);
            setBranchWeights(I&: *OrInst, Weights, /*IsExpected=*/false);
          }
        }
      }
    }
    BI.swapSuccessors();
    if (BPI)
      BPI->swapSuccEdgesProbabilities(Src: BI.getParent());
    return replaceOperand(I&: BI, OpNum: 0, V: Or);
  }

  // If the condition is irrelevant, remove the use so that other
  // transforms on the condition become more effective.
  if (!isa<ConstantInt>(Val: Cond) && BI.getSuccessor(i: 0) == BI.getSuccessor(i: 1))
    return replaceOperand(I&: BI, OpNum: 0, V: ConstantInt::getFalse(Ty: Cond->getType()));

  // Canonicalize, for example, fcmp_one -> fcmp_oeq.
  CmpPredicate Pred;
  if (match(V: Cond, P: m_OneUse(SubPattern: m_FCmp(Pred, L: m_Value(), R: m_Value()))) &&
      !isCanonicalPredicate(Pred)) {
    // Swap destinations and condition.
    auto *Cmp = cast<CmpInst>(Val: Cond);
    Cmp->setPredicate(CmpInst::getInversePredicate(pred: Pred));
    BI.swapSuccessors();
    if (BPI)
      BPI->swapSuccEdgesProbabilities(Src: BI.getParent());
    Worklist.push(I: Cmp);
    return &BI;
  }

  // A branch on undef has no known live successor; both edges may be dead.
  if (isa<UndefValue>(Val: Cond)) {
    handlePotentiallyDeadSuccessors(BB: BI.getParent(), /*LiveSucc*/ nullptr);
    return nullptr;
  }
  // A constant condition picks exactly one live successor; the other edge
  // is dead.
  if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) {
    handlePotentiallyDeadSuccessors(BB: BI.getParent(),
                                    LiveSucc: BI.getSuccessor(i: !CI->getZExtValue()));
    return nullptr;
  }

  // Replace all dominated uses of the condition with true/false
  // Ignore constant expressions to avoid iterating over uses on other
  // functions.
  if (!isa<Constant>(Val: Cond) && BI.getSuccessor(i: 0) != BI.getSuccessor(i: 1)) {
    for (auto &U : make_early_inc_range(Range: Cond->uses())) {
      // Uses dominated by the taken edge know the condition's value there.
      BasicBlockEdge Edge0(BI.getParent(), BI.getSuccessor(i: 0));
      if (DT.dominates(BBE: Edge0, U)) {
        replaceUse(U, NewValue: ConstantInt::getTrue(Ty: Cond->getType()));
        addToWorklist(I: cast<Instruction>(Val: U.getUser()));
        continue;
      }
      BasicBlockEdge Edge1(BI.getParent(), BI.getSuccessor(i: 1));
      if (DT.dominates(BBE: Edge1, U)) {
        replaceUse(U, NewValue: ConstantInt::getFalse(Ty: Cond->getType()));
        addToWorklist(I: cast<Instruction>(Val: U.getUser()));
      }
    }
  }

  // Record the branch so later queries can reason about the condition.
  DC.registerBranch(BI: &BI);
  return nullptr;
}
4428
4429// Replaces (switch (select cond, X, C)/(select cond, C, X)) with (switch X) if
4430// we can prove that both (switch C) and (switch X) go to the default when cond
4431// is false/true.
static Value *simplifySwitchOnSelectUsingRanges(SwitchInst &SI,
                                                SelectInst *Select,
                                                bool IsTrueArm) {
  // Examine the select arm selected when the condition is IsTrueArm; it must
  // be a constant integer for this fold.
  unsigned CstOpIdx = IsTrueArm ? 1 : 2;
  auto *C = dyn_cast<ConstantInt>(Val: Select->getOperand(i_nocapture: CstOpIdx));
  if (!C)
    return nullptr;

  // The constant arm must route to the default destination, so dropping the
  // select cannot change where that arm goes.
  BasicBlock *CstBB = SI.findCaseValue(C)->getCaseSuccessor();
  if (CstBB != SI.getDefaultDest())
    return nullptr;
  // The other arm (the candidate replacement) must feed an icmp-based select
  // condition of the form `icmp pred X, RHSC`.
  Value *X = Select->getOperand(i_nocapture: 3 - CstOpIdx);
  CmpPredicate Pred;
  const APInt *RHSC;
  if (!match(V: Select->getCondition(),
             P: m_ICmp(Pred, L: m_Specific(V: X), R: m_APInt(Res&: RHSC))))
    return nullptr;
  // Normalize Pred to describe the range of X when the OTHER (non-constant)
  // arm is selected.
  if (IsTrueArm)
    Pred = ICmpInst::getInversePredicate(pred: Pred);

  // See whether we can replace the select with X
  // Safe iff, whenever X itself is selected, X cannot hit any non-default
  // case, i.e. no case value lies inside the range where the constant arm
  // would have been chosen.
  ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, Other: *RHSC);
  for (auto Case : SI.cases())
    if (!CR.contains(Val: Case.getCaseValue()->getValue()))
      return nullptr;

  return X;
}
4460
Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
  Value *Cond = SI.getCondition();
  Value *Op0;
  const APInt *CondOpC;
  // An InvertFn maps an old case value (plus the condition's constant) to
  // the new case value after the condition is replaced by Op0.
  using InvertFn = std::function<APInt(const APInt &Case, const APInt &C)>;

  auto MaybeInvertible = [&](Value *Cond) -> InvertFn {
    if (match(V: Cond, P: m_Add(L: m_Value(V&: Op0), R: m_APInt(Res&: CondOpC))))
      // Change 'switch (X+C) case Case:' into 'switch (X) case Case-C'.
      return [](const APInt &Case, const APInt &C) { return Case - C; };

    if (match(V: Cond, P: m_Sub(L: m_APInt(Res&: CondOpC), R: m_Value(V&: Op0))))
      // Change 'switch (C-X) case Case:' into 'switch (X) case C-Case'.
      return [](const APInt &Case, const APInt &C) { return C - Case; };

    if (match(V: Cond, P: m_Xor(L: m_Value(V&: Op0), R: m_APInt(Res&: CondOpC))) &&
        !CondOpC->isMinSignedValue() && !CondOpC->isMaxSignedValue())
      // Change 'switch (X^C) case Case:' into 'switch (X) case Case^C'.
      // Prevent creation of large case values by excluding extremes.
      return [](const APInt &Case, const APInt &C) { return Case ^ C; };

    return nullptr;
  };

  // Attempt to invert and simplify the switch condition, as long as the
  // condition is not used further, as it may not be profitable otherwise.
  if (auto InvertFn = MaybeInvertible(Cond); InvertFn && Cond->hasOneUse()) {
    for (auto &Case : SI.cases()) {
      const APInt &New = InvertFn(Case.getCaseValue()->getValue(), *CondOpC);
      Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: New));
    }
    return replaceOperand(I&: SI, OpNum: 0, V: Op0);
  }

  // Fold switch(X << ShiftAmt) into switch(X) when every case value has at
  // least ShiftAmt trailing zeros (otherwise the case is unmatchable).
  uint64_t ShiftAmt;
  if (match(V: Cond, P: m_Shl(L: m_Value(V&: Op0), R: m_ConstantInt(V&: ShiftAmt))) &&
      ShiftAmt < Op0->getType()->getScalarSizeInBits() &&
      all_of(Range: SI.cases(), P: [&](const auto &Case) {
        return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt;
      })) {
    // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'.
    OverflowingBinaryOperator *Shl = cast<OverflowingBinaryOperator>(Val: Cond);
    if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() ||
        Shl->hasOneUse()) {
      Value *NewCond = Op0;
      if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) {
        // If the shift may wrap, we need to mask off the shifted bits.
        unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
        NewCond = Builder.CreateAnd(
            LHS: Op0, RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - ShiftAmt));
      }
      for (auto Case : SI.cases()) {
        const APInt &CaseVal = Case.getCaseValue()->getValue();
        // With nsw the shifted-out bits are sign copies (ashr); otherwise
        // use a logical shift.
        APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt)
                                                   : CaseVal.lshr(shiftAmt: ShiftAmt);
        Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: ShiftedCase));
      }
      return replaceOperand(I&: SI, OpNum: 0, V: NewCond);
    }
  }

  // Fold switch(zext/sext(X)) into switch(X) if possible.
  if (match(V: Cond, P: m_ZExtOrSExt(Op: m_Value(V&: Op0)))) {
    bool IsZExt = isa<ZExtInst>(Val: Cond);
    Type *SrcTy = Op0->getType();
    unsigned NewWidth = SrcTy->getScalarSizeInBits();

    // Legal only when every case value fits in the narrower source type
    // (unsigned range for zext, signed range for sext).
    if (all_of(Range: SI.cases(), P: [&](const auto &Case) {
          const APInt &CaseVal = Case.getCaseValue()->getValue();
          return IsZExt ? CaseVal.isIntN(N: NewWidth)
                        : CaseVal.isSignedIntN(N: NewWidth);
        })) {
      for (auto &Case : SI.cases()) {
        APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth);
        Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase));
      }
      return replaceOperand(I&: SI, OpNum: 0, V: Op0);
    }
  }

  // Fold switch(select cond, X, Y) into switch(X/Y) if possible
  if (auto *Select = dyn_cast<SelectInst>(Val: Cond)) {
    if (Value *V =
            simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/true))
      return replaceOperand(I&: SI, OpNum: 0, V);
    if (Value *V =
            simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/false))
      return replaceOperand(I&: SI, OpNum: 0, V);
  }

  KnownBits Known = computeKnownBits(V: Cond, CxtI: &SI);
  unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
  unsigned LeadingKnownOnes = Known.countMinLeadingOnes();

  // Compute the number of leading bits we can ignore.
  // TODO: A better way to determine this would use ComputeNumSignBits().
  for (const auto &C : SI.cases()) {
    LeadingKnownZeros =
        std::min(a: LeadingKnownZeros, b: C.getCaseValue()->getValue().countl_zero());
    LeadingKnownOnes =
        std::min(a: LeadingKnownOnes, b: C.getCaseValue()->getValue().countl_one());
  }

  unsigned NewWidth = Known.getBitWidth() - std::max(a: LeadingKnownZeros, b: LeadingKnownOnes);

  // Shrink the condition operand if the new type is smaller than the old type.
  // But do not shrink to a non-standard type, because backend can't generate
  // good code for that yet.
  // TODO: We can make it aggressive again after fixing PR39569.
  if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
      shouldChangeType(FromWidth: Known.getBitWidth(), ToWidth: NewWidth)) {
    IntegerType *Ty = IntegerType::get(C&: SI.getContext(), NumBits: NewWidth);
    Builder.SetInsertPoint(&SI);
    Value *NewCond = Builder.CreateTrunc(V: Cond, DestTy: Ty, Name: "trunc");

    for (auto Case : SI.cases()) {
      APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth);
      Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase));
    }
    return replaceOperand(I&: SI, OpNum: 0, V: NewCond);
  }

  // A switch on undef has no known live successor.
  if (isa<UndefValue>(Val: Cond)) {
    handlePotentiallyDeadSuccessors(BB: SI.getParent(), /*LiveSucc*/ nullptr);
    return nullptr;
  }
  // A constant condition selects exactly one live case; other edges are dead.
  if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) {
    handlePotentiallyDeadSuccessors(BB: SI.getParent(),
                                    LiveSucc: SI.findCaseValue(C: CI)->getCaseSuccessor());
    return nullptr;
  }

  return nullptr;
}
4595
Instruction *
InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
  // Fold extractvalue of a *.with.overflow intrinsic into simpler IR.
  auto *WO = dyn_cast<WithOverflowInst>(Val: EV.getAggregateOperand());
  if (!WO)
    return nullptr;

  Intrinsic::ID OvID = WO->getIntrinsicID();
  const APInt *C = nullptr;
  if (match(V: WO->getRHS(), P: m_APIntAllowPoison(Res&: C))) {
    // Folds on the value (index 0) of a multiply-with-overflow by a special
    // constant; these do not require the intrinsic to have a single use.
    if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
                                 OvID == Intrinsic::umul_with_overflow)) {
      // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
      if (C->isAllOnes())
        return BinaryOperator::CreateNeg(Op: WO->getLHS());
      // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
      if (C->isPowerOf2()) {
        return BinaryOperator::CreateShl(
            V1: WO->getLHS(),
            V2: ConstantInt::get(Ty: WO->getLHS()->getType(), V: C->logBase2()));
      }
    }
  }

  // We're extracting from an overflow intrinsic. See if we're the only user.
  // That allows us to simplify multiple result intrinsics to simpler things
  // that just get one value.
  if (!WO->hasOneUse())
    return nullptr;

  // Check if we're grabbing only the result of a 'with overflow' intrinsic
  // and replace it with a traditional binary instruction.
  if (*EV.idx_begin() == 0) {
    Instruction::BinaryOps BinOp = WO->getBinaryOp();
    Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
    // Replace the old instruction's uses with poison.
    replaceInstUsesWith(I&: *WO, V: PoisonValue::get(T: WO->getType()));
    eraseInstFromFunction(I&: *WO);
    return BinaryOperator::Create(Op: BinOp, S1: LHS, S2: RHS);
  }

  // From here on we are extracting only the overflow bit (index 1).
  assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");

  // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
  if (OvID == Intrinsic::usub_with_overflow)
    return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());

  // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
  // +1 is not possible because we assume signed values.
  if (OvID == Intrinsic::smul_with_overflow &&
      WO->getLHS()->getType()->isIntOrIntVectorTy(BitWidth: 1))
    return BinaryOperator::CreateAnd(V1: WO->getLHS(), V2: WO->getRHS());

  // extractvalue (umul_with_overflow X, X), 1 -> X u> 2^(N/2)-1
  if (OvID == Intrinsic::umul_with_overflow && WO->getLHS() == WO->getRHS()) {
    unsigned BitWidth = WO->getLHS()->getType()->getScalarSizeInBits();
    // Only handle even bitwidths for performance reasons.
    if (BitWidth % 2 == 0)
      return new ICmpInst(
          ICmpInst::ICMP_UGT, WO->getLHS(),
          ConstantInt::get(Ty: WO->getLHS()->getType(),
                           V: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth / 2)));
  }

  // If only the overflow result is used, and the right hand side is a
  // constant (or constant splat), we can remove the intrinsic by directly
  // checking for overflow.
  if (C) {
    // Compute the no-wrap range for LHS given RHS=C, then construct an
    // equivalent icmp, potentially using an offset.
    ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
        BinOp: WO->getBinaryOp(), Other: *C, NoWrapKind: WO->getNoWrapKind());

    CmpInst::Predicate Pred;
    APInt NewRHSC, Offset;
    NWR.getEquivalentICmp(Pred, RHS&: NewRHSC, Offset);
    auto *OpTy = WO->getRHS()->getType();
    auto *NewLHS = WO->getLHS();
    if (Offset != 0)
      NewLHS = Builder.CreateAdd(LHS: NewLHS, RHS: ConstantInt::get(Ty: OpTy, V: Offset));
    // The no-wrap region describes when overflow does NOT happen, so the
    // overflow bit is the inverse predicate.
    return new ICmpInst(ICmpInst::getInversePredicate(pred: Pred), NewLHS,
                        ConstantInt::get(Ty: OpTy, V: NewRHSC));
  }

  return nullptr;
}
4681
4682static Value *foldFrexpOfSelect(ExtractValueInst &EV, IntrinsicInst *FrexpCall,
4683 SelectInst *SelectInst,
4684 InstCombiner::BuilderTy &Builder) {
4685 // Helper to fold frexp of select to select of frexp.
4686
4687 if (!SelectInst->hasOneUse() || !FrexpCall->hasOneUse())
4688 return nullptr;
4689 Value *Cond = SelectInst->getCondition();
4690 Value *TrueVal = SelectInst->getTrueValue();
4691 Value *FalseVal = SelectInst->getFalseValue();
4692
4693 const APFloat *ConstVal = nullptr;
4694 Value *VarOp = nullptr;
4695 bool ConstIsTrue = false;
4696
4697 if (match(V: TrueVal, P: m_APFloat(Res&: ConstVal))) {
4698 VarOp = FalseVal;
4699 ConstIsTrue = true;
4700 } else if (match(V: FalseVal, P: m_APFloat(Res&: ConstVal))) {
4701 VarOp = TrueVal;
4702 ConstIsTrue = false;
4703 } else {
4704 return nullptr;
4705 }
4706
4707 Builder.SetInsertPoint(&EV);
4708
4709 CallInst *NewFrexp =
4710 Builder.CreateCall(Callee: FrexpCall->getCalledFunction(), Args: {VarOp}, Name: "frexp");
4711 NewFrexp->copyIRFlags(V: FrexpCall);
4712
4713 Value *NewEV = Builder.CreateExtractValue(Agg: NewFrexp, Idxs: 0, Name: "mantissa");
4714
4715 int Exp;
4716 APFloat Mantissa = frexp(X: *ConstVal, Exp, RM: APFloat::rmNearestTiesToEven);
4717
4718 Constant *ConstantMantissa = ConstantFP::get(Ty: TrueVal->getType(), V: Mantissa);
4719
4720 Value *NewSel = Builder.CreateSelectFMF(
4721 C: Cond, True: ConstIsTrue ? ConstantMantissa : NewEV,
4722 False: ConstIsTrue ? NewEV : ConstantMantissa, FMFSource: SelectInst, Name: "select.frexp");
4723 return NewSel;
4724}
// Simplify an extractvalue: handle extracts from insertvalue chains, overflow
// intrinsics, frexp-of-select, single-use simple loads, phis and selects.
// Returns a replacement instruction, or nullptr if nothing applied.
Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
  Value *Agg = EV.getAggregateOperand();

  // An extractvalue with no indices just yields the aggregate itself.
  if (!EV.hasIndices())
    return replaceInstUsesWith(I&: EV, V: Agg);

  if (Value *V = simplifyExtractValueInst(Agg, Idxs: EV.getIndices(),
                                          Q: SQ.getWithInstruction(I: &EV)))
    return replaceInstUsesWith(I&: EV, V);

  // extractvalue (frexp (select Cond, C, X)), 0
  //   --> select Cond, (mantissa of C), (extractvalue (frexp X), 0)
  Value *Cond, *TrueVal, *FalseVal;
  if (match(V: &EV, P: m_ExtractValue<0>(V: m_Intrinsic<Intrinsic::frexp>(Op0: m_Select(
                  C: m_Value(V&: Cond), L: m_Value(V&: TrueVal), R: m_Value(V&: FalseVal)))))) {
    auto *SelInst =
        cast<SelectInst>(Val: cast<IntrinsicInst>(Val: Agg)->getArgOperand(i: 0));
    if (Value *Result =
            foldFrexpOfSelect(EV, FrexpCall: cast<IntrinsicInst>(Val: Agg), SelectInst: SelInst, Builder))
      return replaceInstUsesWith(I&: EV, V: Result);
  }
  if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Val: Agg)) {
    // We're extracting from an insertvalue instruction, compare the indices
    const unsigned *exti, *exte, *insi, *inse;
    for (exti = EV.idx_begin(), insi = IV->idx_begin(),
         exte = EV.idx_end(), inse = IV->idx_end();
         exti != exte && insi != inse;
         ++exti, ++insi) {
      if (*insi != *exti)
        // The insert and extract both reference distinctly different elements.
        // This means the extract is not influenced by the insert, and we can
        // replace the aggregate operand of the extract with the aggregate
        // operand of the insert. i.e., replace
        // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
        // %E = extractvalue { i32, { i32 } } %I, 0
        // with
        // %E = extractvalue { i32, { i32 } } %A, 0
        return ExtractValueInst::Create(Agg: IV->getAggregateOperand(),
                                        Idxs: EV.getIndices());
    }
    if (exti == exte && insi == inse)
      // Both iterators are at the end: Index lists are identical. Replace
      // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
      // %C = extractvalue { i32, { i32 } } %B, 1, 0
      // with "i32 42"
      return replaceInstUsesWith(I&: EV, V: IV->getInsertedValueOperand());
    if (exti == exte) {
      // The extract list is a prefix of the insert list. i.e. replace
      // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
      // %E = extractvalue { i32, { i32 } } %I, 1
      // with
      // %X = extractvalue { i32, { i32 } } %A, 1
      // %E = insertvalue { i32 } %X, i32 42, 0
      // by switching the order of the insert and extract (though the
      // insertvalue should be left in, since it may have other uses).
      Value *NewEV = Builder.CreateExtractValue(Agg: IV->getAggregateOperand(),
                                                Idxs: EV.getIndices());
      return InsertValueInst::Create(Agg: NewEV, Val: IV->getInsertedValueOperand(),
                                     Idxs: ArrayRef(insi, inse));
    }
    if (insi == inse)
      // The insert list is a prefix of the extract list
      // We can simply remove the common indices from the extract and make it
      // operate on the inserted value instead of the insertvalue result.
      // i.e., replace
      // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
      // %E = extractvalue { i32, { i32 } } %I, 1, 0
      // with
      // %E extractvalue { i32 } { i32 42 }, 0
      return ExtractValueInst::Create(Agg: IV->getInsertedValueOperand(),
                                      Idxs: ArrayRef(exti, exte));
  }

  // extractvalue of *.with.overflow intrinsics is handled separately.
  if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
    return R;

  if (LoadInst *L = dyn_cast<LoadInst>(Val: Agg)) {
    // Bail out if the aggregate contains scalable vector type
    if (auto *STy = dyn_cast<StructType>(Val: Agg->getType());
        STy && STy->isScalableTy())
      return nullptr;

    // If the (non-volatile) load only has one use, we can rewrite this to a
    // load from a GEP. This reduces the size of the load. If a load is used
    // only by extractvalue instructions then this either must have been
    // optimized before, or it is a struct with padding, in which case we
    // don't want to do the transformation as it loses padding knowledge.
    if (L->isSimple() && L->hasOneUse()) {
      // extractvalue has integer indices, getelementptr has Value*s. Convert.
      SmallVector<Value*, 4> Indices;
      // Prefix an i32 0 since we need the first element.
      Indices.push_back(Elt: Builder.getInt32(C: 0));
      for (unsigned Idx : EV.indices())
        Indices.push_back(Elt: Builder.getInt32(C: Idx));

      // We need to insert these at the location of the old load, not at that of
      // the extractvalue.
      Builder.SetInsertPoint(L);
      Value *GEP = Builder.CreateInBoundsGEP(Ty: L->getType(),
                                             Ptr: L->getPointerOperand(), IdxList: Indices);
      Instruction *NL = Builder.CreateLoad(Ty: EV.getType(), Ptr: GEP);
      // Whatever aliasing information we had for the orignal load must also
      // hold for the smaller load, so propagate the annotations.
      NL->setAAMetadata(L->getAAMetadata());
      // Returning the load directly will cause the main loop to insert it in
      // the wrong spot, so use replaceInstUsesWith().
      return replaceInstUsesWith(I&: EV, V: NL);
    }
  }

  // extract (phi ...) --> phi (extract ...) when profitable.
  if (auto *PN = dyn_cast<PHINode>(Val: Agg))
    if (Instruction *Res = foldOpIntoPhi(I&: EV, PN))
      return Res;

  // Canonicalize extract (select Cond, TV, FV)
  // -> select cond, (extract TV), (extract FV)
  if (auto *SI = dyn_cast<SelectInst>(Val: Agg))
    if (Instruction *R = FoldOpIntoSelect(Op&: EV, SI, /*FoldWithMultiUse=*/true))
      return R;

  // We could simplify extracts from other values. Note that nested extracts may
  // already be simplified implicitly by the above: extract (extract (insert) )
  // will be translated into extract ( insert ( extract ) ) first and then just
  // the value inserted, if appropriate. Similarly for extracts from single-use
  // loads: extract (extract (load)) will be translated to extract (load (gep))
  // and if again single-use then via load (gep (gep)) to load (gep).
  // However, double extracts from e.g. function arguments or return values
  // aren't handled yet.
  return nullptr;
}
4853
4854/// Return 'true' if the given typeinfo will match anything.
4855static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
4856 switch (Personality) {
4857 case EHPersonality::GNU_C:
4858 case EHPersonality::GNU_C_SjLj:
4859 case EHPersonality::Rust:
4860 // The GCC C EH and Rust personality only exists to support cleanups, so
4861 // it's not clear what the semantics of catch clauses are.
4862 return false;
4863 case EHPersonality::Unknown:
4864 return false;
4865 case EHPersonality::GNU_Ada:
4866 // While __gnat_all_others_value will match any Ada exception, it doesn't
4867 // match foreign exceptions (or didn't, before gcc-4.7).
4868 return false;
4869 case EHPersonality::GNU_CXX:
4870 case EHPersonality::GNU_CXX_SjLj:
4871 case EHPersonality::GNU_ObjC:
4872 case EHPersonality::MSVC_X86SEH:
4873 case EHPersonality::MSVC_TableSEH:
4874 case EHPersonality::MSVC_CXX:
4875 case EHPersonality::CoreCLR:
4876 case EHPersonality::Wasm_CXX:
4877 case EHPersonality::XL_CXX:
4878 case EHPersonality::ZOS_CXX:
4879 return TypeInfo->isNullValue();
4880 }
4881 llvm_unreachable("invalid enum");
4882}
4883
4884static bool shorter_filter(const Value *LHS, const Value *RHS) {
4885 return
4886 cast<ArrayType>(Val: LHS->getType())->getNumElements()
4887 <
4888 cast<ArrayType>(Val: RHS->getType())->getNumElements();
4889}
4890
// Simplify the clause list of a landingpad: drop duplicate catch clauses,
// drop filters that can never match, shrink and sort filters, and remove
// filters subsumed by earlier ones. A new landingpad is built only when
// something actually changed.
Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
  // The logic here should be correct for any real-world personality function.
  // However if that turns out not to be true, the offending logic can always
  // be conditioned on the personality function, like the catch-all logic is.
  EHPersonality Personality =
      classifyEHPersonality(Pers: LI.getParent()->getParent()->getPersonalityFn());

  // Simplify the list of clauses, eg by removing repeated catch clauses
  // (these are often created by inlining).
  bool MakeNewInstruction = false; // If true, recreate using the following:
  SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
  bool CleanupFlag = LI.isCleanup();      // - The new instruction is a cleanup.

  SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
  for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
    bool isLastClause = i + 1 == e;
    if (LI.isCatch(Idx: i)) {
      // A catch clause.
      Constant *CatchClause = LI.getClause(Idx: i);
      // Strip pointer casts so that equivalent typeinfos compare equal.
      Constant *TypeInfo = CatchClause->stripPointerCasts();

      // If we already saw this clause, there is no point in having a second
      // copy of it.
      if (AlreadyCaught.insert(Ptr: TypeInfo).second) {
        // This catch clause was not already seen.
        NewClauses.push_back(Elt: CatchClause);
      } else {
        // Repeated catch clause - drop the redundant copy.
        MakeNewInstruction = true;
      }

      // If this is a catch-all then there is no point in keeping any following
      // clauses or marking the landingpad as having a cleanup.
      if (isCatchAll(Personality, TypeInfo)) {
        if (!isLastClause)
          MakeNewInstruction = true;
        CleanupFlag = false;
        break;
      }
    } else {
      // A filter clause.  If any of the filter elements were already caught
      // then they can be dropped from the filter.  It is tempting to try to
      // exploit the filter further by saying that any typeinfo that does not
      // occur in the filter can't be caught later (and thus can be dropped).
      // However this would be wrong, since typeinfos can match without being
      // equal (for example if one represents a C++ class, and the other some
      // class derived from it).
      assert(LI.isFilter(i) && "Unsupported landingpad clause!");
      Constant *FilterClause = LI.getClause(Idx: i);
      ArrayType *FilterType = cast<ArrayType>(Val: FilterClause->getType());
      unsigned NumTypeInfos = FilterType->getNumElements();

      // An empty filter catches everything, so there is no point in keeping any
      // following clauses or marking the landingpad as having a cleanup.  By
      // dealing with this case here the following code is made a bit simpler.
      if (!NumTypeInfos) {
        NewClauses.push_back(Elt: FilterClause);
        if (!isLastClause)
          MakeNewInstruction = true;
        CleanupFlag = false;
        break;
      }

      bool MakeNewFilter = false; // If true, make a new filter.
      SmallVector<Constant *, 16> NewFilterElts; // New elements.
      if (isa<ConstantAggregateZero>(Val: FilterClause)) {
        // Not an empty filter - it contains at least one null typeinfo.
        assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
        Constant *TypeInfo =
            Constant::getNullValue(Ty: FilterType->getElementType());
        // If this typeinfo is a catch-all then the filter can never match.
        if (isCatchAll(Personality, TypeInfo)) {
          // Throw the filter away.
          MakeNewInstruction = true;
          continue;
        }

        // There is no point in having multiple copies of this typeinfo, so
        // discard all but the first copy if there is more than one.
        NewFilterElts.push_back(Elt: TypeInfo);
        if (NumTypeInfos > 1)
          MakeNewFilter = true;
      } else {
        ConstantArray *Filter = cast<ConstantArray>(Val: FilterClause);
        SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
        NewFilterElts.reserve(N: NumTypeInfos);

        // Remove any filter elements that were already caught or that already
        // occurred in the filter.  While there, see if any of the elements are
        // catch-alls.  If so, the filter can be discarded.
        bool SawCatchAll = false;
        for (unsigned j = 0; j != NumTypeInfos; ++j) {
          Constant *Elt = Filter->getOperand(i_nocapture: j);
          Constant *TypeInfo = Elt->stripPointerCasts();
          if (isCatchAll(Personality, TypeInfo)) {
            // This element is a catch-all.  Bail out, noting this fact.
            SawCatchAll = true;
            break;
          }

          // Even if we've seen a type in a catch clause, we don't want to
          // remove it from the filter.  An unexpected type handler may be
          // set up for a call site which throws an exception of the same
          // type caught.  In order for the exception thrown by the unexpected
          // handler to propagate correctly, the filter must be correctly
          // described for the call site.
          //
          // Example:
          //
          // void unexpected() { throw 1;}
          // void foo() throw (int) {
          //   std::set_unexpected(unexpected);
          //   try {
          //     throw 2.0;
          //   } catch (int i) {}
          // }

          // There is no point in having multiple copies of the same typeinfo in
          // a filter, so only add it if we didn't already.
          if (SeenInFilter.insert(Ptr: TypeInfo).second)
            NewFilterElts.push_back(Elt: cast<Constant>(Val: Elt));
        }
        // A filter containing a catch-all cannot match anything by definition.
        if (SawCatchAll) {
          // Throw the filter away.
          MakeNewInstruction = true;
          continue;
        }

        // If we dropped something from the filter, make a new one.
        if (NewFilterElts.size() < NumTypeInfos)
          MakeNewFilter = true;
      }
      if (MakeNewFilter) {
        FilterType = ArrayType::get(ElementType: FilterType->getElementType(),
                                    NumElements: NewFilterElts.size());
        FilterClause = ConstantArray::get(T: FilterType, V: NewFilterElts);
        MakeNewInstruction = true;
      }

      NewClauses.push_back(Elt: FilterClause);

      // If the new filter is empty then it will catch everything so there is
      // no point in keeping any following clauses or marking the landingpad
      // as having a cleanup.  The case of the original filter being empty was
      // already handled above.
      if (MakeNewFilter && !NewFilterElts.size()) {
        assert(MakeNewInstruction && "New filter but not a new instruction!");
        CleanupFlag = false;
        break;
      }
    }
  }

  // If several filters occur in a row then reorder them so that the shortest
  // filters come first (those with the smallest number of elements).  This is
  // advantageous because shorter filters are more likely to match, speeding up
  // unwinding, but mostly because it increases the effectiveness of the other
  // filter optimizations below.
  for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
    unsigned j;
    // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
    for (j = i; j != e; ++j)
      if (!isa<ArrayType>(Val: NewClauses[j]->getType()))
        break;

    // Check whether the filters are already sorted by length.  We need to know
    // if sorting them is actually going to do anything so that we only make a
    // new landingpad instruction if it does.
    for (unsigned k = i; k + 1 < j; ++k)
      if (shorter_filter(LHS: NewClauses[k+1], RHS: NewClauses[k])) {
        // Not sorted, so sort the filters now.  Doing an unstable sort would be
        // correct too but reordering filters pointlessly might confuse users.
        std::stable_sort(first: NewClauses.begin() + i, last: NewClauses.begin() + j,
                         comp: shorter_filter);
        MakeNewInstruction = true;
        break;
      }

    // Look for the next batch of filters.
    i = j + 1;
  }

  // If typeinfos matched if and only if equal, then the elements of a filter L
  // that occurs later than a filter F could be replaced by the intersection of
  // the elements of F and L.  In reality two typeinfos can match without being
  // equal (for example if one represents a C++ class, and the other some class
  // derived from it) so it would be wrong to perform this transform in general.
  // However the transform is correct and useful if F is a subset of L.  In that
  // case L can be replaced by F, and thus removed altogether since repeating a
  // filter is pointless.  So here we look at all pairs of filters F and L where
  // L follows F in the list of clauses, and remove L if every element of F is
  // an element of L.  This can occur when inlining C++ functions with exception
  // specifications.
  for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
    // Examine each filter in turn.
    Value *Filter = NewClauses[i];
    ArrayType *FTy = dyn_cast<ArrayType>(Val: Filter->getType());
    if (!FTy)
      // Not a filter - skip it.
      continue;
    unsigned FElts = FTy->getNumElements();
    // Examine each filter following this one.  Doing this backwards means that
    // we don't have to worry about filters disappearing under us when removed.
    for (unsigned j = NewClauses.size() - 1; j != i; --j) {
      Value *LFilter = NewClauses[j];
      ArrayType *LTy = dyn_cast<ArrayType>(Val: LFilter->getType());
      if (!LTy)
        // Not a filter - skip it.
        continue;
      // If Filter is a subset of LFilter, i.e. every element of Filter is also
      // an element of LFilter, then discard LFilter.
      SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
      // If Filter is empty then it is a subset of LFilter.
      if (!FElts) {
        // Discard LFilter.
        NewClauses.erase(CI: J);
        MakeNewInstruction = true;
        // Move on to the next filter.
        continue;
      }
      unsigned LElts = LTy->getNumElements();
      // If Filter is longer than LFilter then it cannot be a subset of it.
      if (FElts > LElts)
        // Move on to the next filter.
        continue;
      // At this point we know that LFilter has at least one element.
      if (isa<ConstantAggregateZero>(Val: LFilter)) { // LFilter only contains zeros.
        // Filter is a subset of LFilter iff Filter contains only zeros (as we
        // already know that Filter is not longer than LFilter).
        if (isa<ConstantAggregateZero>(Val: Filter)) {
          assert(FElts <= LElts && "Should have handled this case earlier!");
          // Discard LFilter.
          NewClauses.erase(CI: J);
          MakeNewInstruction = true;
        }
        // Move on to the next filter.
        continue;
      }
      ConstantArray *LArray = cast<ConstantArray>(Val: LFilter);
      if (isa<ConstantAggregateZero>(Val: Filter)) { // Filter only contains zeros.
        // Since Filter is non-empty and contains only zeros, it is a subset of
        // LFilter iff LFilter contains a zero.
        assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
        for (unsigned l = 0; l != LElts; ++l)
          if (LArray->getOperand(i_nocapture: l)->isNullValue()) {
            // LFilter contains a zero - discard it.
            NewClauses.erase(CI: J);
            MakeNewInstruction = true;
            break;
          }
        // Move on to the next filter.
        continue;
      }
      // At this point we know that both filters are ConstantArrays.  Loop over
      // operands to see whether every element of Filter is also an element of
      // LFilter.  Since filters tend to be short this is probably faster than
      // using a method that scales nicely.
      ConstantArray *FArray = cast<ConstantArray>(Val: Filter);
      bool AllFound = true;
      for (unsigned f = 0; f != FElts; ++f) {
        Value *FTypeInfo = FArray->getOperand(i_nocapture: f)->stripPointerCasts();
        AllFound = false;
        for (unsigned l = 0; l != LElts; ++l) {
          Value *LTypeInfo = LArray->getOperand(i_nocapture: l)->stripPointerCasts();
          if (LTypeInfo == FTypeInfo) {
            AllFound = true;
            break;
          }
        }
        if (!AllFound)
          break;
      }
      if (AllFound) {
        // Discard LFilter.
        NewClauses.erase(CI: J);
        MakeNewInstruction = true;
      }
      // Move on to the next filter.
    }
  }

  // If we changed any of the clauses, replace the old landingpad instruction
  // with a new one.
  if (MakeNewInstruction) {
    LandingPadInst *NLI = LandingPadInst::Create(RetTy: LI.getType(),
                                                 NumReservedClauses: NewClauses.size());
    for (Constant *C : NewClauses)
      NLI->addClause(ClauseVal: C);
    // A landing pad with no clauses must have the cleanup flag set.  It is
    // theoretically possible, though highly unlikely, that we eliminated all
    // clauses.  If so, force the cleanup flag to true.
    if (NewClauses.empty())
      CleanupFlag = true;
    NLI->setCleanup(CleanupFlag);
    return NLI;
  }

  // Even if none of the clauses changed, we may nonetheless have understood
  // that the cleanup flag is pointless.  Clear it if so.
  if (LI.isCleanup() != CleanupFlag) {
    assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
    LI.setCleanup(CleanupFlag);
    return &LI;
  }

  return nullptr;
}
5199
5200Value *
5201InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
5202 // Try to push freeze through instructions that propagate but don't produce
5203 // poison as far as possible. If an operand of freeze follows three
5204 // conditions 1) one-use, 2) does not produce poison, and 3) has all but one
5205 // guaranteed-non-poison operands then push the freeze through to the one
5206 // operand that is not guaranteed non-poison. The actual transform is as
5207 // follows.
5208 // Op1 = ... ; Op1 can be posion
5209 // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and only have
5210 // ; single guaranteed-non-poison operands
5211 // ... = Freeze(Op0)
5212 // =>
5213 // Op1 = ...
5214 // Op1.fr = Freeze(Op1)
5215 // ... = Inst(Op1.fr, NonPoisonOps...)
5216 auto *OrigOp = OrigFI.getOperand(i_nocapture: 0);
5217 auto *OrigOpInst = dyn_cast<Instruction>(Val: OrigOp);
5218
5219 // While we could change the other users of OrigOp to use freeze(OrigOp), that
5220 // potentially reduces their optimization potential, so let's only do this iff
5221 // the OrigOp is only used by the freeze.
5222 if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(Val: OrigOp))
5223 return nullptr;
5224
5225 // We can't push the freeze through an instruction which can itself create
5226 // poison. If the only source of new poison is flags, we can simply
5227 // strip them (since we know the only use is the freeze and nothing can
5228 // benefit from them.)
5229 if (canCreateUndefOrPoison(Op: cast<Operator>(Val: OrigOp),
5230 /*ConsiderFlagsAndMetadata*/ false))
5231 return nullptr;
5232
5233 // If operand is guaranteed not to be poison, there is no need to add freeze
5234 // to the operand. So we first find the operand that is not guaranteed to be
5235 // poison.
5236 Value *MaybePoisonOperand = nullptr;
5237 for (Value *V : OrigOpInst->operands()) {
5238 if (isa<MetadataAsValue>(Val: V) || isGuaranteedNotToBeUndefOrPoison(V) ||
5239 // Treat identical operands as a single operand.
5240 (MaybePoisonOperand && MaybePoisonOperand == V))
5241 continue;
5242 if (!MaybePoisonOperand)
5243 MaybePoisonOperand = V;
5244 else
5245 return nullptr;
5246 }
5247
5248 OrigOpInst->dropPoisonGeneratingAnnotations();
5249
5250 // If all operands are guaranteed to be non-poison, we can drop freeze.
5251 if (!MaybePoisonOperand)
5252 return OrigOp;
5253
5254 Builder.SetInsertPoint(OrigOpInst);
5255 Value *FrozenMaybePoisonOperand = Builder.CreateFreeze(
5256 V: MaybePoisonOperand, Name: MaybePoisonOperand->getName() + ".fr");
5257
5258 OrigOpInst->replaceUsesOfWith(From: MaybePoisonOperand, To: FrozenMaybePoisonOperand);
5259 return OrigOp;
5260}
5261
5262Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
5263 PHINode *PN) {
5264 // Detect whether this is a recurrence with a start value and some number of
5265 // backedge values. We'll check whether we can push the freeze through the
5266 // backedge values (possibly dropping poison flags along the way) until we
5267 // reach the phi again. In that case, we can move the freeze to the start
5268 // value.
5269 Use *StartU = nullptr;
5270 SmallVector<Value *> Worklist;
5271 for (Use &U : PN->incoming_values()) {
5272 if (DT.dominates(A: PN->getParent(), B: PN->getIncomingBlock(U))) {
5273 // Add backedge value to worklist.
5274 Worklist.push_back(Elt: U.get());
5275 continue;
5276 }
5277
5278 // Don't bother handling multiple start values.
5279 if (StartU)
5280 return nullptr;
5281 StartU = &U;
5282 }
5283
5284 if (!StartU || Worklist.empty())
5285 return nullptr; // Not a recurrence.
5286
5287 Value *StartV = StartU->get();
5288 BasicBlock *StartBB = PN->getIncomingBlock(U: *StartU);
5289 bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(V: StartV);
5290 // We can't insert freeze if the start value is the result of the
5291 // terminator (e.g. an invoke).
5292 if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
5293 return nullptr;
5294
5295 SmallPtrSet<Value *, 32> Visited;
5296 SmallVector<Instruction *> DropFlags;
5297 while (!Worklist.empty()) {
5298 Value *V = Worklist.pop_back_val();
5299 if (!Visited.insert(Ptr: V).second)
5300 continue;
5301
5302 if (Visited.size() > 32)
5303 return nullptr; // Limit the total number of values we inspect.
5304
5305 // Assume that PN is non-poison, because it will be after the transform.
5306 if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
5307 continue;
5308
5309 Instruction *I = dyn_cast<Instruction>(Val: V);
5310 if (!I || canCreateUndefOrPoison(Op: cast<Operator>(Val: I),
5311 /*ConsiderFlagsAndMetadata*/ false))
5312 return nullptr;
5313
5314 DropFlags.push_back(Elt: I);
5315 append_range(C&: Worklist, R: I->operands());
5316 }
5317
5318 for (Instruction *I : DropFlags)
5319 I->dropPoisonGeneratingAnnotations();
5320
5321 if (StartNeedsFreeze) {
5322 Builder.SetInsertPoint(StartBB->getTerminator());
5323 Value *FrozenStartV = Builder.CreateFreeze(V: StartV,
5324 Name: StartV->getName() + ".fr");
5325 replaceUse(U&: *StartU, NewValue: FrozenStartV);
5326 }
5327 return replaceInstUsesWith(I&: FI, V: PN);
5328}
5329
5330bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) {
5331 Value *Op = FI.getOperand(i_nocapture: 0);
5332
5333 if (isa<Constant>(Val: Op) || Op->hasOneUse())
5334 return false;
5335
5336 // Move the freeze directly after the definition of its operand, so that
5337 // it dominates the maximum number of uses. Note that it may not dominate
5338 // *all* uses if the operand is an invoke/callbr and the use is in a phi on
5339 // the normal/default destination. This is why the domination check in the
5340 // replacement below is still necessary.
5341 BasicBlock::iterator MoveBefore;
5342 if (isa<Argument>(Val: Op)) {
5343 MoveBefore =
5344 FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
5345 } else {
5346 auto MoveBeforeOpt = cast<Instruction>(Val: Op)->getInsertionPointAfterDef();
5347 if (!MoveBeforeOpt)
5348 return false;
5349 MoveBefore = *MoveBeforeOpt;
5350 }
5351
5352 // Re-point iterator to come after any debug-info records.
5353 MoveBefore.setHeadBit(false);
5354
5355 bool Changed = false;
5356 if (&FI != &*MoveBefore) {
5357 FI.moveBefore(BB&: *MoveBefore->getParent(), I: MoveBefore);
5358 Changed = true;
5359 }
5360
5361 Op->replaceUsesWithIf(New: &FI, ShouldReplace: [&](Use &U) -> bool {
5362 bool Dominates = DT.dominates(Def: &FI, U);
5363 Changed |= Dominates;
5364 return Dominates;
5365 });
5366
5367 return Changed;
5368}
5369
5370// Check if any direct or bitcast user of this value is a shuffle instruction.
5371static bool isUsedWithinShuffleVector(Value *V) {
5372 for (auto *U : V->users()) {
5373 if (isa<ShuffleVectorInst>(Val: U))
5374 return true;
5375 else if (match(V: U, P: m_BitCast(Op: m_Specific(V))) && isUsedWithinShuffleVector(V: U))
5376 return true;
5377 }
5378 return false;
5379}
5380
// Simplify a freeze instruction: push it toward its sources, fold it into
// recurrences, replace freeze(undef) with a use-driven constant choice, and
// redirect other uses of the operand through the freeze.
Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
  Value *Op0 = I.getOperand(i_nocapture: 0);

  if (Value *V = simplifyFreezeInst(Op: Op0, Q: SQ.getWithInstruction(I: &I)))
    return replaceInstUsesWith(I, V);

  // freeze (phi const, x) --> phi const, (freeze x)
  if (auto *PN = dyn_cast<PHINode>(Val: Op0)) {
    if (Instruction *NV = foldOpIntoPhi(I, PN))
      return NV;
    if (Instruction *NV = foldFreezeIntoRecurrence(FI&: I, PN))
      return NV;
  }

  // Push the freeze through the operand toward the real source of poison.
  if (Value *NI = pushFreezeToPreventPoisonFromPropagating(OrigFI&: I))
    return replaceInstUsesWith(I, V: NI);

  // If I is freeze(undef), check its uses and fold it to a fixed constant.
  // - or: pick -1
  // - select's condition: if the true value is constant, choose it by making
  //                       the condition true.
  // - phi: pick the common constant across operands
  // - default: pick 0
  //
  // Note that this transform is intentionally done here rather than
  // via an analysis in InstSimplify or at individual user sites. That is
  // because we must produce the same value for all uses of the freeze -
  // it's the reason "freeze" exists!
  //
  // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
  //       duplicating logic for binops at least.
  auto getUndefReplacement = [&](Type *Ty) {
    auto pickCommonConstantFromPHI = [](PHINode &PN) -> Value * {
      // phi(freeze(undef), C, C). Choose C for freeze so the PHI can be
      // removed.
      Constant *BestValue = nullptr;
      for (Value *V : PN.incoming_values()) {
        if (match(V, P: m_Freeze(Op: m_Undef())))
          continue;

        Constant *C = dyn_cast<Constant>(Val: V);
        if (!C)
          return nullptr;

        if (!isGuaranteedNotToBeUndefOrPoison(V: C))
          return nullptr;

        // All non-freeze incoming values must agree on a single constant.
        if (BestValue && BestValue != C)
          return nullptr;

        BestValue = C;
      }
      return BestValue;
    };

    Value *NullValue = Constant::getNullValue(Ty);
    Value *BestValue = nullptr;
    // Scan all users; if they disagree on a preferred constant, fall back
    // to the null value so every use still sees the same replacement.
    for (auto *U : I.users()) {
      Value *V = NullValue;
      if (match(V: U, P: m_Or(L: m_Value(), R: m_Value())))
        V = ConstantInt::getAllOnesValue(Ty);
      else if (match(V: U, P: m_Select(C: m_Specific(V: &I), L: m_Constant(), R: m_Value())))
        V = ConstantInt::getTrue(Ty);
      else if (match(V: U, P: m_c_Select(L: m_Specific(V: &I), R: m_Value(V)))) {
        if (V == &I || !isGuaranteedNotToBeUndefOrPoison(V, AC: &AC, CtxI: &I, DT: &DT))
          V = NullValue;
      } else if (auto *PHI = dyn_cast<PHINode>(Val: U)) {
        if (Value *MaybeV = pickCommonConstantFromPHI(*PHI))
          V = MaybeV;
      }

      if (!BestValue)
        BestValue = V;
      else if (BestValue != V)
        BestValue = NullValue;
    }
    assert(BestValue && "Must have at least one use");
    assert(BestValue != &I && "Cannot replace with itself");
    return BestValue;
  };

  if (match(V: Op0, P: m_Undef())) {
    // Don't fold freeze(undef/poison) if it's used as a vector operand in
    // a shuffle. This may improve codegen for shuffles that allow
    // unspecified inputs.
    if (isUsedWithinShuffleVector(V: &I))
      return nullptr;
    return replaceInstUsesWith(I, V: getUndefReplacement(I.getType()));
  }

  // For a fixed vector constant with some undef/poison lanes, fill those
  // lanes with the first defined element (or zero if none) so the freeze
  // can be dropped.
  auto getFreezeVectorReplacement = [](Constant *C) -> Constant * {
    Type *Ty = C->getType();
    auto *VTy = dyn_cast<FixedVectorType>(Val: Ty);
    if (!VTy)
      return nullptr;
    unsigned NumElts = VTy->getNumElements();
    Constant *BestValue = Constant::getNullValue(Ty: VTy->getScalarType());
    for (unsigned i = 0; i != NumElts; ++i) {
      Constant *EltC = C->getAggregateElement(Elt: i);
      if (EltC && !match(V: EltC, P: m_Undef())) {
        BestValue = EltC;
        break;
      }
    }
    return Constant::replaceUndefsWith(C, Replacement: BestValue);
  };

  Constant *C;
  if (match(V: Op0, P: m_Constant(C)) && C->containsUndefOrPoisonElement() &&
      !C->containsConstantExpression()) {
    if (Constant *Repl = getFreezeVectorReplacement(C))
      return replaceInstUsesWith(I, V: Repl);
  }

  // Replace uses of Op with freeze(Op).
  if (freezeOtherUses(FI&: I))
    return &I;

  return nullptr;
}
5501
5502/// Check for case where the call writes to an otherwise dead alloca. This
5503/// shows up for unused out-params in idiomatic C/C++ code. Note that this
5504/// helper *only* analyzes the write; doesn't check any other legality aspect.
5505static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
5506 auto *CB = dyn_cast<CallBase>(Val: I);
5507 if (!CB)
5508 // TODO: handle e.g. store to alloca here - only worth doing if we extend
5509 // to allow reload along used path as described below. Otherwise, this
5510 // is simply a store to a dead allocation which will be removed.
5511 return false;
5512 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: CB, TLI);
5513 if (!Dest)
5514 return false;
5515 auto *AI = dyn_cast<AllocaInst>(Val: getUnderlyingObject(V: Dest->Ptr));
5516 if (!AI)
5517 // TODO: allow malloc?
5518 return false;
5519 // TODO: allow memory access dominated by move point? Note that since AI
5520 // could have a reference to itself captured by the call, we would need to
5521 // account for cycles in doing so.
5522 SmallVector<const User *> AllocaUsers;
5523 SmallPtrSet<const User *, 4> Visited;
5524 auto pushUsers = [&](const Instruction &I) {
5525 for (const User *U : I.users()) {
5526 if (Visited.insert(Ptr: U).second)
5527 AllocaUsers.push_back(Elt: U);
5528 }
5529 };
5530 pushUsers(*AI);
5531 while (!AllocaUsers.empty()) {
5532 auto *UserI = cast<Instruction>(Val: AllocaUsers.pop_back_val());
5533 if (isa<GetElementPtrInst>(Val: UserI) || isa<AddrSpaceCastInst>(Val: UserI)) {
5534 pushUsers(*UserI);
5535 continue;
5536 }
5537 if (UserI == CB)
5538 continue;
5539 // TODO: support lifetime.start/end here
5540 return false;
5541 }
5542 return true;
5543}
5544
/// Try to move the specified instruction from its current block into the
/// beginning of DestBlock, which can only happen if it's safe to move the
/// instruction past all of the instructions between it and the end of its
/// block.
///
/// \returns true if \p I was moved to the front of \p DestBlock. Debug
/// records attached in the source block are salvaged or sunk along with it.
bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
                                            BasicBlock *DestBlock) {
  BasicBlock *SrcBlock = I->getParent();

  // Cannot move control-flow-involving, volatile loads, vaarg, etc.
  if (isa<PHINode>(Val: I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
      I->isTerminator())
    return false;

  // Do not sink static or dynamic alloca instructions. Static allocas must
  // remain in the entry block, and dynamic allocas must not be sunk in between
  // a stacksave / stackrestore pair, which would incorrectly shorten its
  // lifetime.
  if (isa<AllocaInst>(Val: I))
    return false;

  // Do not sink into catchswitch blocks.
  if (isa<CatchSwitchInst>(Val: DestBlock->getTerminator()))
    return false;

  // Do not sink convergent call instructions.
  if (auto *CI = dyn_cast<CallInst>(Val: I)) {
    if (CI->isConvergent())
      return false;
  }

  // Unless we can prove that the memory write isn't visible except on the
  // path we're sinking to, we must bail.
  if (I->mayWriteToMemory()) {
    if (!SoleWriteToDeadLocal(I, TLI))
      return false;
  }

  // We can only sink load instructions if there is nothing between the load and
  // the end of block that could change the value.
  if (I->mayReadFromMemory() &&
      !I->hasMetadata(KindID: LLVMContext::MD_invariant_load)) {
    // We don't want to do any sophisticated alias analysis, so we only check
    // the instructions after I in I's parent block if we try to sink to its
    // successor block.
    if (DestBlock->getUniquePredecessor() != I->getParent())
      return false;
    for (BasicBlock::iterator Scan = std::next(x: I->getIterator()),
                              E = I->getParent()->end();
         Scan != E; ++Scan)
      if (Scan->mayWriteToMemory())
        return false;
  }

  // Drop droppable uses (e.g. in assumes) whose users are outside DestBlock,
  // requeueing those users so any lost information can be re-derived.
  I->dropDroppableUses(ShouldDrop: [&](const Use *U) {
    auto *I = dyn_cast<Instruction>(Val: U->getUser());
    if (I && I->getParent() != DestBlock) {
      Worklist.add(I);
      return true;
    }
    return false;
  });
  /// FIXME: We could remove droppable uses that are not dominated by
  /// the new position.

  BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
  I->moveBefore(BB&: *DestBlock, I: InsertPos);
  ++NumSunkInst;

  // Also sink all related debug uses from the source basic block. Otherwise we
  // get debug use before the def. Attempt to salvage debug uses first, to
  // maximise the range variables have location for. If we cannot salvage, then
  // mark the location undef: we know it was supposed to receive a new location
  // here, but that computation has been sunk.
  SmallVector<DbgVariableRecord *, 2> DbgVariableRecords;
  findDbgUsers(V: I, DbgVariableRecords);
  if (!DbgVariableRecords.empty())
    tryToSinkInstructionDbgVariableRecords(I, InsertPos, SrcBlock, DestBlock,
                                           DPUsers&: DbgVariableRecords);

  // PS: there are numerous flaws with this behaviour, not least that right now
  // assignments can be re-ordered past other assignments to the same variable
  // if they use different Values. Creating more undef assignments can never be
  // undone. And salvaging all users outside of this block can unnecessarily
  // alter the lifetime of the live-value that the variable refers to.
  // Some of these things can be resolved by tolerating debug use-before-defs in
  // LLVM-IR, however it depends on the instruction-referencing CodeGen backend
  // being used for more architectures.

  return true;
}
5635
/// Sink the debug variable records that refer to \p I from \p SrcBlock down to
/// \p InsertPos in \p DestBlock: clone the last assignment per variable into
/// the destination, and salvage the records left behind.
void InstCombinerImpl::tryToSinkInstructionDbgVariableRecords(
    Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
    BasicBlock *DestBlock,
    SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords) {
  // For all debug values in the destination block, the sunk instruction
  // will still be available, so they do not need to be dropped.

  // Fetch all DbgVariableRecords not already in the destination.
  SmallVector<DbgVariableRecord *, 2> DbgVariableRecordsToSalvage;
  for (auto &DVR : DbgVariableRecords)
    if (DVR->getParent() != DestBlock)
      DbgVariableRecordsToSalvage.push_back(Elt: DVR);

  // Fetch a second collection, of DbgVariableRecords in the source block that
  // we're going to sink.
  SmallVector<DbgVariableRecord *> DbgVariableRecordsToSink;
  for (DbgVariableRecord *DVR : DbgVariableRecordsToSalvage)
    if (DVR->getParent() == SrcBlock)
      DbgVariableRecordsToSink.push_back(Elt: DVR);

  // Sort DbgVariableRecords according to their position in the block. This is a
  // partial order: DbgVariableRecords attached to different instructions will
  // be ordered by the instruction order, but DbgVariableRecords attached to the
  // same instruction won't have an order.
  auto Order = [](DbgVariableRecord *A, DbgVariableRecord *B) -> bool {
    return B->getInstruction()->comesBefore(Other: A->getInstruction());
  };
  llvm::stable_sort(Range&: DbgVariableRecordsToSink, C: Order);

  // If there are two assignments to the same variable attached to the same
  // instruction, the ordering between the two assignments is important. Scan
  // for this (rare) case and establish which is the last assignment.
  using InstVarPair = std::pair<const Instruction *, DebugVariable>;
  // Maps each (instruction, variable) pair that has duplicate assignments to
  // the one record that should survive; nullptr until resolved below.
  SmallDenseMap<InstVarPair, DbgVariableRecord *> FilterOutMap;
  if (DbgVariableRecordsToSink.size() > 1) {
    SmallDenseMap<InstVarPair, unsigned> CountMap;
    // Count how many assignments to each variable there is per instruction.
    for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
      DebugVariable DbgUserVariable =
          DebugVariable(DVR->getVariable(), DVR->getExpression(),
                        DVR->getDebugLoc()->getInlinedAt());
      CountMap[std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable)] += 1;
    }

    // If there are any instructions with two assignments, add them to the
    // FilterOutMap to record that they need extra filtering.
    SmallPtrSet<const Instruction *, 4> DupSet;
    for (auto It : CountMap) {
      if (It.second > 1) {
        FilterOutMap[It.first] = nullptr;
        DupSet.insert(Ptr: It.first.first);
      }
    }

    // For all instruction/variable pairs needing extra filtering, find the
    // latest assignment.
    for (const Instruction *Inst : DupSet) {
      for (DbgVariableRecord &DVR :
           llvm::reverse(C: filterDbgVars(R: Inst->getDbgRecordRange()))) {
        DebugVariable DbgUserVariable =
            DebugVariable(DVR.getVariable(), DVR.getExpression(),
                          DVR.getDebugLoc()->getInlinedAt());
        auto FilterIt =
            FilterOutMap.find(Val: std::make_pair(x&: Inst, y&: DbgUserVariable));
        if (FilterIt == FilterOutMap.end())
          continue;
        if (FilterIt->second != nullptr)
          continue;
        // Iterating in reverse, so the first record seen for this pair is the
        // last assignment in program order - the one to keep.
        FilterIt->second = &DVR;
      }
    }
  }

  // Perform cloning of the DbgVariableRecords that we plan on sinking, filter
  // out any duplicate assignments identified above.
  SmallVector<DbgVariableRecord *, 2> DVRClones;
  SmallSet<DebugVariable, 4> SunkVariables;
  for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
    // Declares describe a storage location rather than a computed value;
    // never sink them.
    if (DVR->Type == DbgVariableRecord::LocationType::Declare)
      continue;

    DebugVariable DbgUserVariable =
        DebugVariable(DVR->getVariable(), DVR->getExpression(),
                      DVR->getDebugLoc()->getInlinedAt());

    // For any variable where there were multiple assignments in the same place,
    // ignore all but the last assignment.
    if (!FilterOutMap.empty()) {
      InstVarPair IVP = std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable);
      auto It = FilterOutMap.find(Val: IVP);

      // Filter out.
      if (It != FilterOutMap.end() && It->second != DVR)
        continue;
    }

    // The list is sorted latest-first, so only the newest record per variable
    // survives this de-duplication.
    if (!SunkVariables.insert(V: DbgUserVariable).second)
      continue;

    // dbg_assign records are left behind to be salvaged rather than sunk.
    if (DVR->isDbgAssign())
      continue;

    DVRClones.emplace_back(Args: DVR->clone());
    LLVM_DEBUG(dbgs() << "CLONE: " << *DVRClones.back() << '\n');
  }

  // Perform salvaging without the clones, then sink the clones.
  if (DVRClones.empty())
    return;

  salvageDebugInfoForDbgValues(I&: *I, DPInsns: DbgVariableRecordsToSalvage);

  // The clones are in reverse order of original appearance. Assert that the
  // head bit is set on the iterator as we _should_ have received it via
  // getFirstInsertionPt. Inserting like this will reverse the clone order as
  // we'll repeatedly insert at the head, such as:
  //   DVR-3 (third insertion goes here)
  //   DVR-2 (second insertion goes here)
  //   DVR-1 (first insertion goes here)
  //   Any-Prior-DVRs
  //   InsertPtInst
  assert(InsertPos.getHeadBit());
  for (DbgVariableRecord *DVRClone : DVRClones) {
    InsertPos->getParent()->insertDbgRecordBefore(DR: DVRClone, Here: InsertPos);
    LLVM_DEBUG(dbgs() << "SINK: " << *DVRClone << '\n');
  }
}
5763
/// Main worklist driver: pop instructions, DCE trivially dead ones, try to
/// sink each to its sole use block, then apply the visit* combines until the
/// worklist is exhausted. Returns true if the IR changed.
bool InstCombinerImpl::run() {
  while (!Worklist.isEmpty()) {
    // Walk deferred instructions in reverse order, and push them to the
    // worklist, which means they'll end up popped from the worklist in-order.
    while (Instruction *I = Worklist.popDeferred()) {
      // Check to see if we can DCE the instruction. We do this already here to
      // reduce the number of uses and thus allow other folds to trigger.
      // Note that eraseInstFromFunction() may push additional instructions on
      // the deferred worklist, so this will DCE whole instruction chains.
      if (isInstructionTriviallyDead(I, TLI: &TLI)) {
        eraseInstFromFunction(I&: *I);
        ++NumDeadInst;
        continue;
      }

      Worklist.push(I);
    }

    Instruction *I = Worklist.removeOne();
    if (I == nullptr) continue; // skip null values.

    // Check to see if we can DCE the instruction.
    if (isInstructionTriviallyDead(I, TLI: &TLI)) {
      eraseInstFromFunction(I&: *I);
      ++NumDeadInst;
      continue;
    }

    // Honor debug counters: skip visiting this instruction when disabled.
    if (!DebugCounter::shouldExecute(Counter&: VisitCounter))
      continue;

    // See if we can trivially sink this instruction to its user if we can
    // prove that the successor is not executed more frequently than our block.
    // Return the UserBlock if successful.
    auto getOptionalSinkBlockForInst =
        [this](Instruction *I) -> std::optional<BasicBlock *> {
      if (!EnableCodeSinking)
        return std::nullopt;

      BasicBlock *BB = I->getParent();
      BasicBlock *UserParent = nullptr;
      unsigned NumUsers = 0;

      for (Use &U : I->uses()) {
        User *User = U.getUser();
        if (User->isDroppable()) {
          // Do not sink if there are dereferenceable assumes that would be
          // removed.
          // NOTE(review): II is dereferenced without a null check - this
          // relies on every droppable user being an IntrinsicInst; confirm.
          auto II = dyn_cast<IntrinsicInst>(Val: User);
          if (II->getIntrinsicID() != Intrinsic::assume ||
              !II->getOperandBundle(Name: "dereferenceable"))
            continue;
        }

        if (NumUsers > MaxSinkNumUsers)
          return std::nullopt;

        Instruction *UserInst = cast<Instruction>(Val: User);
        // Special handling for Phi nodes - get the block the use occurs in.
        BasicBlock *UserBB = UserInst->getParent();
        if (PHINode *PN = dyn_cast<PHINode>(Val: UserInst))
          UserBB = PN->getIncomingBlock(U);
        // Bail out if we have uses in different blocks. We don't do any
        // sophisticated analysis (i.e finding NearestCommonDominator of these
        // use blocks).
        if (UserParent && UserParent != UserBB)
          return std::nullopt;
        UserParent = UserBB;

        // Make sure these checks are done only once, naturally we do the checks
        // the first time we get the userparent, this will save compile time.
        if (NumUsers == 0) {
          // Try sinking to another block. If that block is unreachable, then do
          // not bother. SimplifyCFG should handle it.
          if (UserParent == BB || !DT.isReachableFromEntry(A: UserParent))
            return std::nullopt;

          auto *Term = UserParent->getTerminator();
          // See if the user is one of our successors that has only one
          // predecessor, so that we don't have to split the critical edge.
          // Another option where we can sink is a block that ends with a
          // terminator that does not pass control to other block (such as
          // return or unreachable or resume). In this case:
          //   - I dominates the User (by SSA form);
          //   - the User will be executed at most once.
          // So sinking I down to User is always profitable or neutral.
          if (UserParent->getUniquePredecessor() != BB && !succ_empty(I: Term))
            return std::nullopt;

          assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
        }

        NumUsers++;
      }

      // No user or only has droppable users.
      if (!UserParent)
        return std::nullopt;

      return UserParent;
    };

    auto OptBB = getOptionalSinkBlockForInst(I);
    if (OptBB) {
      auto *UserParent = *OptBB;
      // Okay, the CFG is simple enough, try to sink this instruction.
      if (tryToSinkInstruction(I, DestBlock: UserParent)) {
        LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
        MadeIRChange = true;
        // We'll add uses of the sunk instruction below, but since
        // sinking can expose opportunities for its *operands* add
        // them to the worklist
        for (Use &U : I->operands())
          if (Instruction *OpI = dyn_cast<Instruction>(Val: U.get()))
            Worklist.push(I: OpI);
      }
    }

    // Now that we have an instruction, try combining it to simplify it.
    Builder.SetInsertPoint(I);
    Builder.CollectMetadataToCopy(
        Src: I, MetadataKinds: {LLVMContext::MD_dbg, LLVMContext::MD_annotation});

#ifndef NDEBUG
    std::string OrigI;
#endif
    LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS););
    LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');

    if (Instruction *Result = visit(I&: *I)) {
      ++NumCombined;
      // Should we replace the old instruction with a new one?
      if (Result != I) {
        LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
                          << " New = " << *Result << '\n');

        // We copy the old instruction's DebugLoc to the new instruction, unless
        // InstCombine already assigned a DebugLoc to it, in which case we
        // should trust the more specifically selected DebugLoc.
        Result->setDebugLoc(Result->getDebugLoc().orElse(Other: I->getDebugLoc()));
        // We also copy annotation metadata to the new instruction.
        Result->copyMetadata(SrcInst: *I, WL: LLVMContext::MD_annotation);
        // Everything uses the new instruction now.
        I->replaceAllUsesWith(V: Result);

        // Move the name to the new instruction first.
        Result->takeName(V: I);

        // Insert the new instruction into the basic block...
        BasicBlock *InstParent = I->getParent();
        BasicBlock::iterator InsertPos = I->getIterator();

        // Are we replacing a PHI with something that isn't a PHI, or vice
        // versa? PHIs may only appear at the top of a block.
        if (isa<PHINode>(Val: Result) != isa<PHINode>(Val: I)) {
          // We need to fix up the insertion point.
          if (isa<PHINode>(Val: I)) // PHI -> Non-PHI
            InsertPos = InstParent->getFirstInsertionPt();
          else // Non-PHI -> PHI
            InsertPos = InstParent->getFirstNonPHIIt();
        }

        Result->insertInto(ParentBB: InstParent, It: InsertPos);

        // Push the new instruction and any users onto the worklist.
        Worklist.pushUsersToWorkList(I&: *Result);
        Worklist.push(I: Result);

        eraseInstFromFunction(I&: *I);
      } else {
        LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
                          << " New = " << *I << '\n');

        // If the instruction was modified, it's possible that it is now dead.
        // if so, remove it.
        if (isInstructionTriviallyDead(I, TLI: &TLI)) {
          eraseInstFromFunction(I&: *I);
        } else {
          Worklist.pushUsersToWorkList(I&: *I);
          Worklist.push(I);
        }
      }
      MadeIRChange = true;
    }
  }

  Worklist.zap();
  return MadeIRChange;
}
5952
5953// Track the scopes used by !alias.scope and !noalias. In a function, a
5954// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
5955// by both sets. If not, the declaration of the scope can be safely omitted.
5956// The MDNode of the scope can be omitted as well for the instructions that are
5957// part of this function. We do not do that at this point, as this might become
5958// too time consuming to do.
5959class AliasScopeTracker {
5960 SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
5961 SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
5962
5963public:
5964 void analyse(Instruction *I) {
5965 // This seems to be faster than checking 'mayReadOrWriteMemory()'.
5966 if (!I->hasMetadataOtherThanDebugLoc())
5967 return;
5968
5969 auto Track = [](Metadata *ScopeList, auto &Container) {
5970 const auto *MDScopeList = dyn_cast_or_null<MDNode>(Val: ScopeList);
5971 if (!MDScopeList || !Container.insert(MDScopeList).second)
5972 return;
5973 for (const auto &MDOperand : MDScopeList->operands())
5974 if (auto *MDScope = dyn_cast<MDNode>(Val: MDOperand))
5975 Container.insert(MDScope);
5976 };
5977
5978 Track(I->getMetadata(KindID: LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
5979 Track(I->getMetadata(KindID: LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
5980 }
5981
5982 bool isNoAliasScopeDeclDead(Instruction *Inst) {
5983 NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: Inst);
5984 if (!Decl)
5985 return false;
5986
5987 assert(Decl->use_empty() &&
5988 "llvm.experimental.noalias.scope.decl in use ?");
5989 const MDNode *MDSL = Decl->getScopeList();
5990 assert(MDSL->getNumOperands() == 1 &&
5991 "llvm.experimental.noalias.scope should refer to a single scope");
5992 auto &MDOperand = MDSL->getOperand(I: 0);
5993 if (auto *MD = dyn_cast<MDNode>(Val: MDOperand))
5994 return !UsedAliasScopesAndLists.contains(Ptr: MD) ||
5995 !UsedNoAliasScopesAndLists.contains(Ptr: MD);
5996
5997 // Not an MDNode ? throw away.
5998 return true;
5999 }
6000};
6001
/// Populate the IC worklist from a function, by walking it in reverse
/// post-order and adding all reachable code to the worklist.
///
/// This has a couple of tricks to make the code faster and more powerful. In
/// particular, we constant fold and DCE instructions as we go, to avoid adding
/// them to the worklist (this significantly speeds up instcombine on code where
/// many instructions are dead or constant). Additionally, if we find a branch
/// whose condition is a known constant, we only visit the reachable successors.
///
/// \returns true if any IR was changed (constants folded, PHI inputs poisoned,
/// dead code removed).
bool InstCombinerImpl::prepareWorklist(Function &F) {
  bool MadeIRChange = false;
  SmallPtrSet<BasicBlock *, 32> LiveBlocks;
  SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
  DenseMap<Constant *, Constant *> FoldedConstants;
  AliasScopeTracker SeenAliasScopes;

  // Record every edge out of BB except the one to LiveSucc as dead, and
  // replace the matching PHI inputs in those dead successors with poison.
  auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) {
    for (BasicBlock *Succ : successors(BB))
      if (Succ != LiveSucc && DeadEdges.insert(V: {BB, Succ}).second)
        for (PHINode &PN : Succ->phis())
          for (Use &U : PN.incoming_values())
            if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(Val: U)) {
              U.set(PoisonValue::get(T: PN.getType()));
              MadeIRChange = true;
            }
  };

  for (BasicBlock *BB : RPOT) {
    // A non-entry block is dead when every incoming edge is dead or comes
    // from a block dominated by BB itself (only reachable back through BB).
    if (!BB->isEntryBlock() && all_of(Range: predecessors(BB), P: [&](BasicBlock *Pred) {
          return DeadEdges.contains(V: {Pred, BB}) || DT.dominates(A: BB, B: Pred);
        })) {
      HandleOnlyLiveSuccessor(BB, nullptr);
      continue;
    }
    LiveBlocks.insert(Ptr: BB);

    for (Instruction &Inst : llvm::make_early_inc_range(Range&: *BB)) {
      // ConstantProp instruction if trivially constant.
      if (!Inst.use_empty() &&
          (Inst.getNumOperands() == 0 || isa<Constant>(Val: Inst.getOperand(i: 0))))
        if (Constant *C = ConstantFoldInstruction(I: &Inst, DL, TLI: &TLI)) {
          LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
                            << '\n');
          Inst.replaceAllUsesWith(V: C);
          ++NumConstProp;
          if (isInstructionTriviallyDead(I: &Inst, TLI: &TLI))
            Inst.eraseFromParent();
          MadeIRChange = true;
          continue;
        }

      // See if we can constant fold its operands.
      for (Use &U : Inst.operands()) {
        if (!isa<ConstantVector>(Val: U) && !isa<ConstantExpr>(Val: U))
          continue;

        auto *C = cast<Constant>(Val&: U);
        // Cache fold results: the same constant expression commonly appears
        // as an operand of many instructions.
        Constant *&FoldRes = FoldedConstants[C];
        if (!FoldRes)
          FoldRes = ConstantFoldConstant(C, DL, TLI: &TLI);

        if (FoldRes != C) {
          LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
                            << "\n Old = " << *C
                            << "\n New = " << *FoldRes << '\n');
          U = FoldRes;
          MadeIRChange = true;
        }
      }

      // Skip processing debug and pseudo intrinsics in InstCombine. Processing
      // these call instructions consumes non-trivial amount of time and
      // provides no value for the optimization.
      if (!Inst.isDebugOrPseudoInst()) {
        InstrsForInstructionWorklist.push_back(Elt: &Inst);
        SeenAliasScopes.analyse(I: &Inst);
      }
    }

    // If this is a branch or switch on a constant, mark only the single
    // live successor. Otherwise assume all successors are live.
    Instruction *TI = BB->getTerminator();
    if (BranchInst *BI = dyn_cast<BranchInst>(Val: TI); BI && BI->isConditional()) {
      if (isa<UndefValue>(Val: BI->getCondition())) {
        // Branch on undef is UB.
        HandleOnlyLiveSuccessor(BB, nullptr);
        continue;
      }
      if (auto *Cond = dyn_cast<ConstantInt>(Val: BI->getCondition())) {
        bool CondVal = Cond->getZExtValue();
        // Successor 0 is taken on true, successor 1 on false.
        HandleOnlyLiveSuccessor(BB, BI->getSuccessor(i: !CondVal));
        continue;
      }
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
      if (isa<UndefValue>(Val: SI->getCondition())) {
        // Switch on undef is UB.
        HandleOnlyLiveSuccessor(BB, nullptr);
        continue;
      }
      if (auto *Cond = dyn_cast<ConstantInt>(Val: SI->getCondition())) {
        HandleOnlyLiveSuccessor(BB,
                                SI->findCaseValue(C: Cond)->getCaseSuccessor());
        continue;
      }
    }
  }

  // Remove instructions inside unreachable blocks. This prevents the
  // instcombine code from having to deal with some bad special cases, and
  // reduces use counts of instructions.
  for (BasicBlock &BB : F) {
    if (LiveBlocks.count(Ptr: &BB))
      continue;

    unsigned NumDeadInstInBB;
    NumDeadInstInBB = removeAllNonTerminatorAndEHPadInstructions(BB: &BB);

    MadeIRChange |= NumDeadInstInBB != 0;
    NumDeadInst += NumDeadInstInBB;
  }

  // Once we've found all of the instructions to add to instcombine's worklist,
  // add them in reverse order. This way instcombine will visit from the top
  // of the function down. This jives well with the way that it adds all uses
  // of instructions to the worklist after doing a transformation, thus avoiding
  // some N^2 behavior in pathological cases.
  Worklist.reserve(Size: InstrsForInstructionWorklist.size());
  for (Instruction *Inst : reverse(C&: InstrsForInstructionWorklist)) {
    // DCE instruction if trivially dead. As we iterate in reverse program
    // order here, we will clean up whole chains of dead instructions.
    if (isInstructionTriviallyDead(I: Inst, TLI: &TLI) ||
        SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
      ++NumDeadInst;
      LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
      salvageDebugInfo(I&: *Inst);
      Inst->eraseFromParent();
      MadeIRChange = true;
      continue;
    }

    Worklist.push(I: Inst);
  }

  return MadeIRChange;
}
6146
6147void InstCombiner::computeBackEdges() {
6148 // Collect backedges.
6149 SmallPtrSet<BasicBlock *, 16> Visited;
6150 for (BasicBlock *BB : RPOT) {
6151 Visited.insert(Ptr: BB);
6152 for (BasicBlock *Succ : successors(BB))
6153 if (Visited.contains(Ptr: Succ))
6154 BackEdges.insert(V: {BB, Succ});
6155 }
6156 ComputedBackEdges = true;
6157}
6158
/// Drive InstCombine over \p F: repeatedly prepare the worklist and run the
/// combiner until no iteration makes a change or the iteration limit in
/// \p Opts is hit.
///
/// \returns true if the function was modified.
static bool combineInstructionsOverFunction(
    Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
    AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
    DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
    BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI,
    const InstCombineOptions &Opts) {
  auto &DL = F.getDataLayout();
  // A function attribute can opt an individual function out of fixpoint
  // verification.
  bool VerifyFixpoint = Opts.VerifyFixpoint &&
                        !F.hasFnAttribute(Kind: "instcombine-no-verify-fixpoint");

  /// Builder - This is an IRBuilder that automatically inserts new
  /// instructions into the worklist when they are created.
  IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder(
      F.getContext(), TargetFolder(DL),
      IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
        Worklist.add(I);
        if (auto *Assume = dyn_cast<AssumeInst>(Val: I))
          AC.registerAssumption(CI: Assume);
      }));

  ReversePostOrderTraversal<BasicBlock *> RPOT(&F.front());

  // Lower dbg.declare intrinsics otherwise their value may be clobbered
  // by instcombiner.
  bool MadeIRChange = false;
  if (ShouldLowerDbgDeclare)
    MadeIRChange = LowerDbgDeclare(F);

  // Iterate while there is work to do.
  unsigned Iteration = 0;
  while (true) {
    if (Iteration >= Opts.MaxIterations && !VerifyFixpoint) {
      LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations
                        << " on " << F.getName()
                        << " reached; stopping without verifying fixpoint\n");
      break;
    }

    ++Iteration;
    ++NumWorklistIterations;
    LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
                      << F.getName() << "\n");

    // A fresh combiner instance is constructed for every iteration.
    InstCombinerImpl IC(Worklist, Builder, F, AA, AC, TLI, TTI, DT, ORE, BFI,
                        BPI, PSI, DL, RPOT);
    IC.MaxArraySizeForCombine = MaxArraySize;
    bool MadeChangeInThisIteration = IC.prepareWorklist(F);
    MadeChangeInThisIteration |= IC.run();
    if (!MadeChangeInThisIteration)
      break;

    MadeIRChange = true;
    // Still changing past the limit while verification is on: report a
    // missed-fixpoint error rather than silently iterating forever.
    if (Iteration > Opts.MaxIterations) {
      reportFatalUsageError(
          reason: "Instruction Combining on " + Twine(F.getName()) +
          " did not reach a fixpoint after " + Twine(Opts.MaxIterations) +
          " iterations. " +
          "Use 'instcombine<no-verify-fixpoint>' or function attribute "
          "'instcombine-no-verify-fixpoint' to suppress this error.");
    }
  }

  // Gather statistics on how many iterations functions typically need.
  if (Iteration == 1)
    ++NumOneIteration;
  else if (Iteration == 2)
    ++NumTwoIterations;
  else if (Iteration == 3)
    ++NumThreeIterations;
  else
    ++NumFourOrMoreIterations;

  return MadeIRChange;
}
6232
6233InstCombinePass::InstCombinePass(InstCombineOptions Opts) : Options(Opts) {}
6234
6235void InstCombinePass::printPipeline(
6236 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
6237 static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline(
6238 OS, MapClassName2PassName);
6239 OS << '<';
6240 OS << "max-iterations=" << Options.MaxIterations << ";";
6241 OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint";
6242 OS << '>';
6243}
6244
6245char InstCombinePass::ID = 0;
6246
/// New pass manager entry point: run InstCombine over \p F, exiting early when
/// nothing has changed since the previous InstCombine run on this function.
PreservedAnalyses InstCombinePass::run(Function &F,
                                       FunctionAnalysisManager &AM) {
  auto &LRT = AM.getResult<LastRunTrackingAnalysis>(IR&: F);
  // No changes since last InstCombine pass, exit early.
  if (LRT.shouldSkip(ID: &ID))
    return PreservedAnalyses::all();

  // Required function-level analyses.
  auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F);
  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
  auto &TTI = AM.getResult<TargetIRAnalysis>(IR&: F);

  auto *AA = &AM.getResult<AAManager>(IR&: F);
  // Optional analyses: PSI comes from the module level (cached only), BFI is
  // only computed when profile data exists, and BPI is used if some earlier
  // pass already computed it.
  auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F);
  ProfileSummaryInfo *PSI =
      MAMProxy.getCachedResult<ProfileSummaryAnalysis>(IR&: *F.getParent());
  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
      &AM.getResult<BlockFrequencyAnalysis>(IR&: F) : nullptr;
  auto *BPI = AM.getCachedResult<BranchProbabilityAnalysis>(IR&: F);

  if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
                                       BFI, BPI, PSI, Opts: Options)) {
    // No changes, all analyses are preserved.
    LRT.update(ID: &ID, /*Changed=*/false);
    return PreservedAnalyses::all();
  }

  // Mark all the analyses that instcombine updates as preserved.
  PreservedAnalyses PA;
  LRT.update(ID: &ID, /*Changed=*/true);
  PA.preserve<LastRunTrackingAnalysis>();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}
6282
// Declare the legacy pass manager dependencies: the analyses instcombine
// requires as inputs, and those it keeps valid (it never changes the CFG).
void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  // Required inputs.
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<TargetLibraryInfoWrapperPass>();
  AU.addRequired<TargetTransformInfoWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  // Analyses kept up to date across this pass.
  AU.addPreserved<DominatorTreeWrapperPass>();
  AU.addPreserved<AAResultsWrapperPass>();
  AU.addPreserved<BasicAAWrapperPass>();
  AU.addPreserved<GlobalsAAWrapperPass>();
  AU.addRequired<ProfileSummaryInfoWrapperPass>();
  // BFI is lazy: only computed when profile data makes it worthwhile.
  LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
}
6298
6299bool InstructionCombiningPass::runOnFunction(Function &F) {
6300 if (skipFunction(F))
6301 return false;
6302
6303 // Required analyses.
6304 auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
6305 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
6306 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
6307 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
6308 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
6309 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
6310
6311 // Optional analyses.
6312 ProfileSummaryInfo *PSI =
6313 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
6314 BlockFrequencyInfo *BFI =
6315 (PSI && PSI->hasProfileSummary()) ?
6316 &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
6317 nullptr;
6318 BranchProbabilityInfo *BPI = nullptr;
6319 if (auto *WrapperPass =
6320 getAnalysisIfAvailable<BranchProbabilityInfoWrapperPass>())
6321 BPI = &WrapperPass->getBPI();
6322
6323 return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
6324 BFI, BPI, PSI, Opts: InstCombineOptions());
6325}
6326
// Unique legacy pass identifier; registration and getAnalysis keys off its
// address.
char InstructionCombiningPass::ID = 0;

// Default-construct the legacy pass, registering it under its unique ID.
InstructionCombiningPass::InstructionCombiningPass() : FunctionPass(ID) {}
6330
// Register the legacy pass ("-instcombine") and its analysis dependencies
// with the global PassRegistry. The two `false` flags mean: not a CFG-only
// pass, and not an analysis pass.
INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
                      "Combine redundant instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
                    "Combine redundant instructions", false, false)
6344
6345// Initialization Routines.
// Library initialization hook: registers the legacy instcombine pass with
// the given registry so it can be requested by name (e.g. from opt).
void llvm::initializeInstCombine(PassRegistry &Registry) {
  initializeInstructionCombiningPassPass(Registry);
}
6349
6350FunctionPass *llvm::createInstructionCombiningPass() {
6351 return new InstructionCombiningPass();
6352}
6353