1//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// InstructionCombining - Combine instructions to form fewer, simple
10// instructions. This pass does not modify the CFG. This pass is where
11// algebraic simplification happens.
12//
13// This pass combines things like:
14// %Y = add i32 %X, 1
15// %Z = add i32 %Y, 1
16// into:
17// %Z = add i32 %X, 2
18//
19// This is a simple worklist driven algorithm.
20//
21// This pass guarantees that the following canonicalizations are performed on
22// the program:
23// 1. If a binary operator has a constant operand, it is moved to the RHS
24// 2. Bitwise operators with constant operands are always grouped so that
25// shifts are performed first, then or's, then and's, then xor's.
26// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27// 4. All cmp instructions on boolean values are replaced with logical ops
28// 5. add X, X is represented as (X*2) => (X << 1)
29// 6. Multiplies with a power-of-two constant argument are transformed into
30// shifts.
31// ... etc.
32//
33//===----------------------------------------------------------------------===//
34
35#include "InstCombineInternal.h"
36#include "llvm/ADT/APFloat.h"
37#include "llvm/ADT/APInt.h"
38#include "llvm/ADT/ArrayRef.h"
39#include "llvm/ADT/DenseMap.h"
40#include "llvm/ADT/SmallPtrSet.h"
41#include "llvm/ADT/SmallVector.h"
42#include "llvm/ADT/Statistic.h"
43#include "llvm/Analysis/AliasAnalysis.h"
44#include "llvm/Analysis/AssumptionCache.h"
45#include "llvm/Analysis/BasicAliasAnalysis.h"
46#include "llvm/Analysis/BlockFrequencyInfo.h"
47#include "llvm/Analysis/CFG.h"
48#include "llvm/Analysis/ConstantFolding.h"
49#include "llvm/Analysis/GlobalsModRef.h"
50#include "llvm/Analysis/InstructionSimplify.h"
51#include "llvm/Analysis/LastRunTrackingAnalysis.h"
52#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
53#include "llvm/Analysis/MemoryBuiltins.h"
54#include "llvm/Analysis/OptimizationRemarkEmitter.h"
55#include "llvm/Analysis/ProfileSummaryInfo.h"
56#include "llvm/Analysis/TargetFolder.h"
57#include "llvm/Analysis/TargetLibraryInfo.h"
58#include "llvm/Analysis/TargetTransformInfo.h"
59#include "llvm/Analysis/Utils/Local.h"
60#include "llvm/Analysis/ValueTracking.h"
61#include "llvm/Analysis/VectorUtils.h"
62#include "llvm/IR/BasicBlock.h"
63#include "llvm/IR/CFG.h"
64#include "llvm/IR/Constant.h"
65#include "llvm/IR/Constants.h"
66#include "llvm/IR/DIBuilder.h"
67#include "llvm/IR/DataLayout.h"
68#include "llvm/IR/DebugInfo.h"
69#include "llvm/IR/DerivedTypes.h"
70#include "llvm/IR/Dominators.h"
71#include "llvm/IR/EHPersonalities.h"
72#include "llvm/IR/Function.h"
73#include "llvm/IR/GetElementPtrTypeIterator.h"
74#include "llvm/IR/IRBuilder.h"
75#include "llvm/IR/InstrTypes.h"
76#include "llvm/IR/Instruction.h"
77#include "llvm/IR/Instructions.h"
78#include "llvm/IR/IntrinsicInst.h"
79#include "llvm/IR/Intrinsics.h"
80#include "llvm/IR/Metadata.h"
81#include "llvm/IR/Operator.h"
82#include "llvm/IR/PassManager.h"
83#include "llvm/IR/PatternMatch.h"
84#include "llvm/IR/Type.h"
85#include "llvm/IR/Use.h"
86#include "llvm/IR/User.h"
87#include "llvm/IR/Value.h"
88#include "llvm/IR/ValueHandle.h"
89#include "llvm/InitializePasses.h"
90#include "llvm/Support/Casting.h"
91#include "llvm/Support/CommandLine.h"
92#include "llvm/Support/Compiler.h"
93#include "llvm/Support/Debug.h"
94#include "llvm/Support/DebugCounter.h"
95#include "llvm/Support/ErrorHandling.h"
96#include "llvm/Support/KnownBits.h"
97#include "llvm/Support/KnownFPClass.h"
98#include "llvm/Support/raw_ostream.h"
99#include "llvm/Transforms/InstCombine/InstCombine.h"
100#include "llvm/Transforms/Utils/BasicBlockUtils.h"
101#include "llvm/Transforms/Utils/Local.h"
102#include <algorithm>
103#include <cassert>
104#include <cstdint>
105#include <memory>
106#include <optional>
107#include <string>
108#include <utility>
109
110#define DEBUG_TYPE "instcombine"
111#include "llvm/Transforms/Utils/InstructionWorklist.h"
112#include <optional>
113
114using namespace llvm;
115using namespace llvm::PatternMatch;
116
// Pass statistics. The first group counts worklist iterations and buckets
// functions by the number of iterations they needed; the second group counts
// the individual kinds of change that were made.
117STATISTIC(NumWorklistIterations,
118 "Number of instruction combining iterations performed");
119STATISTIC(NumOneIteration, "Number of functions with one iteration");
120STATISTIC(NumTwoIterations, "Number of functions with two iterations");
121STATISTIC(NumThreeIterations, "Number of functions with three iterations");
122STATISTIC(NumFourOrMoreIterations,
123 "Number of functions with four or more iterations");
124
125STATISTIC(NumCombined , "Number of insts combined");
126STATISTIC(NumConstProp, "Number of constant folds");
127STATISTIC(NumDeadInst , "Number of dead inst eliminated");
128STATISTIC(NumSunkInst , "Number of instructions sunk");
129STATISTIC(NumExpand, "Number of expansions");
130STATISTIC(NumFactor , "Number of factorizations");
131STATISTIC(NumReassoc , "Number of reassociations");
// Debug counter registered under the name "instcombine-visit"; per its
// description it controls which instructions are visited.
132DEBUG_COUNTER(VisitCounter, "instcombine-visit",
133 "Controls which instructions are visited");
134
// Command-line options local to this file. Each option's purpose is given by
// its cl::desc string; the cl::init value is the default.

// -instcombine-code-sinking (default: true).
135static cl::opt<bool> EnableCodeSinking("instcombine-code-sinking",
136 cl::desc("Enable code sinking"),
137 cl::init(Val: true));
138
// -instcombine-max-sink-users (default: 32).
139static cl::opt<unsigned> MaxSinkNumUsers(
140 "instcombine-max-sink-users", cl::init(Val: 32),
141 cl::desc("Maximum number of undroppable users for instruction sinking"));
142
// -instcombine-maxarray-size (default: 1024).
143static cl::opt<unsigned>
144MaxArraySize("instcombine-maxarray-size", cl::init(Val: 1024),
145 cl::desc("Maximum array size considered when doing a combine"));
146
// -instcombine-max-allocsite-removable-users (hidden, default: 2048).
147static cl::opt<unsigned> MaxAllocSiteRemovableUsers(
148 "instcombine-max-allocsite-removable-users", cl::Hidden, cl::init(Val: 2048),
149 cl::desc("Maximum number of users to visit in alloc-site "
150 "removability analysis"));
151
// Option declared here but defined in another translation unit.
152namespace llvm {
153extern cl::opt<bool> ProfcheckDisableMetadataFixes;
154} // end namespace llvm
155
156// FIXME: Remove this flag when it is no longer necessary to convert
157// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
158// increases variable availability at the cost of accuracy. Variables that
159// cannot be promoted by mem2reg or SROA will be described as living in memory
160// for their entire lifetime. However, passes like DSE and instcombine can
161// delete stores to the alloca, leading to misleading and inaccurate debug
162// information. This flag can be removed when those passes are fixed.
// NOTE(review): the option is declared as cl::opt<unsigned> although it is
// initialised with `true` and described above as a true/false flag; confirm
// whether cl::opt<bool> was intended before changing it, since that would
// alter which command-line values are accepted.
163static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
164 cl::Hidden, cl::init(Val: true));
165
// Out-of-line definition of the inserter's destructor; it is defaulted, so it
// performs no work beyond destroying the members and bases.
166InstCombiner::IRBuilderInstCombineInserter::~IRBuilderInstCombineInserter() =
167 default;
168
169void InstCombiner::IRBuilderInstCombineInserter::InsertHelper(
170 Instruction *I, const Twine &Name, BasicBlock::iterator InsertPt) const {
171 IRBuilderDefaultInserter::InsertHelper(I, Name, InsertPt);
172 IC.Worklist.add(I);
173 if (auto *Assume = dyn_cast<AssumeInst>(Val: I))
174 IC.AC.registerAssumption(CI: Assume);
175 if (IC.AnnotationMetadataSource)
176 I->copyMetadata(SrcInst: *IC.AnnotationMetadataSource, WL: LLVMContext::MD_annotation);
177}
178
179std::optional<Instruction *>
180InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) {
181 // Handle target specific intrinsics
182 if (II.getCalledFunction()->isTargetIntrinsic()) {
183 return TTIForTargetIntrinsicsOnly.instCombineIntrinsic(IC&: *this, II);
184 }
185 return std::nullopt;
186}
187
188std::optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
189 IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
190 bool &KnownBitsComputed) {
191 // Handle target specific intrinsics
192 if (II.getCalledFunction()->isTargetIntrinsic()) {
193 return TTIForTargetIntrinsicsOnly.simplifyDemandedUseBitsIntrinsic(
194 IC&: *this, II, DemandedMask, Known, KnownBitsComputed);
195 }
196 return std::nullopt;
197}
198
199std::optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
200 IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts,
201 APInt &PoisonElts2, APInt &PoisonElts3,
202 std::function<void(Instruction *, unsigned, APInt, APInt &)>
203 SimplifyAndSetOp) {
204 // Handle target specific intrinsics
205 if (II.getCalledFunction()->isTargetIntrinsic()) {
206 return TTIForTargetIntrinsicsOnly.simplifyDemandedVectorEltsIntrinsic(
207 IC&: *this, II, DemandedElts, UndefElts&: PoisonElts, UndefElts2&: PoisonElts2, UndefElts3&: PoisonElts3,
208 SimplifyAndSetOp);
209 }
210 return std::nullopt;
211}
212
213bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
214 // Approved exception for TTI use: This queries a legality property of the
215 // target, not an profitability heuristic. Ideally this should be part of
216 // DataLayout instead.
217 return TTIForTargetIntrinsicsOnly.isValidAddrSpaceCast(FromAS, ToAS);
218}
219
/// Emit IR computing the offset of \p GEP and return it.
///
/// With \p RewriteGEP unset this simply defers to llvm::emitGEPOffset at the
/// builder's current insertion point. With \p RewriteGEP set, and when \p GEP
/// is an instruction, the offset is emitted at the GEP itself and a
/// non-trivial GEP is replaced by an i8 GEP that reuses the emitted offset.
220Value *InstCombinerImpl::EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP) {
221 if (!RewriteGEP)
222 return llvm::emitGEPOffset(Builder: &Builder, DL, GEP);
223
// The insertion point may be moved below; the guard scopes that change to
// this function.
224 IRBuilderBase::InsertPointGuard Guard(Builder);
225 auto *Inst = dyn_cast<Instruction>(Val: GEP);
226 if (Inst)
227 Builder.SetInsertPoint(Inst);
228
// NOTE(review): this call omits RewriteGEP, so it presumably relies on a
// default of `false` in the declaration (not visible here) and takes the
// early-return path above -- confirm against the header.
229 Value *Offset = EmitGEPOffset(GEP);
230 // Rewrite non-trivial GEPs to avoid duplicating the offset arithmetic.
// "Non-trivial" here means: an instruction whose indices are not all
// constant and whose source element type is not already i8.
231 if (Inst && !GEP->hasAllConstantIndices() &&
232 !GEP->getSourceElementType()->isIntegerTy(BitWidth: 8)) {
// The replacement keeps the original pointer operand and no-wrap flags and
// uses the emitted offset as its only index.
233 replaceInstUsesWith(
234 I&: *Inst, V: Builder.CreateGEP(Ty: Builder.getInt8Ty(), Ptr: GEP->getPointerOperand(),
235 IdxList: Offset, Name: "", NW: GEP->getNoWrapFlags()));
236 eraseInstFromFunction(I&: *Inst);
237 }
238 return Offset;
239}
240
/// Emit IR computing the sum of the offsets of all \p GEPs as a value of type
/// \p IdxTy and return it. A null constant of \p IdxTy is returned when there
/// is nothing to sum.
///
/// \p NW supplies the wrap flags for the emitted adds. \p GEPs is walked in
/// reverse order. With \p RewriteGEPs set, each multi-use GEP instruction is
/// additionally replaced by a ptradd that reuses the offset computed here,
/// folding in the offsets of the one-use GEPs visited just before it.
241Value *InstCombinerImpl::EmitGEPOffsets(ArrayRef<GEPOperator *> GEPs,
242 GEPNoWrapFlags NW, Type *IdxTy,
243 bool RewriteGEPs) {
// Accumulate Offset into Sum (Sum may be null, meaning "nothing so far").
// The add is nuw when NW has nuw, and nsw when NW is inbounds.
244 auto Add = [&](Value *Sum, Value *Offset) -> Value * {
245 if (Sum)
246 return Builder.CreateAdd(LHS: Sum, RHS: Offset, Name: "", HasNUW: NW.hasNoUnsignedWrap(),
247 HasNSW: NW.isInBounds());
248 else
249 return Offset;
250 };
251
// Sum: total over all GEPs processed so far.
// OneUseSum / OneUseBase / OneUseFlags: pending offset, base pointer and
// intersected flags of the run of one-use GEPs seen since the last
// multi-use GEP.
252 Value *Sum = nullptr;
253 Value *OneUseSum = nullptr;
254 Value *OneUseBase = nullptr;
255 GEPNoWrapFlags OneUseFlags = GEPNoWrapFlags::all();
256 for (GEPOperator *GEP : reverse(C&: GEPs)) {
257 Value *Offset;
258 {
259 // Expand the offset at the point of the previous GEP to enable rewriting.
260 // However, use the original insertion point for calculating Sum.
261 IRBuilderBase::InsertPointGuard Guard(Builder);
262 auto *Inst = dyn_cast<Instruction>(Val: GEP);
263 if (RewriteGEPs && Inst)
264 Builder.SetInsertPoint(Inst);
265
266 Offset = llvm::emitGEPOffset(Builder: &Builder, DL, GEP);
// A type mismatch is handled by splatting the offset to IdxTy's element
// count; the cast requires IdxTy to be a vector type in that case.
267 if (Offset->getType() != IdxTy)
268 Offset = Builder.CreateVectorSplat(
269 EC: cast<VectorType>(Val: IdxTy)->getElementCount(), V: Offset);
270 if (GEP->hasOneUse()) {
271 // Offsets of one-use GEPs will be merged into the next multi-use GEP.
272 OneUseSum = Add(OneUseSum, Offset);
273 OneUseFlags = OneUseFlags.intersectForOffsetAdd(Other: GEP->getNoWrapFlags());
// Remember the pointer operand of the first one-use GEP of the run.
274 if (!OneUseBase)
275 OneUseBase = GEP->getPointerOperand();
// Skip the Sum update below; the pending offset is added later.
276 continue;
277 }
278
279 if (OneUseSum)
280 Offset = Add(OneUseSum, Offset);
281
282 // Rewrite the GEP to reuse the computed offset. This also includes
283 // offsets from preceding one-use GEPs of matched type.
// Skipped when the GEP is already an i8 GEP whose index is exactly Offset,
// or when Offset and the GEP disagree on being vector-typed.
284 if (RewriteGEPs && Inst &&
285 Offset->getType()->isVectorTy() == GEP->getType()->isVectorTy() &&
286 !(GEP->getSourceElementType()->isIntegerTy(BitWidth: 8) &&
287 GEP->getOperand(i_nocapture: 1) == Offset)) {
288 replaceInstUsesWith(
289 I&: *Inst,
290 V: Builder.CreatePtrAdd(
291 Ptr: OneUseBase ? OneUseBase : GEP->getPointerOperand(), Offset, Name: "",
292 NW: OneUseFlags.intersectForOffsetAdd(Other: GEP->getNoWrapFlags())));
293 eraseInstFromFunction(I&: *Inst);
294 }
295 }
296
// Multi-use GEP handled: fold its (combined) offset into the total and
// reset the pending one-use state.
297 Sum = Add(Sum, Offset);
298 OneUseSum = OneUseBase = nullptr;
299 OneUseFlags = GEPNoWrapFlags::all();
300 }
// Flush a trailing run of one-use GEPs that was not followed by a multi-use
// GEP.
301 if (OneUseSum)
302 Sum = Add(Sum, OneUseSum);
303 if (!Sum)
304 return Constant::getNullValue(Ty: IdxTy);
305 return Sum;
306}
307
308/// Legal integers and common types are considered desirable. This is used to
309/// avoid creating instructions with types that may not be supported well by the
310/// the backend.
311/// NOTE: This treats i8, i16 and i32 specially because they are common
312/// types in frontend languages.
313bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
314 switch (BitWidth) {
315 case 8:
316 case 16:
317 case 32:
318 return true;
319 default:
320 return DL.isLegalInteger(Width: BitWidth);
321 }
322}
323
324/// Return true if it is desirable to convert an integer computation from a
325/// given bit width to a new bit width.
326/// We don't want to convert from a legal or desirable type (like i8) to an
327/// illegal type or from a smaller to a larger illegal type. A width of '1'
328/// is always treated as a desirable type because i1 is a fundamental type in
329/// IR, and there are many specialized optimizations for i1 types.
330/// Common/desirable widths are equally treated as legal to convert to, in
331/// order to open up more combining opportunities.
332bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
333 unsigned ToWidth) const {
334 bool FromLegal = FromWidth == 1 || DL.isLegalInteger(Width: FromWidth);
335 bool ToLegal = ToWidth == 1 || DL.isLegalInteger(Width: ToWidth);
336
337 // Convert to desirable widths even if they are not legal types.
338 // Only shrink types, to prevent infinite loops.
339 if (ToWidth < FromWidth && isDesirableIntType(BitWidth: ToWidth))
340 return true;
341
342 // If this is a legal or desiable integer from type, and the result would be
343 // an illegal type, don't do the transformation.
344 if ((FromLegal || isDesirableIntType(BitWidth: FromWidth)) && !ToLegal)
345 return false;
346
347 // Otherwise, if both are illegal, do not increase the size of the result. We
348 // do allow things like i160 -> i64, but not i64 -> i160.
349 if (!FromLegal && !ToLegal && ToWidth > FromWidth)
350 return false;
351
352 return true;
353}
354
355/// Return true if it is desirable to convert a computation from 'From' to 'To'.
356/// We don't want to convert from a legal to an illegal type or from a smaller
357/// to a larger illegal type. i1 is always treated as a legal type because it is
358/// a fundamental type in IR, and there are many specialized optimizations for
359/// i1 types.
360bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
361 // TODO: This could be extended to allow vectors. Datalayout changes might be
362 // needed to properly support that.
363 if (!From->isIntegerTy() || !To->isIntegerTy())
364 return false;
365
366 unsigned FromWidth = From->getPrimitiveSizeInBits();
367 unsigned ToWidth = To->getPrimitiveSizeInBits();
368 return shouldChangeType(FromWidth, ToWidth);
369}
370
371// Return true, if No Signed Wrap should be maintained for I.
372// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
373// where both B and C should be ConstantInts, results in a constant that does
374// not overflow. This function only handles the Add/Sub/Mul opcodes. For
375// all other opcodes, the function conservatively returns false.
376static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
377 auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I);
378 if (!OBO || !OBO->hasNoSignedWrap())
379 return false;
380
381 const APInt *BVal, *CVal;
382 if (!match(V: B, P: m_APInt(Res&: BVal)) || !match(V: C, P: m_APInt(Res&: CVal)))
383 return false;
384
385 // We reason about Add/Sub/Mul Only.
386 bool Overflow = false;
387 switch (I.getOpcode()) {
388 case Instruction::Add:
389 (void)BVal->sadd_ov(RHS: *CVal, Overflow);
390 break;
391 case Instruction::Sub:
392 (void)BVal->ssub_ov(RHS: *CVal, Overflow);
393 break;
394 case Instruction::Mul:
395 (void)BVal->smul_ov(RHS: *CVal, Overflow);
396 break;
397 default:
398 // Conservatively return false for other opcodes.
399 return false;
400 }
401 return !Overflow;
402}
403
404static bool hasNoUnsignedWrap(BinaryOperator &I) {
405 auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I);
406 return OBO && OBO->hasNoUnsignedWrap();
407}
408
409static bool hasNoSignedWrap(BinaryOperator &I) {
410 auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I);
411 return OBO && OBO->hasNoSignedWrap();
412}
413
414/// Combine constant operands of associative operations either before or after a
415/// cast to eliminate one of the associative operations:
416/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
417/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
418static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
419 InstCombinerImpl &IC) {
420 auto *Cast = dyn_cast<CastInst>(Val: BinOp1->getOperand(i_nocapture: 0));
421 if (!Cast || !Cast->hasOneUse())
422 return false;
423
424 // TODO: Enhance logic for other casts and remove this check.
425 auto CastOpcode = Cast->getOpcode();
426 if (CastOpcode != Instruction::ZExt)
427 return false;
428
429 // TODO: Enhance logic for other BinOps and remove this check.
430 if (!BinOp1->isBitwiseLogicOp())
431 return false;
432
433 auto AssocOpcode = BinOp1->getOpcode();
434 auto *BinOp2 = dyn_cast<BinaryOperator>(Val: Cast->getOperand(i_nocapture: 0));
435 if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
436 return false;
437
438 Constant *C1, *C2;
439 if (!match(V: BinOp1->getOperand(i_nocapture: 1), P: m_Constant(C&: C1)) ||
440 !match(V: BinOp2->getOperand(i_nocapture: 1), P: m_Constant(C&: C2)))
441 return false;
442
443 // TODO: This assumes a zext cast.
444 // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
445 // to the destination type might lose bits.
446
447 // Fold the constants together in the destination type:
448 // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
449 const DataLayout &DL = IC.getDataLayout();
450 Type *DestTy = C1->getType();
451 Constant *CastC2 = ConstantFoldCastOperand(Opcode: CastOpcode, C: C2, DestTy, DL);
452 if (!CastC2)
453 return false;
454 Constant *FoldedC = ConstantFoldBinaryOpOperands(Opcode: AssocOpcode, LHS: C1, RHS: CastC2, DL);
455 if (!FoldedC)
456 return false;
457
458 IC.replaceOperand(I&: *Cast, OpNum: 0, V: BinOp2->getOperand(i_nocapture: 0));
459 IC.replaceOperand(I&: *BinOp1, OpNum: 1, V: FoldedC);
460 BinOp1->dropPoisonGeneratingFlags();
461 Cast->dropPoisonGeneratingFlags();
462 return true;
463}
464
465// Simplifies IntToPtr/PtrToInt RoundTrip Cast.
466// inttoptr ( ptrtoint (x) ) --> x
467Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
468 auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
469 if (IntToPtr && DL.getTypeSizeInBits(Ty: IntToPtr->getDestTy()) ==
470 DL.getTypeSizeInBits(Ty: IntToPtr->getSrcTy())) {
471 auto *PtrToInt = dyn_cast<PtrToIntInst>(Val: IntToPtr->getOperand(i_nocapture: 0));
472 Type *CastTy = IntToPtr->getDestTy();
473 if (PtrToInt &&
474 CastTy->getPointerAddressSpace() ==
475 PtrToInt->getSrcTy()->getPointerAddressSpace() &&
476 DL.getTypeSizeInBits(Ty: PtrToInt->getSrcTy()) ==
477 DL.getTypeSizeInBits(Ty: PtrToInt->getDestTy()))
478 return PtrToInt->getOperand(i_nocapture: 0);
479 }
480 return nullptr;
481}
482
483/// This performs a few simplifications for operators that are associative or
484/// commutative:
485///
486/// Commutative operators:
487///
488/// 1. Order operands such that they are listed from right (least complex) to
489/// left (most complex). This puts constants before unary operators before
490/// binary operators.
491///
492/// Associative operators:
493///
494/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
495/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
496///
497/// Associative and commutative operators:
498///
499/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
500/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
501/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
502/// if C1 and C2 are constants.
///
/// \p I is modified in place. Each successful transform restarts the loop so
/// the remaining transforms are retried on the updated operands. Returns true
/// if \p I was changed.
503bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
504 Instruction::BinaryOps Opcode = I.getOpcode();
505 bool Changed = false;
506
// The loop exits only through the `return Changed;` at the bottom; every
// `continue` re-runs the body from the top.
507 do {
508 // Order operands such that they are listed from right (least complex) to
509 // left (most complex). This puts constants before unary operators before
510 // binary operators.
// NOTE(review): the result of swapOperands() is negated, so it presumably
// returns false on success -- confirm against its declaration.
511 if (I.isCommutative() && getComplexity(V: I.getOperand(i_nocapture: 0)) <
512 getComplexity(V: I.getOperand(i_nocapture: 1)))
513 Changed = !I.swapOperands();
514
// If the two operands form a symmetric pair, replace them with the pair
// returned by matchSymmetricPair.
515 if (I.isCommutative()) {
516 if (auto Pair = matchSymmetricPair(LHS: I.getOperand(i_nocapture: 0), RHS: I.getOperand(i_nocapture: 1))) {
517 replaceOperand(I, OpNum: 0, V: Pair->first);
518 replaceOperand(I, OpNum: 1, V: Pair->second);
519 Changed = true;
520 }
521 }
522
// Either may be null when the corresponding operand is not a binary
// operator.
523 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: I.getOperand(i_nocapture: 0));
524 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: I.getOperand(i_nocapture: 1));
525
526 if (I.isAssociative()) {
527 // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
528 if (Op0 && Op0->getOpcode() == Opcode) {
529 Value *A = Op0->getOperand(i_nocapture: 0);
530 Value *B = Op0->getOperand(i_nocapture: 1);
531 Value *C = I.getOperand(i_nocapture: 1);
532
533 // Does "B op C" simplify?
534 if (Value *V = simplifyBinOp(Opcode, LHS: B, RHS: C, Q: SQ.getWithInstruction(I: &I))) {
535 // It simplifies to V. Form "A op V".
536 replaceOperand(I, OpNum: 0, V: A);
537 replaceOperand(I, OpNum: 1, V);
// nuw survives only if both original operations had it; nsw additionally
// requires that "B op C" on constants does not overflow.
538 bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(I&: *Op0);
539 bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(I&: *Op0);
540
541 // Conservatively clear all optional flags since they may not be
542 // preserved by the reassociation. Reset nsw/nuw based on the above
543 // analysis.
544 if (auto *PDI = dyn_cast<PossiblyDisjointInst>(Val: &I))
545 PDI->setIsDisjoint(false);
546
547 // Note: this is only valid because SimplifyBinOp doesn't look at
548 // the operands to Op0.
549 if (isa<OverflowingBinaryOperator>(Val: I)) {
550 I.setHasNoUnsignedWrap(IsNUW);
551 I.setHasNoSignedWrap(IsNSW);
552 }
553
554 Changed = true;
555 ++NumReassoc;
556 continue;
557 }
558 }
559
560 // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
561 if (Op1 && Op1->getOpcode() == Opcode) {
562 Value *A = I.getOperand(i_nocapture: 0);
563 Value *B = Op1->getOperand(i_nocapture: 0);
564 Value *C = Op1->getOperand(i_nocapture: 1);
565
566 // Does "A op B" simplify?
567 if (Value *V = simplifyBinOp(Opcode, LHS: A, RHS: B, Q: SQ.getWithInstruction(I: &I))) {
568 // It simplifies to V. Form "V op C".
569 replaceOperand(I, OpNum: 0, V);
570 replaceOperand(I, OpNum: 1, V: C);
571 // Conservatively clear the optional flags, since they may not be
572 // preserved by the reassociation.
// Floating-point operators are excluded from the flag drop.
573 if (!isa<FPMathOperator>(Val: I))
574 I.dropPoisonGeneratingFlags();
575 Changed = true;
576 ++NumReassoc;
577 continue;
578 }
579 }
580 }
581
582 if (I.isAssociative() && I.isCommutative()) {
// Try folding constants across a cast between two identical operations.
583 if (simplifyAssocCastAssoc(BinOp1: &I, IC&: *this)) {
584 Changed = true;
585 ++NumReassoc;
586 continue;
587 }
588
589 // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
590 if (Op0 && Op0->getOpcode() == Opcode) {
591 Value *A = Op0->getOperand(i_nocapture: 0);
592 Value *B = Op0->getOperand(i_nocapture: 1);
593 Value *C = I.getOperand(i_nocapture: 1);
594
595 // Does "C op A" simplify?
596 if (Value *V = simplifyBinOp(Opcode, LHS: C, RHS: A, Q: SQ.getWithInstruction(I: &I))) {
597 // It simplifies to V. Form "V op B".
598 replaceOperand(I, OpNum: 0, V);
599 replaceOperand(I, OpNum: 1, V: B);
600 // Conservatively clear the optional flags, since they may not be
601 // preserved by the reassociation.
602 if (!isa<FPMathOperator>(Val: I))
603 I.dropPoisonGeneratingFlags();
604 Changed = true;
605 ++NumReassoc;
606 continue;
607 }
608 }
609
610 // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
611 if (Op1 && Op1->getOpcode() == Opcode) {
612 Value *A = I.getOperand(i_nocapture: 0);
613 Value *B = Op1->getOperand(i_nocapture: 0);
614 Value *C = Op1->getOperand(i_nocapture: 1);
615
616 // Does "C op A" simplify?
617 if (Value *V = simplifyBinOp(Opcode, LHS: C, RHS: A, Q: SQ.getWithInstruction(I: &I))) {
618 // It simplifies to V. Form "B op V".
619 replaceOperand(I, OpNum: 0, V: B);
620 replaceOperand(I, OpNum: 1, V);
621 // Conservatively clear the optional flags, since they may not be
622 // preserved by the reassociation.
623 if (!isa<FPMathOperator>(Val: I))
624 I.dropPoisonGeneratingFlags();
625 Changed = true;
626 ++NumReassoc;
627 continue;
628 }
629 }
630
631 // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
632 // if C1 and C2 are constants.
// Both inner operations must have a single use and the two constants must
// fold to a constant (CRes).
633 Value *A, *B;
634 Constant *C1, *C2, *CRes;
635 if (Op0 && Op1 &&
636 Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
637 match(V: Op0, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: A), R: m_Constant(C&: C1)))) &&
638 match(V: Op1, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: B), R: m_Constant(C&: C2)))) &&
639 (CRes = ConstantFoldBinaryOpOperands(Opcode, LHS: C1, RHS: C2, DL))) {
// nuw is kept only if all three original operations had it; the new inner
// operation is created with nuw only for Add.
640 bool IsNUW = hasNoUnsignedWrap(I) &&
641 hasNoUnsignedWrap(I&: *Op0) &&
642 hasNoUnsignedWrap(I&: *Op1);
643 BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
644 BinaryOperator::CreateNUW(Opc: Opcode, V1: A, V2: B) :
645 BinaryOperator::Create(Op: Opcode, S1: A, S2: B);
646
// For floating-point operations, use only the fast-math flags common to
// all three original operations.
647 if (isa<FPMathOperator>(Val: NewBO)) {
648 FastMathFlags Flags = I.getFastMathFlags() &
649 Op0->getFastMathFlags() &
650 Op1->getFastMathFlags();
651 NewBO->setFastMathFlags(Flags);
652 }
653 InsertNewInstWith(New: NewBO, Old: I.getIterator());
654 NewBO->takeName(V: Op1);
655 replaceOperand(I, OpNum: 0, V: NewBO);
656 replaceOperand(I, OpNum: 1, V: CRes);
657 // Conservatively clear the optional flags, since they may not be
658 // preserved by the reassociation.
659 if (!isa<FPMathOperator>(Val: I))
660 I.dropPoisonGeneratingFlags();
661 if (IsNUW)
662 I.setHasNoUnsignedWrap(true);
663
// NOTE(review): unlike the transforms above, this one does not increment
// NumReassoc -- confirm whether that is intentional.
664 Changed = true;
665 continue;
666 }
667 }
668
669 // No further simplifications.
670 return Changed;
671 } while (true);
672}
673
674/// Return whether "X LOp (Y ROp Z)" is always equal to
675/// "(X LOp Y) ROp (X LOp Z)".
676static bool leftDistributesOverRight(Instruction::BinaryOps LOp,
677 Instruction::BinaryOps ROp) {
678 // X & (Y | Z) <--> (X & Y) | (X & Z)
679 // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
680 if (LOp == Instruction::And)
681 return ROp == Instruction::Or || ROp == Instruction::Xor;
682
683 // X | (Y & Z) <--> (X | Y) & (X | Z)
684 if (LOp == Instruction::Or)
685 return ROp == Instruction::And;
686
687 // X * (Y + Z) <--> (X * Y) + (X * Z)
688 // X * (Y - Z) <--> (X * Y) - (X * Z)
689 if (LOp == Instruction::Mul)
690 return ROp == Instruction::Add || ROp == Instruction::Sub;
691
692 return false;
693}
694
695/// Return whether "(X LOp Y) ROp Z" is always equal to
696/// "(X ROp Z) LOp (Y ROp Z)".
697static bool rightDistributesOverLeft(Instruction::BinaryOps LOp,
698 Instruction::BinaryOps ROp) {
699 if (Instruction::isCommutative(Opcode: ROp))
700 return leftDistributesOverRight(LOp: ROp, ROp: LOp);
701
702 // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
703 return Instruction::isBitwiseLogicOp(Opcode: LOp) && Instruction::isShift(Opcode: ROp);
704
705 // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
706 // but this requires knowing that the addition does not overflow and other
707 // such subtleties.
708}
709
710/// This function returns identity value for given opcode, which can be used to
711/// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
712static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
713 if (isa<Constant>(Val: V))
714 return nullptr;
715
716 return ConstantExpr::getBinOpIdentity(Opcode, Ty: V->getType());
717}
718
719/// This function predicates factorization using distributive laws. By default,
720/// it just returns the 'Op' inputs. But for special-cases like
721/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
722/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
723/// allow more factorization opportunities.
724static Instruction::BinaryOps
725getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
726 Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) {
727 assert(Op && "Expected a binary operator");
728 LHS = Op->getOperand(i_nocapture: 0);
729 RHS = Op->getOperand(i_nocapture: 1);
730 if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
731 Constant *C;
732 if (match(V: Op, P: m_Shl(L: m_Value(), R: m_ImmConstant(C)))) {
733 // X << C --> X * (1 << C)
734 RHS = ConstantFoldBinaryInstruction(
735 Opcode: Instruction::Shl, V1: ConstantInt::get(Ty: Op->getType(), V: 1), V2: C);
736 assert(RHS && "Constant folding of immediate constants failed");
737 return Instruction::Mul;
738 }
739 // TODO: We can add other conversions e.g. shr => div etc.
740 }
741 if (Instruction::isBitwiseLogicOp(Opcode: TopOpcode)) {
742 if (OtherOp && OtherOp->getOpcode() == Instruction::AShr &&
743 match(V: Op, P: m_LShr(L: m_NonNegative(), R: m_Value()))) {
744 // lshr nneg C, X --> ashr nneg C, X
745 return Instruction::AShr;
746 }
747 }
748 return Op->getOpcode();
749}
750
751/// This tries to simplify binary operations by factorizing out common terms
752/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
///
/// As used in the comments below, \p I is viewed as "(A op' B) op (C op' D)",
/// where op is I's opcode and op' is \p InnerOpcode. New instructions are
/// created through \p Builder and simplification queries go through \p SQ.
/// Returns the factorized value, or nullptr if no factorization was done.
753static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
754 InstCombiner::BuilderTy &Builder,
755 Instruction::BinaryOps InnerOpcode, Value *A,
756 Value *B, Value *C, Value *D) {
757 assert(A && B && C && D && "All values must be provided");
758
// V is the combined non-common part; RetVal is the final factorized value.
759 Value *V = nullptr;
760 Value *RetVal = nullptr;
761 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
762 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
763
764 // Does "X op' Y" always equal "Y op' X"?
765 bool InnerCommutative = Instruction::isCommutative(Opcode: InnerOpcode);
766
767 // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
768 if (leftDistributesOverRight(LOp: InnerOpcode, ROp: TopLevelOpcode)) {
769 // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
770 // commutative case, "(A op' B) op (C op' A)"?
771 if (A == C || (InnerCommutative && A == D)) {
// Normalize the commutative case so that the common term is in C.
772 if (A != C)
773 std::swap(a&: C, b&: D);
774 // Consider forming "A op' (B op D)".
775 // If "B op D" simplifies then it can be formed with no cost.
776 V = simplifyBinOp(Opcode: TopLevelOpcode, LHS: B, RHS: D, Q: SQ.getWithInstruction(I: &I));
777
778 // If "B op D" doesn't simplify then only go on if one of the existing
779 // operations "A op' B" and "C op' D" will be zapped as no longer used.
780 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
781 V = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: B, RHS: D, Name: RHS->getName());
782 if (V)
783 RetVal = Builder.CreateBinOp(Opc: InnerOpcode, LHS: A, RHS: V);
784 }
785 }
786
787 // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
// Only attempted when the left-distribution above produced nothing.
788 if (!RetVal && rightDistributesOverLeft(LOp: TopLevelOpcode, ROp: InnerOpcode)) {
789 // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
790 // commutative case, "(A op' B) op (B op' D)"?
791 if (B == D || (InnerCommutative && B == C)) {
// Normalize the commutative case so that the common term is in D.
792 if (B != D)
793 std::swap(a&: C, b&: D);
794 // Consider forming "(A op C) op' B".
795 // If "A op C" simplifies then it can be formed with no cost.
796 V = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQ.getWithInstruction(I: &I));
797
798 // If "A op C" doesn't simplify then only go on if one of the existing
799 // operations "A op' B" and "C op' D" will be zapped as no longer used.
800 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
801 V = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C, Name: LHS->getName());
802 if (V)
803 RetVal = Builder.CreateBinOp(Opc: InnerOpcode, LHS: V, RHS: B);
804 }
805 }
806
807 if (!RetVal)
808 return nullptr;
809
810 ++NumFactor;
// The factorized value takes over I's name.
811 RetVal->takeName(V: &I);
812
813 // Try to add no-overflow flags to the final value.
// The builder result is only touched when it is a BinaryOperator.
814 if (isa<BinaryOperator>(Val: RetVal)) {
// Start from I's own flags, then intersect with the flags of whichever
// operands are overflowing binary operators.
815 bool HasNSW = false;
816 bool HasNUW = false;
817 if (isa<OverflowingBinaryOperator>(Val: &I)) {
818 HasNSW = I.hasNoSignedWrap();
819 HasNUW = I.hasNoUnsignedWrap();
820 }
821 if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(Val: LHS)) {
822 HasNSW &= LOBO->hasNoSignedWrap();
823 HasNUW &= LOBO->hasNoUnsignedWrap();
824 }
825
826 if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(Val: RHS)) {
827 HasNSW &= ROBO->hasNoSignedWrap();
828 HasNUW &= ROBO->hasNoUnsignedWrap();
829 }
830
// Flags are only transferred for the add-of-multiplies case.
831 if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
832 // We can propagate 'nsw' if we know that
833 // %Y = mul nsw i16 %X, C
834 // %Z = add nsw i16 %Y, %X
835 // =>
836 // %Z = mul nsw i16 %X, C+1
837 //
838 // iff C+1 isn't INT_MIN
839 const APInt *CInt;
840 if (match(V, P: m_APInt(Res&: CInt)) && !CInt->isMinSignedValue())
841 cast<Instruction>(Val: RetVal)->setHasNoSignedWrap(HasNSW);
842
843 // nuw can be propagated with any constant or nuw value.
844 cast<Instruction>(Val: RetVal)->setHasNoUnsignedWrap(HasNUW);
845 }
846 }
847 return RetVal;
848}
849
850// If `I` has one Const operand and the other matches `(ctpop (not x))`,
851// replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`.
852// This is only useful is the new subtract can fold so we only handle the
853// following cases:
854// 1) (add/sub/disjoint_or C, (ctpop (not x))
855// -> (add/sub/disjoint_or C', (ctpop x))
856// 1) (cmp pred C, (ctpop (not x))
857// -> (cmp pred C', (ctpop x))
858Instruction *InstCombinerImpl::tryFoldInstWithCtpopWithNot(Instruction *I) {
859 unsigned Opc = I->getOpcode();
860 unsigned ConstIdx = 1;
861 switch (Opc) {
862 default:
863 return nullptr;
864 // (ctpop (not x)) <-> (sub nuw nsw BitWidth(x) - (ctpop x))
865 // We can fold the BitWidth(x) with add/sub/icmp as long the other operand
866 // is constant.
867 case Instruction::Sub:
868 ConstIdx = 0;
869 break;
870 case Instruction::ICmp:
871 // Signed predicates aren't correct in some edge cases like for i2 types, as
872 // well since (ctpop x) is known [0, log2(BitWidth(x))] almost all signed
873 // comparisons against it are simplfied to unsigned.
874 if (cast<ICmpInst>(Val: I)->isSigned())
875 return nullptr;
876 break;
877 case Instruction::Or:
878 if (!match(V: I, P: m_DisjointOr(L: m_Value(), R: m_Value())))
879 return nullptr;
880 [[fallthrough]];
881 case Instruction::Add:
882 break;
883 }
884
885 Value *Op;
886 // Find ctpop.
887 if (!match(V: I->getOperand(i: 1 - ConstIdx), P: m_OneUse(SubPattern: m_Ctpop(Op0: m_Value(V&: Op)))))
888 return nullptr;
889
890 Constant *C;
891 // Check other operand is ImmConstant.
892 if (!match(V: I->getOperand(i: ConstIdx), P: m_ImmConstant(C)))
893 return nullptr;
894
895 Type *Ty = Op->getType();
896 Constant *BitWidthC = ConstantInt::get(Ty, V: Ty->getScalarSizeInBits());
897 // Need extra check for icmp. Note if this check is true, it generally means
898 // the icmp will simplify to true/false.
899 if (Opc == Instruction::ICmp && !cast<ICmpInst>(Val: I)->isEquality()) {
900 Constant *Cmp =
901 ConstantFoldCompareInstOperands(Predicate: ICmpInst::ICMP_UGT, LHS: C, RHS: BitWidthC, DL);
902 if (!Cmp || !Cmp->isNullValue())
903 return nullptr;
904 }
905
906 // Check we can invert `(not x)` for free.
907 bool Consumes = false;
908 if (!isFreeToInvert(V: Op, WillInvertAllUses: Op->hasOneUse(), DoesConsume&: Consumes) || !Consumes)
909 return nullptr;
910 Value *NotOp = getFreelyInverted(V: Op, WillInvertAllUses: Op->hasOneUse(), Builder: &Builder);
911 assert(NotOp != nullptr &&
912 "Desync between isFreeToInvert and getFreelyInverted");
913
914 Value *CtpopOfNotOp = Builder.CreateIntrinsic(RetTy: Ty, ID: Intrinsic::ctpop, Args: NotOp);
915
916 Value *R = nullptr;
917
918 // Do the transformation here to avoid potentially introducing an infinite
919 // loop.
920 switch (Opc) {
921 case Instruction::Sub:
922 R = Builder.CreateAdd(LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: C, C2: BitWidthC));
923 break;
924 case Instruction::Or:
925 case Instruction::Add:
926 R = Builder.CreateSub(LHS: ConstantExpr::getAdd(C1: C, C2: BitWidthC), RHS: CtpopOfNotOp);
927 break;
928 case Instruction::ICmp:
929 R = Builder.CreateICmp(P: cast<ICmpInst>(Val: I)->getSwappedPredicate(),
930 LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: BitWidthC, C2: C));
931 break;
932 default:
933 llvm_unreachable("Unhandled Opcode");
934 }
935 assert(R != nullptr);
936 return replaceInstUsesWith(I&: *I, V: R);
937}
938
939// (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
940// IFF
941// 1) the logic_shifts match
942// 2) either both binops are binops and one is `and` or
943// BinOp1 is `and`
944// (logic_shift (inv_logic_shift C1, C), C) == C1 or
945//
946// -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
947//
948// (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
949// IFF
950// 1) the logic_shifts match
951// 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`).
952//
953// -> (BinOp (logic_shift (BinOp X, Y)), Mask)
954//
955// (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt))
956// IFF
957// 1) Binop1 is bitwise logical operator `and`, `or` or `xor`
958// 2) Binop2 is `not`
959//
960// -> (arithmetic_shift Binop1((not X), Y), Amt)
961
962Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) {
963 const DataLayout &DL = I.getDataLayout();
964 auto IsValidBinOpc = [](unsigned Opc) {
965 switch (Opc) {
966 default:
967 return false;
968 case Instruction::And:
969 case Instruction::Or:
970 case Instruction::Xor:
971 case Instruction::Add:
972 // Skip Sub as we only match constant masks which will canonicalize to use
973 // add.
974 return true;
975 }
976 };
977
978 // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra
979 // constraints.
980 auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
981 unsigned ShOpc) {
982 assert(ShOpc != Instruction::AShr);
983 return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
984 ShOpc == Instruction::Shl;
985 };
986
987 auto GetInvShift = [](unsigned ShOpc) {
988 assert(ShOpc != Instruction::AShr);
989 return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
990 };
991
992 auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2,
993 unsigned ShOpc, Constant *CMask,
994 Constant *CShift) {
995 // If the BinOp1 is `and` we don't need to check the mask.
996 if (BinOpc1 == Instruction::And)
997 return true;
998
999 // For all other possible transfers we need complete distributable
1000 // binop/shift (anything but `add` + `lshr`).
1001 if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc))
1002 return false;
1003
1004 // If BinOp2 is `and`, any mask works (this only really helps for non-splat
1005 // vecs, otherwise the mask will be simplified and the following check will
1006 // handle it).
1007 if (BinOpc2 == Instruction::And)
1008 return true;
1009
1010 // Otherwise, need mask that meets the below requirement.
1011 // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
1012 Constant *MaskInvShift =
1013 ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL);
1014 return ConstantFoldBinaryOpOperands(Opcode: ShOpc, LHS: MaskInvShift, RHS: CShift, DL) ==
1015 CMask;
1016 };
1017
1018 auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
1019 Constant *CMask, *CShift;
1020 Value *X, *Y, *ShiftedX, *Mask, *Shift;
1021 if (!match(V: I.getOperand(i_nocapture: ShOpnum),
1022 P: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: Y), R: m_Value(V&: Shift)))))
1023 return nullptr;
1024 if (!match(
1025 V: I.getOperand(i_nocapture: 1 - ShOpnum),
1026 P: m_OneUse(SubPattern: m_c_BinOp(
1027 L: m_CombineAnd(Ps: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: X), R: m_Specific(V: Shift))),
1028 Ps: m_Value(V&: ShiftedX)),
1029 R: m_Value(V&: Mask)))))
1030 return nullptr;
1031 // Make sure we are matching instruction shifts and not ConstantExpr
1032 auto *IY = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: ShOpnum));
1033 auto *IX = dyn_cast<Instruction>(Val: ShiftedX);
1034 if (!IY || !IX)
1035 return nullptr;
1036
1037 // LHS and RHS need same shift opcode
1038 unsigned ShOpc = IY->getOpcode();
1039 if (ShOpc != IX->getOpcode())
1040 return nullptr;
1041
1042 // Make sure binop is real instruction and not ConstantExpr
1043 auto *BO2 = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: 1 - ShOpnum));
1044 if (!BO2)
1045 return nullptr;
1046
1047 unsigned BinOpc = BO2->getOpcode();
1048 // Make sure we have valid binops.
1049 if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
1050 return nullptr;
1051
1052 if (ShOpc == Instruction::AShr) {
1053 if (Instruction::isBitwiseLogicOp(Opcode: I.getOpcode()) &&
1054 BinOpc == Instruction::Xor && match(V: Mask, P: m_AllOnes())) {
1055 Value *NotX = Builder.CreateNot(V: X);
1056 Value *NewBinOp = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NotX);
1057 return BinaryOperator::Create(
1058 Op: static_cast<Instruction::BinaryOps>(ShOpc), S1: NewBinOp, S2: Shift);
1059 }
1060
1061 return nullptr;
1062 }
1063
1064 // If BinOp1 == BinOp2 and it's bitwise or shl with add, then just
1065 // distribute to drop the shift irrelevant of constants.
1066 if (BinOpc == I.getOpcode() &&
1067 IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) {
1068 Value *NewBinOp2 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: X, RHS: Y);
1069 Value *NewBinOp1 = Builder.CreateBinOp(
1070 Opc: static_cast<Instruction::BinaryOps>(ShOpc), LHS: NewBinOp2, RHS: Shift);
1071 return BinaryOperator::Create(Op: I.getOpcode(), S1: NewBinOp1, S2: Mask);
1072 }
1073
1074 // Otherwise we can only distribute by constant shifting the mask, so
1075 // ensure we have constants.
1076 if (!match(V: Shift, P: m_ImmConstant(C&: CShift)))
1077 return nullptr;
1078 if (!match(V: Mask, P: m_ImmConstant(C&: CMask)))
1079 return nullptr;
1080
1081 // Check if we can distribute the binops.
1082 if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
1083 return nullptr;
1084
1085 Constant *NewCMask =
1086 ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL);
1087 Value *NewBinOp2 = Builder.CreateBinOp(
1088 Opc: static_cast<Instruction::BinaryOps>(BinOpc), LHS: X, RHS: NewCMask);
1089 Value *NewBinOp1 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NewBinOp2);
1090 return BinaryOperator::Create(Op: static_cast<Instruction::BinaryOps>(ShOpc),
1091 S1: NewBinOp1, S2: CShift);
1092 };
1093
1094 if (Instruction *R = MatchBinOp(0))
1095 return R;
1096 return MatchBinOp(1);
1097}
1098
1099// (Binop (zext C), (select C, T, F))
1100// -> (select C, (binop 1, T), (binop 0, F))
1101//
1102// (Binop (sext C), (select C, T, F))
1103// -> (select C, (binop -1, T), (binop 0, F))
1104//
1105// Attempt to simplify binary operations into a select with folded args, when
1106// one operand of the binop is a select instruction and the other operand is a
1107// zext/sext extension, whose value is the select condition.
1108Instruction *
1109InstCombinerImpl::foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I) {
1110 // TODO: this simplification may be extended to any speculatable instruction,
1111 // not just binops, and would possibly be handled better in FoldOpIntoSelect.
1112 Instruction::BinaryOps Opc = I.getOpcode();
1113 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
1114 Value *A, *CondVal, *TrueVal, *FalseVal;
1115 Value *CastOp;
1116 Constant *CastTrueVal, *CastFalseVal;
1117
1118 auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) {
1119 return match(V: CastOp, P: m_SelectLike(C: m_Value(V&: A), TrueC: m_Constant(C&: CastTrueVal),
1120 FalseC: m_Constant(C&: CastFalseVal))) &&
1121 match(V: SelectOp, P: m_Select(C: m_Value(V&: CondVal), L: m_Value(V&: TrueVal),
1122 R: m_Value(V&: FalseVal)));
1123 };
1124
1125 // Make sure one side of the binop is a select instruction, and the other is a
1126 // zero/sign extension operating on a i1.
1127 if (MatchSelectAndCast(LHS, RHS))
1128 CastOp = LHS;
1129 else if (MatchSelectAndCast(RHS, LHS))
1130 CastOp = RHS;
1131 else
1132 return nullptr;
1133
1134 SelectInst *SI = ProfcheckDisableMetadataFixes
1135 ? nullptr
1136 : cast<SelectInst>(Val: CastOp == LHS ? RHS : LHS);
1137
1138 auto NewFoldedConst = [&](bool IsTrueArm, Value *V) {
1139 bool IsCastOpRHS = (CastOp == RHS);
1140 Value *CastVal = IsTrueArm ? CastFalseVal : CastTrueVal;
1141
1142 return IsCastOpRHS ? Builder.CreateBinOp(Opc, LHS: V, RHS: CastVal)
1143 : Builder.CreateBinOp(Opc, LHS: CastVal, RHS: V);
1144 };
1145
1146 // If the value used in the zext/sext is the select condition, or the negated
1147 // of the select condition, the binop can be simplified.
1148 if (CondVal == A) {
1149 Value *NewTrueVal = NewFoldedConst(false, TrueVal);
1150 return SelectInst::Create(C: CondVal, S1: NewTrueVal,
1151 S2: NewFoldedConst(true, FalseVal), NameStr: "", InsertBefore: nullptr, MDFrom: SI);
1152 }
1153 if (match(V: A, P: m_Not(V: m_Specific(V: CondVal)))) {
1154 Value *NewTrueVal = NewFoldedConst(true, TrueVal);
1155 return SelectInst::Create(C: CondVal, S1: NewTrueVal,
1156 S2: NewFoldedConst(false, FalseVal), NameStr: "", InsertBefore: nullptr, MDFrom: SI);
1157 }
1158
1159 return nullptr;
1160}
1161
1162Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) {
1163 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
1164 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS);
1165 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS);
1166 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1167 Value *A, *B, *C, *D;
1168 Instruction::BinaryOps LHSOpcode, RHSOpcode;
1169
1170 if (Op0)
1171 LHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op0, LHS&: A, RHS&: B, OtherOp: Op1);
1172 if (Op1)
1173 RHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op1, LHS&: C, RHS&: D, OtherOp: Op0);
1174
1175 // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
1176 // a common term.
1177 if (Op0 && Op1 && LHSOpcode == RHSOpcode)
1178 if (Value *V = tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C, D))
1179 return V;
1180
1181 // The instruction has the form "(A op' B) op (C)". Try to factorize common
1182 // term.
1183 if (Op0)
1184 if (Value *Ident = getIdentityValue(Opcode: LHSOpcode, V: RHS))
1185 if (Value *V =
1186 tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C: RHS, D: Ident))
1187 return V;
1188
1189 // The instruction has the form "(B) op (C op' D)". Try to factorize common
1190 // term.
1191 if (Op1)
1192 if (Value *Ident = getIdentityValue(Opcode: RHSOpcode, V: LHS))
1193 if (Value *V =
1194 tryFactorization(I, SQ, Builder, InnerOpcode: RHSOpcode, A: LHS, B: Ident, C, D))
1195 return V;
1196
1197 return nullptr;
1198}
1199
1200/// This tries to simplify binary operations which some other binary operation
1201/// distributes over either by factorizing out common terms
1202/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
1203/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
1204/// Returns the simplified value, or null if it didn't simplify.
1205Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) {
1206 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
1207 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS);
1208 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS);
1209 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1210
1211 // Factorization.
1212 if (Value *R = tryFactorizationFolds(I))
1213 return R;
1214
1215 // Expansion.
1216 if (Op0 && rightDistributesOverLeft(LOp: Op0->getOpcode(), ROp: TopLevelOpcode)) {
1217 // The instruction has the form "(A op' B) op C". See if expanding it out
1218 // to "(A op C) op' (B op C)" results in simplifications.
1219 Value *A = Op0->getOperand(i_nocapture: 0), *B = Op0->getOperand(i_nocapture: 1), *C = RHS;
1220 Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
1221
1222 // Disable the use of undef because it's not safe to distribute undef.
1223 auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef();
1224 Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive);
1225 Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: B, RHS: C, Q: SQDistributive);
1226
1227 // Do "A op C" and "B op C" both simplify?
1228 if (L && R) {
1229 // They do! Return "L op' R".
1230 ++NumExpand;
1231 C = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R);
1232 C->takeName(V: &I);
1233 return C;
1234 }
1235
1236 // Does "A op C" simplify to the identity value for the inner opcode?
1237 if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) {
1238 // They do! Return "B op C".
1239 ++NumExpand;
1240 C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: B, RHS: C);
1241 C->takeName(V: &I);
1242 return C;
1243 }
1244
1245 // Does "B op C" simplify to the identity value for the inner opcode?
1246 if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) {
1247 // They do! Return "A op C".
1248 ++NumExpand;
1249 C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C);
1250 C->takeName(V: &I);
1251 return C;
1252 }
1253 }
1254
1255 if (Op1 && leftDistributesOverRight(LOp: TopLevelOpcode, ROp: Op1->getOpcode())) {
1256 // The instruction has the form "A op (B op' C)". See if expanding it out
1257 // to "(A op B) op' (A op C)" results in simplifications.
1258 Value *A = LHS, *B = Op1->getOperand(i_nocapture: 0), *C = Op1->getOperand(i_nocapture: 1);
1259 Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
1260
1261 // Disable the use of undef because it's not safe to distribute undef.
1262 auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef();
1263 Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: B, Q: SQDistributive);
1264 Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive);
1265
1266 // Do "A op B" and "A op C" both simplify?
1267 if (L && R) {
1268 // They do! Return "L op' R".
1269 ++NumExpand;
1270 A = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R);
1271 A->takeName(V: &I);
1272 return A;
1273 }
1274
1275 // Does "A op B" simplify to the identity value for the inner opcode?
1276 if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) {
1277 // They do! Return "A op C".
1278 ++NumExpand;
1279 A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C);
1280 A->takeName(V: &I);
1281 return A;
1282 }
1283
1284 // Does "A op C" simplify to the identity value for the inner opcode?
1285 if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) {
1286 // They do! Return "A op B".
1287 ++NumExpand;
1288 A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: B);
1289 A->takeName(V: &I);
1290 return A;
1291 }
1292 }
1293
1294 return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
1295}
1296
1297static std::optional<std::pair<Value *, Value *>>
1298matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS) {
1299 if (LHS->getParent() != RHS->getParent())
1300 return std::nullopt;
1301
1302 if (LHS->getNumIncomingValues() < 2)
1303 return std::nullopt;
1304
1305 if (!equal(LRange: LHS->blocks(), RRange: RHS->blocks()))
1306 return std::nullopt;
1307
1308 Value *L0 = LHS->getIncomingValue(i: 0);
1309 Value *R0 = RHS->getIncomingValue(i: 0);
1310
1311 for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) {
1312 Value *L1 = LHS->getIncomingValue(i: I);
1313 Value *R1 = RHS->getIncomingValue(i: I);
1314
1315 if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1))
1316 continue;
1317
1318 return std::nullopt;
1319 }
1320
1321 return std::optional(std::pair(L0, R0));
1322}
1323
1324std::optional<std::pair<Value *, Value *>>
1325InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) {
1326 Instruction *LHSInst = dyn_cast<Instruction>(Val: LHS);
1327 Instruction *RHSInst = dyn_cast<Instruction>(Val: RHS);
1328 if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode())
1329 return std::nullopt;
1330 switch (LHSInst->getOpcode()) {
1331 case Instruction::PHI:
1332 return matchSymmetricPhiNodesPair(LHS: cast<PHINode>(Val: LHS), RHS: cast<PHINode>(Val: RHS));
1333 case Instruction::Select: {
1334 Value *Cond = LHSInst->getOperand(i: 0);
1335 Value *TrueVal = LHSInst->getOperand(i: 1);
1336 Value *FalseVal = LHSInst->getOperand(i: 2);
1337 if (Cond == RHSInst->getOperand(i: 0) && TrueVal == RHSInst->getOperand(i: 2) &&
1338 FalseVal == RHSInst->getOperand(i: 1))
1339 return std::pair(TrueVal, FalseVal);
1340 return std::nullopt;
1341 }
1342 case Instruction::Call: {
1343 // Match min(a, b) and max(a, b)
1344 MinMaxIntrinsic *LHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: LHSInst);
1345 MinMaxIntrinsic *RHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: RHSInst);
1346 if (LHSMinMax && RHSMinMax &&
1347 LHSMinMax->getPredicate() ==
1348 ICmpInst::getSwappedPredicate(pred: RHSMinMax->getPredicate()) &&
1349 ((LHSMinMax->getLHS() == RHSMinMax->getLHS() &&
1350 LHSMinMax->getRHS() == RHSMinMax->getRHS()) ||
1351 (LHSMinMax->getLHS() == RHSMinMax->getRHS() &&
1352 LHSMinMax->getRHS() == RHSMinMax->getLHS())))
1353 return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS());
1354 return std::nullopt;
1355 }
1356 default:
1357 return std::nullopt;
1358 }
1359}
1360
1361Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
1362 Value *LHS,
1363 Value *RHS) {
1364 Value *A, *B, *C, *D, *E, *F;
1365 bool LHSIsSelect = match(V: LHS, P: m_Select(C: m_Value(V&: A), L: m_Value(V&: B), R: m_Value(V&: C)));
1366 bool RHSIsSelect = match(V: RHS, P: m_Select(C: m_Value(V&: D), L: m_Value(V&: E), R: m_Value(V&: F)));
1367 if (!LHSIsSelect && !RHSIsSelect)
1368 return nullptr;
1369
1370 SelectInst *SI = ProfcheckDisableMetadataFixes
1371 ? nullptr
1372 : cast<SelectInst>(Val: LHSIsSelect ? LHS : RHS);
1373
1374 FastMathFlags FMF;
1375 BuilderTy::FastMathFlagGuard Guard(Builder);
1376 if (const auto *FPOp = dyn_cast<FPMathOperator>(Val: &I)) {
1377 FMF = FPOp->getFastMathFlags();
1378 Builder.setFastMathFlags(FMF);
1379 }
1380
1381 Instruction::BinaryOps Opcode = I.getOpcode();
1382 SimplifyQuery Q = SQ.getWithInstruction(I: &I);
1383
1384 Value *Cond, *True = nullptr, *False = nullptr;
1385
1386 // Special-case for add/negate combination. Replace the zero in the negation
1387 // with the trailing add operand:
1388 // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
1389 // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
1390 auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
1391 // We need an 'add' and exactly 1 arm of the select to have been simplified.
1392 if (Opcode != Instruction::Add || (!True && !False) || (True && False))
1393 return nullptr;
1394 Value *N;
1395 if (True && match(V: FVal, P: m_Neg(V: m_Value(V&: N)))) {
1396 Value *Sub = Builder.CreateSub(LHS: Z, RHS: N);
1397 return Builder.CreateSelect(C: Cond, True, False: Sub, Name: I.getName(), MDFrom: SI);
1398 }
1399 if (False && match(V: TVal, P: m_Neg(V: m_Value(V&: N)))) {
1400 Value *Sub = Builder.CreateSub(LHS: Z, RHS: N);
1401 return Builder.CreateSelect(C: Cond, True: Sub, False, Name: I.getName(), MDFrom: SI);
1402 }
1403 return nullptr;
1404 };
1405
1406 if (LHSIsSelect && RHSIsSelect && A == D) {
1407 // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
1408 Cond = A;
1409 True = simplifyBinOp(Opcode, LHS: B, RHS: E, FMF, Q);
1410 False = simplifyBinOp(Opcode, LHS: C, RHS: F, FMF, Q);
1411
1412 if (LHS->hasOneUse() && RHS->hasOneUse()) {
1413 if (False && !True)
1414 True = Builder.CreateBinOp(Opc: Opcode, LHS: B, RHS: E);
1415 else if (True && !False)
1416 False = Builder.CreateBinOp(Opc: Opcode, LHS: C, RHS: F);
1417 }
1418 } else if (LHSIsSelect && LHS->hasOneUse()) {
1419 // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
1420 Cond = A;
1421 True = simplifyBinOp(Opcode, LHS: B, RHS, FMF, Q);
1422 False = simplifyBinOp(Opcode, LHS: C, RHS, FMF, Q);
1423 if (Value *NewSel = foldAddNegate(B, C, RHS))
1424 return NewSel;
1425 } else if (RHSIsSelect && RHS->hasOneUse()) {
1426 // X op (D ? E : F) -> D ? (X op E) : (X op F)
1427 Cond = D;
1428 True = simplifyBinOp(Opcode, LHS, RHS: E, FMF, Q);
1429 False = simplifyBinOp(Opcode, LHS, RHS: F, FMF, Q);
1430 if (Value *NewSel = foldAddNegate(E, F, LHS))
1431 return NewSel;
1432 }
1433
1434 if (!True || !False)
1435 return nullptr;
1436
1437 Value *NewSI = Builder.CreateSelect(C: Cond, True, False, Name: I.getName(), MDFrom: SI);
1438 NewSI->takeName(V: &I);
1439 return NewSI;
1440}
1441
1442/// Freely adapt every user of V as-if V was changed to !V.
1443/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
1444void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) {
1445 assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
1446 for (User *U : make_early_inc_range(Range: I->users())) {
1447 if (U == IgnoredUser)
1448 continue; // Don't consider this user.
1449 switch (cast<Instruction>(Val: U)->getOpcode()) {
1450 case Instruction::Select: {
1451 auto *SI = cast<SelectInst>(Val: U);
1452 SI->swapValues();
1453 SI->swapProfMetadata();
1454 break;
1455 }
1456 case Instruction::CondBr: {
1457 CondBrInst *BI = cast<CondBrInst>(Val: U);
1458 BI->swapSuccessors(); // swaps prof metadata too
1459 if (BPI)
1460 BPI->swapSuccEdgesProbabilities(Src: BI->getParent());
1461 break;
1462 }
1463 case Instruction::Xor:
1464 replaceInstUsesWith(I&: cast<Instruction>(Val&: *U), V: I);
1465 // Add to worklist for DCE.
1466 addToWorklist(I: cast<Instruction>(Val: U));
1467 break;
1468 default:
1469 llvm_unreachable("Got unexpected user - out of sync with "
1470 "canFreelyInvertAllUsersOf() ?");
1471 }
1472 }
1473
1474 // Update pre-existing debug value uses.
1475 SmallVector<DbgVariableRecord *, 4> DbgVariableRecords;
1476 llvm::findDbgValues(V: I, DbgVariableRecords);
1477
1478 for (DbgVariableRecord *DbgVal : DbgVariableRecords) {
1479 SmallVector<uint64_t, 1> Ops = {dwarf::DW_OP_not};
1480 for (unsigned Idx = 0, End = DbgVal->getNumVariableLocationOps();
1481 Idx != End; ++Idx)
1482 if (DbgVal->getVariableLocationOp(OpIdx: Idx) == I)
1483 DbgVal->setExpression(
1484 DIExpression::appendOpsToArg(Expr: DbgVal->getExpression(), Ops, ArgNo: Idx));
1485 }
1486}
1487
1488/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
1489/// constant zero (which is the 'negate' form).
1490Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
1491 Value *NegV;
1492 if (match(V, P: m_Neg(V: m_Value(V&: NegV))))
1493 return NegV;
1494
1495 // Constants can be considered to be negated values if they can be folded.
1496 if (ConstantInt *C = dyn_cast<ConstantInt>(Val: V))
1497 return ConstantExpr::getNeg(C);
1498
1499 if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(Val: V))
1500 if (C->getType()->getElementType()->isIntegerTy())
1501 return ConstantExpr::getNeg(C);
1502
1503 if (ConstantVector *CV = dyn_cast<ConstantVector>(Val: V)) {
1504 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1505 Constant *Elt = CV->getAggregateElement(Elt: i);
1506 if (!Elt)
1507 return nullptr;
1508
1509 if (isa<UndefValue>(Val: Elt))
1510 continue;
1511
1512 if (!isa<ConstantInt>(Val: Elt))
1513 return nullptr;
1514 }
1515 return ConstantExpr::getNeg(C: CV);
1516 }
1517
1518 // Negate integer vector splats.
1519 if (auto *CV = dyn_cast<Constant>(Val: V))
1520 if (CV->getType()->isVectorTy() &&
1521 CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
1522 return ConstantExpr::getNeg(C: CV);
1523
1524 return nullptr;
1525}
1526
1527// Try to fold:
1528// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1529// -> ({s|u}itofp (int_binop x, y))
1530// 2) (fp_binop ({s|u}itofp x), FpC)
1531// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1532//
1533// Assuming the sign of the cast for x/y is `OpsFromSigned`.
1534Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
1535 BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps,
1536 Constant *Op1FpC, SmallVectorImpl<WithCache<const Value *>> &OpsKnown) {
1537
1538 Type *FPTy = BO.getType();
1539 Type *IntTy = IntOps[0]->getType();
1540
1541 unsigned IntSz = IntTy->getScalarSizeInBits();
1542 // This is the maximum number of inuse bits by the integer where the int -> fp
1543 // casts are exact.
1544 unsigned MaxRepresentableBits =
1545 APFloat::semanticsPrecision(FPTy->getScalarType()->getFltSemantics());
1546
1547 // Preserve known number of leading bits. This can allow us to trivial nsw/nuw
1548 // checks later on.
1549 unsigned NumUsedLeadingBits[2] = {IntSz, IntSz};
1550
1551 // NB: This only comes up if OpsFromSigned is true, so there is no need to
1552 // cache if between calls to `foldFBinOpOfIntCastsFromSign`.
1553 auto IsNonZero = [&](unsigned OpNo) -> bool {
1554 if (OpsKnown[OpNo].hasKnownBits() &&
1555 OpsKnown[OpNo].getKnownBits(Q: SQ).isNonZero())
1556 return true;
1557 return isKnownNonZero(V: IntOps[OpNo], Q: SQ);
1558 };
1559
1560 auto IsNonNeg = [&](unsigned OpNo) -> bool {
1561 // NB: This matches the impl in ValueTracking, we just try to use cached
1562 // knownbits here. If we ever start supporting WithCache for
1563 // `isKnownNonNegative`, change this to an explicit call.
1564 return OpsKnown[OpNo].getKnownBits(Q: SQ).isNonNegative();
1565 };
1566
1567 // Check if we know for certain that ({s|u}itofp op) is exact.
1568 auto IsValidPromotion = [&](unsigned OpNo) -> bool {
1569 // Can we treat this operand as the desired sign?
1570 if (OpsFromSigned != isa<SIToFPInst>(Val: BO.getOperand(i_nocapture: OpNo)) &&
1571 !IsNonNeg(OpNo))
1572 return false;
1573
1574 // If fp precision >= bitwidth(op) then its exact.
1575 // NB: This is slightly conservative for `sitofp`. For signed conversion, we
1576 // can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be
1577 // handled specially. We can't, however, increase the bound arbitrarily for
1578 // `sitofp` as for larger sizes, it won't sign extend.
1579 if (MaxRepresentableBits < IntSz) {
1580 // Otherwise if its signed cast check that fp precisions >= bitwidth(op) -
1581 // numSignBits(op).
1582 // TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change
1583 // `IntOps[OpNo]` arguments to `KnownOps[OpNo]`.
1584 if (OpsFromSigned)
1585 NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(Op: IntOps[OpNo]);
1586 // Finally for unsigned check that fp precision >= bitwidth(op) -
1587 // numLeadingZeros(op).
1588 else {
1589 NumUsedLeadingBits[OpNo] =
1590 IntSz - OpsKnown[OpNo].getKnownBits(Q: SQ).countMinLeadingZeros();
1591 }
1592 }
1593 // NB: We could also check if op is known to be a power of 2 or zero (which
1594 // will always be representable). Its unlikely, however, that is we are
1595 // unable to bound op in any way we will be able to pass the overflow checks
1596 // later on.
1597
1598 if (MaxRepresentableBits < NumUsedLeadingBits[OpNo])
1599 return false;
1600 // Signed + Mul also requires that op is non-zero to avoid -0 cases.
1601 return !OpsFromSigned || BO.getOpcode() != Instruction::FMul ||
1602 IsNonZero(OpNo);
1603 };
1604
1605 // If we have a constant rhs, see if we can losslessly convert it to an int.
1606 if (Op1FpC != nullptr) {
1607 // Signed + Mul req non-zero
1608 if (OpsFromSigned && BO.getOpcode() == Instruction::FMul &&
1609 !match(V: Op1FpC, P: m_NonZeroFP()))
1610 return nullptr;
1611
1612 Constant *Op1IntC = ConstantFoldCastOperand(
1613 Opcode: OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, C: Op1FpC,
1614 DestTy: IntTy, DL);
1615 if (Op1IntC == nullptr)
1616 return nullptr;
1617 if (ConstantFoldCastOperand(Opcode: OpsFromSigned ? Instruction::SIToFP
1618 : Instruction::UIToFP,
1619 C: Op1IntC, DestTy: FPTy, DL) != Op1FpC)
1620 return nullptr;
1621
1622 // First try to keep sign of cast the same.
1623 IntOps[1] = Op1IntC;
1624 }
1625
1626 // Ensure lhs/rhs integer types match.
1627 if (IntTy != IntOps[1]->getType())
1628 return nullptr;
1629
1630 if (Op1FpC == nullptr) {
1631 if (!IsValidPromotion(1))
1632 return nullptr;
1633 }
1634 if (!IsValidPromotion(0))
1635 return nullptr;
1636
1637 // Final we check if the integer version of the binop will not overflow.
1638 BinaryOperator::BinaryOps IntOpc;
1639 // Because of the precision check, we can often rule out overflows.
1640 bool NeedsOverflowCheck = true;
1641 // Try to conservatively rule out overflow based on the already done precision
1642 // checks.
1643 unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1;
1644 unsigned OverflowMaxCurBits =
1645 std::max(a: NumUsedLeadingBits[0], b: NumUsedLeadingBits[1]);
1646 bool OutputSigned = OpsFromSigned;
1647 switch (BO.getOpcode()) {
1648 case Instruction::FAdd:
1649 IntOpc = Instruction::Add;
1650 OverflowMaxOutputBits += OverflowMaxCurBits;
1651 break;
1652 case Instruction::FSub:
1653 IntOpc = Instruction::Sub;
1654 OverflowMaxOutputBits += OverflowMaxCurBits;
1655 break;
1656 case Instruction::FMul:
1657 IntOpc = Instruction::Mul;
1658 OverflowMaxOutputBits += OverflowMaxCurBits * 2;
1659 break;
1660 default:
1661 llvm_unreachable("Unsupported binop");
1662 }
1663 // The precision check may have already ruled out overflow.
1664 if (OverflowMaxOutputBits < IntSz) {
1665 NeedsOverflowCheck = false;
1666 // We can bound unsigned overflow from sub to in range signed value (this is
1667 // what allows us to avoid the overflow check for sub).
1668 if (IntOpc == Instruction::Sub)
1669 OutputSigned = true;
1670 }
1671
1672 // Precision check did not rule out overflow, so need to check.
1673 // TODO: If we add support for `WithCache` in `willNotOverflow`, change
1674 // `IntOps[...]` arguments to `KnownOps[...]`.
1675 if (NeedsOverflowCheck &&
1676 !willNotOverflow(Opcode: IntOpc, LHS: IntOps[0], RHS: IntOps[1], CxtI: BO, IsSigned: OutputSigned))
1677 return nullptr;
1678
1679 Value *IntBinOp = Builder.CreateBinOp(Opc: IntOpc, LHS: IntOps[0], RHS: IntOps[1]);
1680 if (auto *IntBO = dyn_cast<BinaryOperator>(Val: IntBinOp)) {
1681 IntBO->setHasNoSignedWrap(OutputSigned);
1682 IntBO->setHasNoUnsignedWrap(!OutputSigned);
1683 }
1684 if (OutputSigned)
1685 return new SIToFPInst(IntBinOp, FPTy);
1686 return new UIToFPInst(IntBinOp, FPTy);
1687}
1688
1689// Try to fold:
1690// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1691// -> ({s|u}itofp (int_binop x, y))
1692// 2) (fp_binop ({s|u}itofp x), FpC)
1693// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1694Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
1695 // Don't perform the fold on vectors, as the integer operation may be much
1696 // more expensive than the float operation in that case.
1697 if (BO.getType()->isVectorTy())
1698 return nullptr;
1699
1700 std::array<Value *, 2> IntOps = {nullptr, nullptr};
1701 Constant *Op1FpC = nullptr;
1702 // Check for:
1703 // 1) (binop ({s|u}itofp x), ({s|u}itofp y))
1704 // 2) (binop ({s|u}itofp x), FpC)
1705 if (!match(V: BO.getOperand(i_nocapture: 0), P: m_IToFP(Op: m_Value(V&: IntOps[0]))))
1706 return nullptr;
1707
1708 if (!match(V: BO.getOperand(i_nocapture: 1), P: m_Constant(C&: Op1FpC)) &&
1709 !match(V: BO.getOperand(i_nocapture: 1), P: m_IToFP(Op: m_Value(V&: IntOps[1]))))
1710 return nullptr;
1711
1712 // Cache KnownBits a bit to potentially save some analysis.
1713 SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]};
1714
1715 // Try treating x/y as coming from both `uitofp` and `sitofp`. There are
1716 // different constraints depending on the sign of the cast.
1717 // NB: `(uitofp nneg X)` == `(sitofp nneg X)`.
1718 if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false,
1719 IntOps, Op1FpC, OpsKnown))
1720 return R;
1721 return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps,
1722 Op1FpC, OpsKnown);
1723}
1724
1725/// A binop with a constant operand and a sign-extended boolean operand may be
1726/// converted into a select of constants by applying the binary operation to
1727/// the constant with the two possible values of the extended boolean (0 or -1).
1728Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
1729 // TODO: Handle non-commutative binop (constant is operand 0).
1730 // TODO: Handle zext.
1731 // TODO: Peek through 'not' of cast.
1732 Value *BO0 = BO.getOperand(i_nocapture: 0);
1733 Value *BO1 = BO.getOperand(i_nocapture: 1);
1734 Value *X;
1735 Constant *C;
1736 if (!match(V: BO0, P: m_SExt(Op: m_Value(V&: X))) || !match(V: BO1, P: m_ImmConstant(C)) ||
1737 !X->getType()->isIntOrIntVectorTy(BitWidth: 1))
1738 return nullptr;
1739
1740 // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
1741 Constant *Ones = ConstantInt::getAllOnesValue(Ty: BO.getType());
1742 Constant *Zero = ConstantInt::getNullValue(Ty: BO.getType());
1743 Value *TVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Ones, RHS: C);
1744 Value *FVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Zero, RHS: C);
1745 return createSelectInstWithUnknownProfile(C: X, S1: TVal, S2: FVal);
1746}
1747
1748static Value *simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
1749 bool IsTrueArm) {
1750 SmallVector<Value *> Ops;
1751 for (Value *Op : I.operands()) {
1752 Value *V = nullptr;
1753 if (Op == SI) {
1754 V = IsTrueArm ? SI->getTrueValue() : SI->getFalseValue();
1755 } else if (match(V: SI->getCondition(),
1756 P: m_SpecificICmp(MatchPred: IsTrueArm ? ICmpInst::ICMP_EQ
1757 : ICmpInst::ICMP_NE,
1758 L: m_Specific(V: Op), R: m_Value(V))) &&
1759 isGuaranteedNotToBeUndefOrPoison(V)) {
1760 // Pass
1761 } else if (match(V: Op, P: m_ZExt(Op: m_Specific(V: SI->getCondition())))) {
1762 V = IsTrueArm ? ConstantInt::get(Ty: Op->getType(), V: 1)
1763 : ConstantInt::getNullValue(Ty: Op->getType());
1764 } else {
1765 V = Op;
1766 }
1767 Ops.push_back(Elt: V);
1768 }
1769
1770 return simplifyInstructionWithOperands(I: &I, NewOps: Ops, Q: I.getDataLayout());
1771}
1772
1773static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
1774 Value *NewOp, InstCombiner &IC) {
1775 Instruction *Clone = I.clone();
1776 Clone->replaceUsesOfWith(From: SI, To: NewOp);
1777 Clone->dropUBImplyingAttrsAndMetadata();
1778 IC.InsertNewInstBefore(New: Clone, Old: I.getIterator());
1779 return Clone;
1780}
1781
1782Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
1783 bool FoldWithMultiUse,
1784 bool SimplifyBothArms) {
1785 // Don't modify shared select instructions unless set FoldWithMultiUse
1786 if (!SI->hasOneUser() && !FoldWithMultiUse)
1787 return nullptr;
1788
1789 Value *TV = SI->getTrueValue();
1790 Value *FV = SI->getFalseValue();
1791
1792 // Bool selects with constant operands can be folded to logical ops.
1793 if (SI->getType()->isIntOrIntVectorTy(BitWidth: 1))
1794 return nullptr;
1795
1796 // Avoid breaking min/max reduction pattern,
1797 // which is necessary for vectorization later.
1798 if (isa<MinMaxIntrinsic>(Val: &Op))
1799 for (Value *IntrinOp : Op.operands())
1800 if (auto *PN = dyn_cast<PHINode>(Val: IntrinOp))
1801 for (Value *PhiOp : PN->operands())
1802 if (PhiOp == &Op)
1803 return nullptr;
1804
1805 // Test if a FCmpInst instruction is used exclusively by a select as
1806 // part of a minimum or maximum operation. If so, refrain from doing
1807 // any other folding. This helps out other analyses which understand
1808 // non-obfuscated minimum and maximum idioms. And in this case, at
1809 // least one of the comparison operands has at least one user besides
1810 // the compare (the select), which would often largely negate the
1811 // benefit of folding anyway.
1812 if (auto *CI = dyn_cast<FCmpInst>(Val: SI->getCondition())) {
1813 if (CI->hasOneUse()) {
1814 Value *Op0 = CI->getOperand(i_nocapture: 0), *Op1 = CI->getOperand(i_nocapture: 1);
1815 if (((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1)) &&
1816 !CI->isCommutative())
1817 return nullptr;
1818 }
1819 }
1820
1821 // Make sure that one of the select arms folds successfully.
1822 Value *NewTV = simplifyOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm=*/true);
1823 Value *NewFV =
1824 simplifyOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm=*/false);
1825 if (!NewTV && !NewFV)
1826 return nullptr;
1827
1828 if (SimplifyBothArms && !(NewTV && NewFV))
1829 return nullptr;
1830
1831 // Create an instruction for the arm that did not fold.
1832 if (!NewTV)
1833 NewTV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: TV, IC&: *this);
1834 if (!NewFV)
1835 NewFV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: FV, IC&: *this);
1836
1837 SelectInst *NewSel = SelectInst::Create(C: SI->getCondition(), S1: NewTV, S2: NewFV);
1838
1839 // Preserve metadata that remains valid for the transformed select.
1840 NewSel->copyMetadata(SrcInst: *SI,
1841 WL: {LLVMContext::MD_prof, LLVMContext::MD_unpredictable});
1842
1843 // Preserve source location information.
1844 NewSel->setDebugLoc(SI->getDebugLoc());
1845
1846 return NewSel;
1847}
1848
1849static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN,
1850 Value *InValue, BasicBlock *InBB,
1851 const DataLayout &DL,
1852 const SimplifyQuery SQ) {
1853 // NB: It is a precondition of this transform that the operands be
1854 // phi translatable!
1855 SmallVector<Value *> Ops;
1856 for (Value *Op : I.operands()) {
1857 if (Op == PN)
1858 Ops.push_back(Elt: InValue);
1859 else
1860 Ops.push_back(Elt: Op->DoPHITranslation(CurBB: PN->getParent(), PredBB: InBB));
1861 }
1862
1863 // Don't consider the simplification successful if we get back a constant
1864 // expression. That's just an instruction in hiding.
1865 // Also reject the case where we simplify back to the phi node. We wouldn't
1866 // be able to remove it in that case.
1867 Value *NewVal = simplifyInstructionWithOperands(
1868 I: &I, NewOps: Ops, Q: SQ.getWithInstruction(I: InBB->getTerminator()));
1869 if (NewVal && NewVal != PN && !match(V: NewVal, P: m_ConstantExpr()))
1870 return NewVal;
1871
1872 // Check if incoming PHI value can be replaced with constant
1873 // based on implied condition.
1874 CondBrInst *TerminatorBI = dyn_cast<CondBrInst>(Val: InBB->getTerminator());
1875 const ICmpInst *ICmp = dyn_cast<ICmpInst>(Val: &I);
1876 if (TerminatorBI &&
1877 TerminatorBI->getSuccessor(i: 0) != TerminatorBI->getSuccessor(i: 1) && ICmp) {
1878 bool LHSIsTrue = TerminatorBI->getSuccessor(i: 0) == PN->getParent();
1879 std::optional<bool> ImpliedCond = isImpliedCondition(
1880 LHS: TerminatorBI->getCondition(), RHSPred: ICmp->getCmpPredicate(), RHSOp0: Ops[0], RHSOp1: Ops[1],
1881 DL, LHSIsTrue);
1882 if (ImpliedCond)
1883 return ConstantInt::getBool(Ty: I.getType(), V: ImpliedCond.value());
1884 }
1885
1886 return nullptr;
1887}
1888
1889/// In some cases it is beneficial to fold a select into a binary operator.
1890/// For example:
1891/// %1 = or %in, 4
1892/// %2 = select %cond, %1, %in
1893/// %3 = or %2, 1
1894/// =>
1895/// %1 = select i1 %cond, 5, 1
1896/// %2 = or %1, %in
1897Instruction *InstCombinerImpl::foldBinOpSelectBinOp(BinaryOperator &Op) {
1898 assert(Op.isAssociative() && "The operation must be associative!");
1899
1900 SelectInst *SI = dyn_cast<SelectInst>(Val: Op.getOperand(i_nocapture: 0));
1901
1902 Constant *Const;
1903 if (!SI || !match(V: Op.getOperand(i_nocapture: 1), P: m_ImmConstant(C&: Const)) ||
1904 !Op.hasOneUse() || !SI->hasOneUse())
1905 return nullptr;
1906
1907 Value *TV = SI->getTrueValue();
1908 Value *FV = SI->getFalseValue();
1909 Value *Input, *NewTV, *NewFV;
1910 Constant *Const2;
1911
1912 if (TV->hasOneUse() && match(V: TV, P: m_BinOp(Opcode: Op.getOpcode(), L: m_Specific(V: FV),
1913 R: m_ImmConstant(C&: Const2)))) {
1914 NewTV = ConstantFoldBinaryInstruction(Opcode: Op.getOpcode(), V1: Const, V2: Const2);
1915 NewFV = Const;
1916 Input = FV;
1917 } else if (FV->hasOneUse() &&
1918 match(V: FV, P: m_BinOp(Opcode: Op.getOpcode(), L: m_Specific(V: TV),
1919 R: m_ImmConstant(C&: Const2)))) {
1920 NewTV = Const;
1921 NewFV = ConstantFoldBinaryInstruction(Opcode: Op.getOpcode(), V1: Const, V2: Const2);
1922 Input = TV;
1923 } else
1924 return nullptr;
1925
1926 if (!NewTV || !NewFV)
1927 return nullptr;
1928
1929 Value *NewSI =
1930 Builder.CreateSelect(C: SI->getCondition(), True: NewTV, False: NewFV, Name: "",
1931 MDFrom: ProfcheckDisableMetadataFixes ? nullptr : SI);
1932 return BinaryOperator::Create(Op: Op.getOpcode(), S1: NewSI, S2: Input);
1933}
1934
1935Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN,
1936 bool AllowMultipleUses) {
1937 unsigned NumPHIValues = PN->getNumIncomingValues();
1938 if (NumPHIValues == 0)
1939 return nullptr;
1940
1941 // We normally only transform phis with a single use. However, if a PHI has
1942 // multiple uses and they are all the same operation, we can fold *all* of the
1943 // uses into the PHI.
1944 bool OneUse = PN->hasOneUse();
1945 bool IdenticalUsers = false;
1946 if (!AllowMultipleUses && !OneUse) {
1947 // Walk the use list for the instruction, comparing them to I.
1948 for (User *U : PN->users()) {
1949 Instruction *UI = cast<Instruction>(Val: U);
1950 if (UI != &I && !I.isIdenticalTo(I: UI))
1951 return nullptr;
1952 }
1953 // Otherwise, we can replace *all* users with the new PHI we form.
1954 IdenticalUsers = true;
1955 }
1956
1957 // Check that all operands are phi-translatable.
1958 for (Value *Op : I.operands()) {
1959 if (Op == PN)
1960 continue;
1961
1962 // Non-instructions never require phi-translation.
1963 auto *I = dyn_cast<Instruction>(Val: Op);
1964 if (!I)
1965 continue;
1966
1967 // Phi-translate can handle phi nodes in the same block.
1968 if (isa<PHINode>(Val: I))
1969 if (I->getParent() == PN->getParent())
1970 continue;
1971
1972 // Operand dominates the block, no phi-translation necessary.
1973 if (DT.dominates(Def: I, BB: PN->getParent()))
1974 continue;
1975
1976 // Not phi-translatable, bail out.
1977 return nullptr;
1978 }
1979
1980 // Check to see whether the instruction can be folded into each phi operand.
1981 // If there is one operand that does not fold, remember the BB it is in.
1982 SmallVector<Value *> NewPhiValues;
1983 SmallVector<unsigned int> OpsToMoveUseToIncomingBB;
1984 bool SeenNonSimplifiedInVal = false;
1985 for (unsigned i = 0; i != NumPHIValues; ++i) {
1986 Value *InVal = PN->getIncomingValue(i);
1987 BasicBlock *InBB = PN->getIncomingBlock(i);
1988
1989 if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InValue: InVal, InBB, DL, SQ)) {
1990 NewPhiValues.push_back(Elt: NewVal);
1991 continue;
1992 }
1993
1994 // Handle some cases that can't be fully simplified, but where we know that
1995 // the two instructions will fold into one.
1996 auto WillFold = [&]() {
1997 if (!InVal->hasUseList() || !InVal->hasOneUser())
1998 return false;
1999
2000 // icmp of ucmp/scmp with constant will fold to icmp.
2001 const APInt *Ignored;
2002 if (isa<CmpIntrinsic>(Val: InVal) &&
2003 match(V: &I, P: m_ICmp(L: m_Specific(V: PN), R: m_APInt(Res&: Ignored))))
2004 return true;
2005
2006 // icmp eq zext(bool), 0 will fold to !bool.
2007 if (isa<ZExtInst>(Val: InVal) &&
2008 cast<ZExtInst>(Val: InVal)->getSrcTy()->isIntOrIntVectorTy(BitWidth: 1) &&
2009 match(V: &I,
2010 P: m_SpecificICmp(MatchPred: ICmpInst::ICMP_EQ, L: m_Specific(V: PN), R: m_Zero())))
2011 return true;
2012
2013 return false;
2014 };
2015
2016 if (WillFold()) {
2017 OpsToMoveUseToIncomingBB.push_back(Elt: i);
2018 NewPhiValues.push_back(Elt: nullptr);
2019 continue;
2020 }
2021
2022 if (!OneUse && !IdenticalUsers)
2023 return nullptr;
2024
2025 if (SeenNonSimplifiedInVal)
2026 return nullptr; // More than one non-simplified value.
2027 SeenNonSimplifiedInVal = true;
2028
2029 // If there is exactly one non-simplified value, we can insert a copy of the
2030 // operation in that block. However, if this is a critical edge, we would
2031 // be inserting the computation on some other paths (e.g. inside a loop).
2032 // Only do this if the pred block is unconditionally branching into the phi
2033 // block. Also, make sure that the pred block is not dead code.
2034 UncondBrInst *BI = dyn_cast<UncondBrInst>(Val: InBB->getTerminator());
2035 if (!BI || !DT.isReachableFromEntry(A: InBB))
2036 return nullptr;
2037
2038 NewPhiValues.push_back(Elt: nullptr);
2039 OpsToMoveUseToIncomingBB.push_back(Elt: i);
2040
2041 // Do not push the operation across a loop backedge. This could result in
2042 // an infinite combine loop, and is generally non-profitable (especially
2043 // if the operation was originally outside the loop).
2044 if (isBackEdge(From: InBB, To: PN->getParent()))
2045 return nullptr;
2046 }
2047
2048 // Clone the instruction that uses the phi node and move it into the incoming
2049 // BB because we know that the next iteration of InstCombine will simplify it.
2050 SmallDenseMap<BasicBlock *, Instruction *> Clones;
2051 for (auto OpIndex : OpsToMoveUseToIncomingBB) {
2052 Value *Op = PN->getIncomingValue(i: OpIndex);
2053 BasicBlock *OpBB = PN->getIncomingBlock(i: OpIndex);
2054
2055 Instruction *Clone = Clones.lookup(Val: OpBB);
2056 if (!Clone) {
2057 Clone = I.clone();
2058 for (Use &U : Clone->operands()) {
2059 if (U == PN)
2060 U = Op;
2061 else
2062 U = U->DoPHITranslation(CurBB: PN->getParent(), PredBB: OpBB);
2063 }
2064 Clone = InsertNewInstBefore(New: Clone, Old: OpBB->getTerminator()->getIterator());
2065 Clones.insert(KV: {OpBB, Clone});
2066 // We may have speculated the instruction.
2067 Clone->dropUBImplyingAttrsAndMetadata();
2068 }
2069
2070 NewPhiValues[OpIndex] = Clone;
2071 }
2072
2073 // Okay, we can do the transformation: create the new PHI node.
2074 PHINode *NewPN = PHINode::Create(Ty: I.getType(), NumReservedValues: PN->getNumIncomingValues());
2075 InsertNewInstBefore(New: NewPN, Old: PN->getIterator());
2076 NewPN->takeName(V: PN);
2077 NewPN->setDebugLoc(PN->getDebugLoc());
2078
2079 for (unsigned i = 0; i != NumPHIValues; ++i)
2080 NewPN->addIncoming(V: NewPhiValues[i], BB: PN->getIncomingBlock(i));
2081
2082 if (IdenticalUsers) {
2083 // Collect and deduplicate users up-front to avoid iterator invalidation.
2084 SmallSetVector<Instruction *, 4> ToReplace;
2085 for (User *U : PN->users()) {
2086 Instruction *User = cast<Instruction>(Val: U);
2087 if (User == &I)
2088 continue;
2089 ToReplace.insert(X: User);
2090 }
2091 for (Instruction *I : ToReplace) {
2092 replaceInstUsesWith(I&: *I, V: NewPN);
2093 eraseInstFromFunction(I&: *I);
2094 }
2095 OneUse = true;
2096 }
2097
2098 if (OneUse) {
2099 replaceAllDbgUsesWith(From&: *PN, To&: *NewPN, DomPoint&: *PN, DT);
2100 }
2101 return replaceInstUsesWith(I, V: NewPN);
2102}
2103
2104Instruction *InstCombinerImpl::foldBinopWithRecurrence(BinaryOperator &BO) {
2105 if (!BO.isAssociative())
2106 return nullptr;
2107
2108 // Find the interleaved binary ops.
2109 auto Opc = BO.getOpcode();
2110 auto *BO0 = dyn_cast<BinaryOperator>(Val: BO.getOperand(i_nocapture: 0));
2111 auto *BO1 = dyn_cast<BinaryOperator>(Val: BO.getOperand(i_nocapture: 1));
2112 if (!BO0 || !BO1 || !BO0->hasNUses(N: 2) || !BO1->hasNUses(N: 2) ||
2113 BO0->getOpcode() != Opc || BO1->getOpcode() != Opc ||
2114 !BO0->isAssociative() || !BO1->isAssociative() ||
2115 BO0->getParent() != BO1->getParent())
2116 return nullptr;
2117
2118 assert(BO.isCommutative() && BO0->isCommutative() && BO1->isCommutative() &&
2119 "Expected commutative instructions!");
2120
2121 // Find the matching phis, forming the recurrences.
2122 PHINode *PN0, *PN1;
2123 Value *Start0, *Step0, *Start1, *Step1;
2124 if (!matchSimpleRecurrence(I: BO0, P&: PN0, Start&: Start0, Step&: Step0) || !PN0->hasOneUse() ||
2125 !matchSimpleRecurrence(I: BO1, P&: PN1, Start&: Start1, Step&: Step1) || !PN1->hasOneUse() ||
2126 PN0->getParent() != PN1->getParent())
2127 return nullptr;
2128
2129 assert(PN0->getNumIncomingValues() == 2 && PN1->getNumIncomingValues() == 2 &&
2130 "Expected PHIs with two incoming values!");
2131
2132 // Convert the start and step values to constants.
2133 auto *Init0 = dyn_cast<Constant>(Val: Start0);
2134 auto *Init1 = dyn_cast<Constant>(Val: Start1);
2135 auto *C0 = dyn_cast<Constant>(Val: Step0);
2136 auto *C1 = dyn_cast<Constant>(Val: Step1);
2137 if (!Init0 || !Init1 || !C0 || !C1)
2138 return nullptr;
2139
2140 // Fold the recurrence constants.
2141 auto *Init = ConstantFoldBinaryInstruction(Opcode: Opc, V1: Init0, V2: Init1);
2142 auto *C = ConstantFoldBinaryInstruction(Opcode: Opc, V1: C0, V2: C1);
2143 if (!Init || !C)
2144 return nullptr;
2145
2146 // Create the reduced PHI.
2147 auto *NewPN = PHINode::Create(Ty: PN0->getType(), NumReservedValues: PN0->getNumIncomingValues(),
2148 NameStr: "reduced.phi");
2149
2150 // Create the new binary op.
2151 auto *NewBO = BinaryOperator::Create(Op: Opc, S1: NewPN, S2: C);
2152 if (Opc == Instruction::FAdd || Opc == Instruction::FMul) {
2153 // Intersect FMF flags for FADD and FMUL.
2154 FastMathFlags Intersect = BO0->getFastMathFlags() &
2155 BO1->getFastMathFlags() & BO.getFastMathFlags();
2156 NewBO->setFastMathFlags(Intersect);
2157 } else {
2158 OverflowTracking Flags;
2159 Flags.AllKnownNonNegative = false;
2160 Flags.AllKnownNonZero = false;
2161 Flags.mergeFlags(I&: *BO0);
2162 Flags.mergeFlags(I&: *BO1);
2163 Flags.mergeFlags(I&: BO);
2164 Flags.applyFlags(I&: *NewBO);
2165 }
2166 NewBO->takeName(V: &BO);
2167
2168 for (unsigned I = 0, E = PN0->getNumIncomingValues(); I != E; ++I) {
2169 auto *V = PN0->getIncomingValue(i: I);
2170 auto *BB = PN0->getIncomingBlock(i: I);
2171 if (V == Init0) {
2172 assert(((PN1->getIncomingValue(0) == Init1 &&
2173 PN1->getIncomingBlock(0) == BB) ||
2174 (PN1->getIncomingValue(1) == Init1 &&
2175 PN1->getIncomingBlock(1) == BB)) &&
2176 "Invalid incoming block!");
2177 NewPN->addIncoming(V: Init, BB);
2178 } else if (V == BO0) {
2179 assert(((PN1->getIncomingValue(0) == BO1 &&
2180 PN1->getIncomingBlock(0) == BB) ||
2181 (PN1->getIncomingValue(1) == BO1 &&
2182 PN1->getIncomingBlock(1) == BB)) &&
2183 "Invalid incoming block!");
2184 NewPN->addIncoming(V: NewBO, BB);
2185 } else
2186 llvm_unreachable("Unexpected incoming value!");
2187 }
2188
2189 LLVM_DEBUG(dbgs() << " Combined " << *PN0 << "\n " << *BO0
2190 << "\n with " << *PN1 << "\n " << *BO1
2191 << '\n');
2192
2193 // Insert the new recurrence and remove the old (dead) ones.
2194 InsertNewInstWith(New: NewPN, Old: PN0->getIterator());
2195 InsertNewInstWith(New: NewBO, Old: BO0->getIterator());
2196
2197 eraseInstFromFunction(
2198 I&: *replaceInstUsesWith(I&: *BO0, V: PoisonValue::get(T: BO0->getType())));
2199 eraseInstFromFunction(
2200 I&: *replaceInstUsesWith(I&: *BO1, V: PoisonValue::get(T: BO1->getType())));
2201 eraseInstFromFunction(I&: *PN0);
2202 eraseInstFromFunction(I&: *PN1);
2203
2204 return replaceInstUsesWith(I&: BO, V: NewBO);
2205}
2206
2207Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
2208 // Attempt to fold binary operators whose operands are simple recurrences.
2209 if (auto *NewBO = foldBinopWithRecurrence(BO))
2210 return NewBO;
2211
2212 // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
2213 // we are guarding against replicating the binop in >1 predecessor.
2214 // This could miss matching a phi with 2 constant incoming values.
2215 auto *Phi0 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 0));
2216 auto *Phi1 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 1));
2217 if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
2218 Phi0->getNumOperands() != Phi1->getNumOperands())
2219 return nullptr;
2220
2221 // TODO: Remove the restriction for binop being in the same block as the phis.
2222 if (BO.getParent() != Phi0->getParent() ||
2223 BO.getParent() != Phi1->getParent())
2224 return nullptr;
2225
2226 // Fold if there is at least one specific constant value in phi0 or phi1's
2227 // incoming values that comes from the same block and this specific constant
2228 // value can be used to do optimization for specific binary operator.
2229 // For example:
2230 // %phi0 = phi i32 [0, %bb0], [%i, %bb1]
2231 // %phi1 = phi i32 [%j, %bb0], [0, %bb1]
2232 // %add = add i32 %phi0, %phi1
2233 // ==>
2234 // %add = phi i32 [%j, %bb0], [%i, %bb1]
2235 Constant *C = ConstantExpr::getBinOpIdentity(Opcode: BO.getOpcode(), Ty: BO.getType(),
2236 /*AllowRHSConstant*/ false);
2237 if (C) {
2238 SmallVector<Value *, 4> NewIncomingValues;
2239 auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) {
2240 auto &Phi0Use = std::get<0>(t&: T);
2241 auto &Phi1Use = std::get<1>(t&: T);
2242 if (Phi0->getIncomingBlock(U: Phi0Use) != Phi1->getIncomingBlock(U: Phi1Use))
2243 return false;
2244 Value *Phi0UseV = Phi0Use.get();
2245 Value *Phi1UseV = Phi1Use.get();
2246 if (Phi0UseV == C)
2247 NewIncomingValues.push_back(Elt: Phi1UseV);
2248 else if (Phi1UseV == C)
2249 NewIncomingValues.push_back(Elt: Phi0UseV);
2250 else
2251 return false;
2252 return true;
2253 };
2254
2255 if (all_of(Range: zip(t: Phi0->operands(), u: Phi1->operands()),
2256 P: CanFoldIncomingValuePair)) {
2257 PHINode *NewPhi =
2258 PHINode::Create(Ty: Phi0->getType(), NumReservedValues: Phi0->getNumOperands());
2259 assert(NewIncomingValues.size() == Phi0->getNumOperands() &&
2260 "The number of collected incoming values should equal the number "
2261 "of the original PHINode operands!");
2262 for (unsigned I = 0; I < Phi0->getNumOperands(); I++)
2263 NewPhi->addIncoming(V: NewIncomingValues[I], BB: Phi0->getIncomingBlock(i: I));
2264 return NewPhi;
2265 }
2266 }
2267
2268 if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
2269 return nullptr;
2270
2271 // Match a pair of incoming constants for one of the predecessor blocks.
2272 BasicBlock *ConstBB, *OtherBB;
2273 Constant *C0, *C1;
2274 if (match(V: Phi0->getIncomingValue(i: 0), P: m_ImmConstant(C&: C0))) {
2275 ConstBB = Phi0->getIncomingBlock(i: 0);
2276 OtherBB = Phi0->getIncomingBlock(i: 1);
2277 } else if (match(V: Phi0->getIncomingValue(i: 1), P: m_ImmConstant(C&: C0))) {
2278 ConstBB = Phi0->getIncomingBlock(i: 1);
2279 OtherBB = Phi0->getIncomingBlock(i: 0);
2280 } else {
2281 return nullptr;
2282 }
2283 if (!match(V: Phi1->getIncomingValueForBlock(BB: ConstBB), P: m_ImmConstant(C&: C1)))
2284 return nullptr;
2285
2286 // The block that we are hoisting to must reach here unconditionally.
2287 // Otherwise, we could be speculatively executing an expensive or
2288 // non-speculative op.
2289 auto *PredBlockBranch = dyn_cast<UncondBrInst>(Val: OtherBB->getTerminator());
2290 if (!PredBlockBranch || !DT.isReachableFromEntry(A: OtherBB))
2291 return nullptr;
2292
2293 // TODO: This check could be tightened to only apply to binops (div/rem) that
2294 // are not safe to speculatively execute. But that could allow hoisting
2295 // potentially expensive instructions (fdiv for example).
2296 for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
2297 if (!isGuaranteedToTransferExecutionToSuccessor(I: &*BBIter))
2298 return nullptr;
2299
2300 // Fold constants for the predecessor block with constant incoming values.
2301 Constant *NewC = ConstantFoldBinaryOpOperands(Opcode: BO.getOpcode(), LHS: C0, RHS: C1, DL);
2302 if (!NewC)
2303 return nullptr;
2304
2305 // Make a new binop in the predecessor block with the non-constant incoming
2306 // values.
2307 Builder.SetInsertPoint(PredBlockBranch);
2308 Value *NewBO = Builder.CreateBinOp(Opc: BO.getOpcode(),
2309 LHS: Phi0->getIncomingValueForBlock(BB: OtherBB),
2310 RHS: Phi1->getIncomingValueForBlock(BB: OtherBB));
2311 if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(Val: NewBO))
2312 NotFoldedNewBO->copyIRFlags(V: &BO);
2313
2314 // Replace the binop with a phi of the new values. The old phis are dead.
2315 PHINode *NewPhi = PHINode::Create(Ty: BO.getType(), NumReservedValues: 2);
2316 NewPhi->addIncoming(V: NewBO, BB: OtherBB);
2317 NewPhi->addIncoming(V: NewC, BB: ConstBB);
2318 return NewPhi;
2319}
2320
2321Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
2322 auto TryFoldOperand = [&](unsigned OpIdx,
2323 bool IsOtherParamConst) -> Instruction * {
2324 if (auto *Sel = dyn_cast<SelectInst>(Val: I.getOperand(i_nocapture: OpIdx)))
2325 return FoldOpIntoSelect(Op&: I, SI: Sel, FoldWithMultiUse: false, SimplifyBothArms: !IsOtherParamConst);
2326 if (auto *PN = dyn_cast<PHINode>(Val: I.getOperand(i_nocapture: OpIdx)))
2327 return foldOpIntoPhi(I, PN);
2328 return nullptr;
2329 };
2330
2331 if (Instruction *NewI =
2332 TryFoldOperand(/*OpIdx=*/0, isa<Constant>(Val: I.getOperand(i_nocapture: 1))))
2333 return NewI;
2334 return TryFoldOperand(/*OpIdx=*/1, isa<Constant>(Val: I.getOperand(i_nocapture: 0)));
2335}
2336
2337static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
2338 // If this GEP has only 0 indices, it is the same pointer as
2339 // Src. If Src is not a trivial GEP too, don't combine
2340 // the indices.
2341 if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
2342 !Src.hasOneUse())
2343 return false;
2344 return true;
2345}
2346
2347/// Find a constant NewC that has property:
2348/// shuffle(NewC, poison, ShMask) = C
2349/// for lanes that select NewC. Lanes that select the poison operand are not
2350/// constrained.
2351/// Returns nullptr if such a constant does not exist e.g. ShMask=<0,0> C=<1,2>
2352///
2353/// A 1-to-1 mapping is not required. Example:
2354/// ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <poison,5,6,poison>
2355Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
2356 VectorType *NewCTy) {
2357 if (isa<ScalableVectorType>(Val: NewCTy)) {
2358 Constant *Splat = C->getSplatValue();
2359 if (!Splat)
2360 return nullptr;
2361 return ConstantVector::getSplat(EC: NewCTy->getElementCount(), Elt: Splat);
2362 }
2363
2364 if (cast<FixedVectorType>(Val: NewCTy)->getNumElements() >
2365 cast<FixedVectorType>(Val: C->getType())->getNumElements())
2366 return nullptr;
2367
2368 unsigned NewCNumElts = cast<FixedVectorType>(Val: NewCTy)->getNumElements();
2369 PoisonValue *PoisonScalar = PoisonValue::get(T: C->getType()->getScalarType());
2370 SmallVector<Constant *, 16> NewVecC(NewCNumElts, PoisonScalar);
2371 unsigned NumElts = cast<FixedVectorType>(Val: C->getType())->getNumElements();
2372 for (unsigned I = 0; I < NumElts; ++I) {
2373 Constant *CElt = C->getAggregateElement(Elt: I);
2374 if (ShMask[I] >= 0) {
2375 int MaskElt = ShMask[I];
2376 if (MaskElt >= (int)NewCNumElts)
2377 continue;
2378
2379 Constant *NewCElt = NewVecC[MaskElt];
2380 // Bail out if:
2381 // 1. The constant vector contains a constant expression.
2382 // 2. The shuffle needs an element of the constant vector that can't
2383 // be mapped to a new constant vector.
2384 // 3. This is a widening shuffle that copies elements of V1 into the
2385 // extended elements (extending with poison is allowed).
2386 if (!CElt || (!isa<PoisonValue>(Val: NewCElt) && NewCElt != CElt) ||
2387 I >= NewCNumElts)
2388 return nullptr;
2389 NewVecC[MaskElt] = CElt;
2390 }
2391 }
2392 return ConstantVector::get(V: NewVecC);
2393}
2394
2395// Get the result of `Vector Op Splat` (or Splat Op Vector if \p SplatLHS).
2396static Constant *constantFoldBinOpWithSplat(unsigned Opcode, Constant *Vector,
2397 Constant *Splat, bool SplatLHS,
2398 const DataLayout &DL) {
2399 ElementCount EC = cast<VectorType>(Val: Vector->getType())->getElementCount();
2400 Constant *LHS = ConstantVector::getSplat(EC, Elt: Splat);
2401 Constant *RHS = Vector;
2402 if (!SplatLHS)
2403 std::swap(a&: LHS, b&: RHS);
2404 return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
2405}
2406
2407template <Intrinsic::ID SpliceID>
2408static Instruction *foldSpliceBinOp(BinaryOperator &Inst,
2409 InstCombiner::BuilderTy &Builder) {
2410 Value *LHS = Inst.getOperand(i_nocapture: 0), *RHS = Inst.getOperand(i_nocapture: 1);
2411 auto CreateBinOpSplice = [&](Value *X, Value *Y, Value *Offset) {
2412 Value *V = Builder.CreateBinOp(Opc: Inst.getOpcode(), LHS: X, RHS: Y, Name: Inst.getName());
2413 if (auto *BO = dyn_cast<BinaryOperator>(Val: V))
2414 BO->copyIRFlags(V: &Inst);
2415 Module *M = Inst.getModule();
2416 Function *F = Intrinsic::getOrInsertDeclaration(M, id: SpliceID, OverloadTys: V->getType());
2417 return CallInst::Create(Func: F, Args: {V, PoisonValue::get(T: V->getType()), Offset});
2418 };
2419 Value *V1, *V2, *Offset;
2420 if (match(LHS,
2421 m_Intrinsic<SpliceID>(m_Value(V&: V1), m_Poison(), m_Value(V&: Offset)))) {
2422 // Op(splice(V1, poison, offset), splice(V2, poison, offset))
2423 // -> splice(Op(V1, V2), poison, offset)
2424 if (match(RHS, m_Intrinsic<SpliceID>(m_Value(V&: V2), m_Poison(),
2425 m_Specific(V: Offset))) &&
2426 (LHS->hasOneUse() || RHS->hasOneUse() ||
2427 (LHS == RHS && LHS->hasNUses(N: 2))))
2428 return CreateBinOpSplice(V1, V2, Offset);
2429
2430 // Op(splice(V1, poison, offset), RHSSplat)
2431 // -> splice(Op(V1, RHSSplat), poison, offset)
2432 if (LHS->hasOneUse() && isSplatValue(V: RHS))
2433 return CreateBinOpSplice(V1, RHS, Offset);
2434 }
2435 // Op(LHSSplat, splice(V2, poison, offset))
2436 // -> splice(Op(LHSSplat, V2), poison, offset)
2437 else if (isSplatValue(V: LHS) &&
2438 match(RHS, m_OneUse(m_Intrinsic<SpliceID>(m_Value(V&: V2), m_Poison(),
2439 m_Value(V&: Offset)))))
2440 return CreateBinOpSplice(LHS, V2, Offset);
2441
2442 // TODO: Fold binops of the form
2443 // Op(splice(poison, V1, offset), splice(poison, V2, offset))
2444 // -> splice(poison, Op(V1, V2), offset)
2445
2446 return nullptr;
2447}
2448
2449Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
2450 if (!isa<VectorType>(Val: Inst.getType()))
2451 return nullptr;
2452
2453 BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
2454 Value *LHS = Inst.getOperand(i_nocapture: 0), *RHS = Inst.getOperand(i_nocapture: 1);
2455 assert(cast<VectorType>(LHS->getType())->getElementCount() ==
2456 cast<VectorType>(Inst.getType())->getElementCount());
2457 assert(cast<VectorType>(RHS->getType())->getElementCount() ==
2458 cast<VectorType>(Inst.getType())->getElementCount());
2459
2460 auto foldConstantsThroughSubVectorInsertSplat =
2461 [&](Value *MaybeSubVector, Value *MaybeSplat,
2462 bool SplatLHS) -> Instruction * {
2463 Value *Idx;
2464 Constant *Splat, *SubVector, *Dest;
2465 if (!match(V: MaybeSplat, P: m_ConstantSplat(SubPattern: m_Constant(C&: Splat))) ||
2466 !match(V: MaybeSubVector,
2467 P: m_VectorInsert(Op0: m_Constant(C&: Dest), Op1: m_Constant(C&: SubVector),
2468 Op2: m_Value(V&: Idx))))
2469 return nullptr;
2470 SubVector =
2471 constantFoldBinOpWithSplat(Opcode, Vector: SubVector, Splat, SplatLHS, DL);
2472 Dest = constantFoldBinOpWithSplat(Opcode, Vector: Dest, Splat, SplatLHS, DL);
2473 if (!SubVector || !Dest)
2474 return nullptr;
2475 auto *InsertVector =
2476 Builder.CreateInsertVector(DstType: Dest->getType(), SrcVec: Dest, SubVec: SubVector, Idx);
2477 return replaceInstUsesWith(I&: Inst, V: InsertVector);
2478 };
2479
2480 // If one operand is a constant splat and the other operand is a
2481 // `vector.insert` where both the destination and subvector are constant,
2482 // apply the operation to both the destination and subvector, returning a new
2483 // constant `vector.insert`. This helps constant folding for scalable vectors.
2484 if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
2485 /*MaybeSubVector=*/LHS, /*MaybeSplat=*/RHS, /*SplatLHS=*/false))
2486 return Folded;
2487 if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
2488 /*MaybeSubVector=*/RHS, /*MaybeSplat=*/LHS, /*SplatLHS=*/true))
2489 return Folded;
2490
2491 // If both operands of the binop are vector concatenations, then perform the
2492 // narrow binop on each pair of the source operands followed by concatenation
2493 // of the results.
2494 Value *L0, *L1, *R0, *R1;
2495 ArrayRef<int> Mask;
2496 if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: L0), v2: m_Value(V&: L1), mask: m_Mask(Mask))) &&
2497 match(V: RHS, P: m_Shuffle(v1: m_Value(V&: R0), v2: m_Value(V&: R1), mask: m_SpecificMask(Mask))) &&
2498 LHS->hasOneUse() && RHS->hasOneUse() &&
2499 cast<ShuffleVectorInst>(Val: LHS)->isConcat() &&
2500 cast<ShuffleVectorInst>(Val: RHS)->isConcat()) {
2501 // This transform does not have the speculative execution constraint as
2502 // below because the shuffle is a concatenation. The new binops are
2503 // operating on exactly the same elements as the existing binop.
2504 // TODO: We could ease the mask requirement to allow different undef lanes,
2505 // but that requires an analysis of the binop-with-undef output value.
2506 Value *NewBO0 = Builder.CreateBinOp(Opc: Opcode, LHS: L0, RHS: R0);
2507 if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO0))
2508 BO->copyIRFlags(V: &Inst);
2509 Value *NewBO1 = Builder.CreateBinOp(Opc: Opcode, LHS: L1, RHS: R1);
2510 if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO1))
2511 BO->copyIRFlags(V: &Inst);
2512 return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
2513 }
2514
2515 auto createBinOpReverse = [&](Value *X, Value *Y) {
2516 Value *V = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y, Name: Inst.getName());
2517 if (auto *BO = dyn_cast<BinaryOperator>(Val: V))
2518 BO->copyIRFlags(V: &Inst);
2519 Module *M = Inst.getModule();
2520 Function *F = Intrinsic::getOrInsertDeclaration(
2521 M, id: Intrinsic::vector_reverse, OverloadTys: V->getType());
2522 return CallInst::Create(Func: F, Args: V);
2523 };
2524
2525 // NOTE: Reverse shuffles don't require the speculative execution protection
2526 // below because they don't affect which lanes take part in the computation.
2527
2528 Value *V1, *V2;
2529 if (match(V: LHS, P: m_VecReverse(Op0: m_Value(V&: V1)))) {
2530 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
2531 if (match(V: RHS, P: m_VecReverse(Op0: m_Value(V&: V2))) &&
2532 (LHS->hasOneUse() || RHS->hasOneUse() ||
2533 (LHS == RHS && LHS->hasNUses(N: 2))))
2534 return createBinOpReverse(V1, V2);
2535
2536 // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
2537 if (LHS->hasOneUse() && isSplatValue(V: RHS))
2538 return createBinOpReverse(V1, RHS);
2539 }
2540 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2541 else if (isSplatValue(V: LHS) && match(V: RHS, P: m_OneUse(SubPattern: m_VecReverse(Op0: m_Value(V&: V2)))))
2542 return createBinOpReverse(LHS, V2);
2543
2544 auto createBinOpVPReverse = [&](Value *X, Value *Y, Value *EVL) {
2545 Value *V = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y, Name: Inst.getName());
2546 if (auto *BO = dyn_cast<BinaryOperator>(Val: V))
2547 BO->copyIRFlags(V: &Inst);
2548
2549 ElementCount EC = cast<VectorType>(Val: V->getType())->getElementCount();
2550 Value *AllTrueMask = Builder.CreateVectorSplat(EC, V: Builder.getTrue());
2551 Module *M = Inst.getModule();
2552 Function *F = Intrinsic::getOrInsertDeclaration(
2553 M, id: Intrinsic::experimental_vp_reverse, OverloadTys: V->getType());
2554 return CallInst::Create(Func: F, Args: {V, AllTrueMask, EVL});
2555 };
2556
2557 Value *EVL;
2558 if (match(V: LHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>(
2559 Op0: m_Value(V&: V1), Op1: m_AllOnes(), Op2: m_Value(V&: EVL)))) {
2560 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
2561 if (match(V: RHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>(
2562 Op0: m_Value(V&: V2), Op1: m_AllOnes(), Op2: m_Specific(V: EVL))) &&
2563 (LHS->hasOneUse() || RHS->hasOneUse() ||
2564 (LHS == RHS && LHS->hasNUses(N: 2))))
2565 return createBinOpVPReverse(V1, V2, EVL);
2566
2567 // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
2568 if (LHS->hasOneUse() && isSplatValue(V: RHS))
2569 return createBinOpVPReverse(V1, RHS, EVL);
2570 }
2571 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2572 else if (isSplatValue(V: LHS) &&
2573 match(V: RHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>(
2574 Op0: m_Value(V&: V2), Op1: m_AllOnes(), Op2: m_Value(V&: EVL))))
2575 return createBinOpVPReverse(LHS, V2, EVL);
2576
2577 if (Instruction *Folded =
2578 foldSpliceBinOp<Intrinsic::vector_splice_left>(Inst, Builder))
2579 return Folded;
2580 if (Instruction *Folded =
2581 foldSpliceBinOp<Intrinsic::vector_splice_right>(Inst, Builder))
2582 return Folded;
2583
2584 // It may not be safe to reorder shuffles and things like div, urem, etc.
2585 // because we may trap when executing those ops on unknown vector elements.
2586 // See PR20059.
2587 if (!isSafeToSpeculativelyExecuteWithVariableReplaced(I: &Inst))
2588 return nullptr;
2589
2590 auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
2591 Value *XY = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y);
2592 if (auto *BO = dyn_cast<BinaryOperator>(Val: XY))
2593 BO->copyIRFlags(V: &Inst);
2594 return new ShuffleVectorInst(XY, M);
2595 };
2596
2597 // If both arguments of the binary operation are shuffles that use the same
2598 // mask and shuffle within a single vector, move the shuffle after the binop.
2599 if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(), mask: m_Mask(Mask))) &&
2600 match(V: RHS, P: m_Shuffle(v1: m_Value(V&: V2), v2: m_Poison(), mask: m_SpecificMask(Mask))) &&
2601 V1->getType() == V2->getType() &&
2602 (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
2603 // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
2604 return createBinOpShuffle(V1, V2, Mask);
2605 }
2606
2607 // If both arguments of a commutative binop are select-shuffles that use the
2608 // same mask with commuted operands, the shuffles are unnecessary.
2609 if (Inst.isCommutative() &&
2610 match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Value(V&: V2), mask: m_Mask(Mask))) &&
2611 match(V: RHS,
2612 P: m_Shuffle(v1: m_Specific(V: V2), v2: m_Specific(V: V1), mask: m_SpecificMask(Mask)))) {
2613 auto *LShuf = cast<ShuffleVectorInst>(Val: LHS);
2614 auto *RShuf = cast<ShuffleVectorInst>(Val: RHS);
2615 // TODO: Allow shuffles that contain undefs in the mask?
2616 // That is legal, but it reduces undef knowledge.
2617 // TODO: Allow arbitrary shuffles by shuffling after binop?
2618 // That might be legal, but we have to deal with poison.
2619 if (LShuf->isSelect() &&
2620 !is_contained(Range: LShuf->getShuffleMask(), Element: PoisonMaskElem) &&
2621 RShuf->isSelect() &&
2622 !is_contained(Range: RShuf->getShuffleMask(), Element: PoisonMaskElem)) {
2623 // Example:
2624 // LHS = shuffle V1, V2, <0, 5, 6, 3>
2625 // RHS = shuffle V2, V1, <0, 5, 6, 3>
2626 // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
2627 Instruction *NewBO = BinaryOperator::Create(Op: Opcode, S1: V1, S2: V2);
2628 NewBO->copyIRFlags(V: &Inst);
2629 return NewBO;
2630 }
2631 }
2632
2633 // If one argument is a shuffle within one vector and the other is a constant,
2634 // try moving the shuffle after the binary operation. This canonicalization
2635 // intends to move shuffles closer to other shuffles and binops closer to
2636 // other binops, so they can be folded. It may also enable demanded elements
2637 // transforms.
2638 Constant *C;
2639 if (match(V: &Inst, P: m_c_BinOp(L: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(),
2640 mask: m_Mask(Mask))),
2641 R: m_ImmConstant(C)))) {
2642 assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() &&
2643 "Shuffle should not change scalar type");
2644
2645 bool ConstOp1 = isa<Constant>(Val: RHS);
2646 if (Constant *NewC =
2647 unshuffleConstant(ShMask: Mask, C, NewCTy: cast<VectorType>(Val: V1->getType()))) {
2648 // For fixed vectors, lanes of NewC not used by the shuffle will be poison
2649 // which will cause UB for div/rem. Mask them with a safe constant.
2650 if (isa<FixedVectorType>(Val: V1->getType()) && Inst.isIntDivRem())
2651 NewC = getSafeVectorConstantForBinop(Opcode, In: NewC, IsRHSConstant: ConstOp1);
2652
2653 // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
2654 // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
2655 Value *NewLHS = ConstOp1 ? V1 : NewC;
2656 Value *NewRHS = ConstOp1 ? NewC : V1;
2657 return createBinOpShuffle(NewLHS, NewRHS, Mask);
2658 }
2659 }
2660
2661 // Try to reassociate to sink a splat shuffle after a binary operation.
2662 if (Inst.isAssociative() && Inst.isCommutative()) {
2663 // Canonicalize shuffle operand as LHS.
2664 if (isa<ShuffleVectorInst>(Val: RHS))
2665 std::swap(a&: LHS, b&: RHS);
2666
2667 Value *X;
2668 ArrayRef<int> MaskC;
2669 int SplatIndex;
2670 Value *Y, *OtherOp;
2671 if (!match(V: LHS,
2672 P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_Mask(MaskC)))) ||
2673 !match(Mask: MaskC, P: m_SplatOrPoisonMask(SplatIndex)) ||
2674 X->getType() != Inst.getType() ||
2675 !match(V: RHS, P: m_OneUse(SubPattern: m_BinOp(Opcode, L: m_Value(V&: Y), R: m_Value(V&: OtherOp)))))
2676 return nullptr;
2677
2678 // FIXME: This may not be safe if the analysis allows undef elements. By
2679 // moving 'Y' before the splat shuffle, we are implicitly assuming
2680 // that it is not undef/poison at the splat index.
2681 if (isSplatValue(V: OtherOp, Index: SplatIndex)) {
2682 std::swap(a&: Y, b&: OtherOp);
2683 } else if (!isSplatValue(V: Y, Index: SplatIndex)) {
2684 return nullptr;
2685 }
2686
2687 // X and Y are splatted values, so perform the binary operation on those
2688 // values followed by a splat followed by the 2nd binary operation:
2689 // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
2690 Value *NewBO = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y);
2691 SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
2692 Value *NewSplat = Builder.CreateShuffleVector(V: NewBO, Mask: NewMask);
2693 Instruction *R = BinaryOperator::Create(Op: Opcode, S1: NewSplat, S2: OtherOp);
2694
2695 // Intersect FMF on both new binops. Other (poison-generating) flags are
2696 // dropped to be safe.
2697 if (isa<FPMathOperator>(Val: R)) {
2698 R->copyFastMathFlags(I: &Inst);
2699 R->andIRFlags(V: RHS);
2700 }
2701 if (auto *NewInstBO = dyn_cast<BinaryOperator>(Val: NewBO))
2702 NewInstBO->copyIRFlags(V: R);
2703 return R;
2704 }
2705
2706 return nullptr;
2707}
2708
2709/// Try to narrow the width of a binop if at least 1 operand is an extend of
2710/// of a value. This requires a potentially expensive known bits check to make
2711/// sure the narrow op does not overflow.
2712Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
2713 // We need at least one extended operand.
2714 Value *Op0 = BO.getOperand(i_nocapture: 0), *Op1 = BO.getOperand(i_nocapture: 1);
2715
2716 // If this is a sub, we swap the operands since we always want an extension
2717 // on the RHS. The LHS can be an extension or a constant.
2718 if (BO.getOpcode() == Instruction::Sub)
2719 std::swap(a&: Op0, b&: Op1);
2720
2721 Value *X;
2722 bool IsSext = match(V: Op0, P: m_SExt(Op: m_Value(V&: X)));
2723 if (!IsSext && !match(V: Op0, P: m_ZExt(Op: m_Value(V&: X))))
2724 return nullptr;
2725
2726 // If both operands are the same extension from the same source type and we
2727 // can eliminate at least one (hasOneUse), this might work.
2728 CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
2729 Value *Y;
2730 if (!(match(V: Op1, P: m_ZExtOrSExt(Op: m_Value(V&: Y))) && X->getType() == Y->getType() &&
2731 cast<Operator>(Val: Op1)->getOpcode() == CastOpc &&
2732 (Op0->hasOneUse() || Op1->hasOneUse()))) {
2733 // If that did not match, see if we have a suitable constant operand.
2734 // Truncating and extending must produce the same constant.
2735 Constant *WideC;
2736 if (!Op0->hasOneUse() || !match(V: Op1, P: m_Constant(C&: WideC)))
2737 return nullptr;
2738 Constant *NarrowC = getLosslessInvCast(C: WideC, InvCastTo: X->getType(), CastOp: CastOpc, DL);
2739 if (!NarrowC)
2740 return nullptr;
2741 Y = NarrowC;
2742 }
2743
2744 // Swap back now that we found our operands.
2745 if (BO.getOpcode() == Instruction::Sub)
2746 std::swap(a&: X, b&: Y);
2747
2748 // Both operands have narrow versions. Last step: the math must not overflow
2749 // in the narrow width.
2750 if (!willNotOverflow(Opcode: BO.getOpcode(), LHS: X, RHS: Y, CxtI: BO, IsSigned: IsSext))
2751 return nullptr;
2752
2753 // bo (ext X), (ext Y) --> ext (bo X, Y)
2754 // bo (ext X), C --> ext (bo X, C')
2755 Value *NarrowBO = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: X, RHS: Y, Name: "narrow");
2756 if (auto *NewBinOp = dyn_cast<BinaryOperator>(Val: NarrowBO)) {
2757 if (IsSext)
2758 NewBinOp->setHasNoSignedWrap();
2759 else
2760 NewBinOp->setHasNoUnsignedWrap();
2761 }
2762 return CastInst::Create(CastOpc, S: NarrowBO, Ty: BO.getType());
2763}
2764
2765/// Determine nowrap flags for (gep (gep p, x), y) to (gep p, (x + y))
2766/// transform.
2767static GEPNoWrapFlags getMergedGEPNoWrapFlags(GEPOperator &GEP1,
2768 GEPOperator &GEP2) {
2769 return GEP1.getNoWrapFlags().intersectForOffsetAdd(Other: GEP2.getNoWrapFlags());
2770}
2771
2772/// Thread a GEP operation with constant indices through the constant true/false
2773/// arms of a select.
2774static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
2775 InstCombiner::BuilderTy &Builder) {
2776 if (!GEP.hasAllConstantIndices())
2777 return nullptr;
2778
2779 Instruction *Sel;
2780 Value *Cond;
2781 Constant *TrueC, *FalseC;
2782 if (!match(V: GEP.getPointerOperand(), P: m_Instruction(I&: Sel)) ||
2783 !match(V: Sel,
2784 P: m_Select(C: m_Value(V&: Cond), L: m_Constant(C&: TrueC), R: m_Constant(C&: FalseC))))
2785 return nullptr;
2786
2787 // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
2788 // Propagate 'inbounds' and metadata from existing instructions.
2789 // Note: using IRBuilder to create the constants for efficiency.
2790 SmallVector<Value *, 4> IndexC(GEP.indices());
2791 GEPNoWrapFlags NW = GEP.getNoWrapFlags();
2792 Type *Ty = GEP.getSourceElementType();
2793 Value *NewTrueC = Builder.CreateGEP(Ty, Ptr: TrueC, IdxList: IndexC, Name: "", NW);
2794 Value *NewFalseC = Builder.CreateGEP(Ty, Ptr: FalseC, IdxList: IndexC, Name: "", NW);
2795 return SelectInst::Create(C: Cond, S1: NewTrueC, S2: NewFalseC, NameStr: "", InsertBefore: nullptr, MDFrom: Sel);
2796}
2797
2798// Canonicalization:
2799// gep T, (gep i8, base, C1), (Index + C2) into
2800// gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index
2801static Instruction *canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP,
2802 GEPOperator *Src,
2803 InstCombinerImpl &IC) {
2804 if (GEP.getNumIndices() != 1)
2805 return nullptr;
2806 auto &DL = IC.getDataLayout();
2807 Value *Base;
2808 const APInt *C1;
2809 if (!match(V: Src, P: m_PtrAdd(PointerOp: m_Value(V&: Base), OffsetOp: m_APInt(Res&: C1))))
2810 return nullptr;
2811 Value *VarIndex;
2812 const APInt *C2;
2813 Type *PtrTy = Src->getType()->getScalarType();
2814 unsigned IndexSizeInBits = DL.getIndexTypeSizeInBits(Ty: PtrTy);
2815 if (!match(V: GEP.getOperand(i_nocapture: 1), P: m_AddLike(L: m_Value(V&: VarIndex), R: m_APInt(Res&: C2))))
2816 return nullptr;
2817 if (C1->getBitWidth() != IndexSizeInBits ||
2818 C2->getBitWidth() != IndexSizeInBits)
2819 return nullptr;
2820 Type *BaseType = GEP.getSourceElementType();
2821 if (isa<ScalableVectorType>(Val: BaseType))
2822 return nullptr;
2823 APInt TypeSize(IndexSizeInBits, DL.getTypeAllocSize(Ty: BaseType));
2824 APInt NewOffset = TypeSize * *C2 + *C1;
2825 if (NewOffset.isZero() ||
2826 (Src->hasOneUse() && GEP.getOperand(i_nocapture: 1)->hasOneUse())) {
2827 GEPNoWrapFlags Flags = GEPNoWrapFlags::none();
2828 if (GEP.hasNoUnsignedWrap() &&
2829 cast<GEPOperator>(Val: Src)->hasNoUnsignedWrap() &&
2830 match(V: GEP.getOperand(i_nocapture: 1), P: m_NUWAddLike(L: m_Value(), R: m_Value()))) {
2831 Flags |= GEPNoWrapFlags::noUnsignedWrap();
2832 if (GEP.isInBounds() && cast<GEPOperator>(Val: Src)->isInBounds())
2833 Flags |= GEPNoWrapFlags::inBounds();
2834 }
2835
2836 Value *GEPConst =
2837 IC.Builder.CreatePtrAdd(Ptr: Base, Offset: IC.Builder.getInt(AI: NewOffset), Name: "", NW: Flags);
2838 return GetElementPtrInst::Create(PointeeType: BaseType, Ptr: GEPConst, IdxList: VarIndex, NW: Flags);
2839 }
2840
2841 return nullptr;
2842}
2843
2844/// Combine constant offsets separated by variable offsets.
2845/// ptradd (ptradd (ptradd p, C1), x), C2 -> ptradd (ptradd p, x), C1+C2
2846static Instruction *combineConstantOffsets(GetElementPtrInst &GEP,
2847 InstCombinerImpl &IC) {
2848 if (!GEP.hasAllConstantIndices())
2849 return nullptr;
2850
2851 GEPNoWrapFlags NW = GEPNoWrapFlags::all();
2852 SmallVector<GetElementPtrInst *> Skipped;
2853 auto *InnerGEP = dyn_cast<GetElementPtrInst>(Val: GEP.getPointerOperand());
2854 while (true) {
2855 if (!InnerGEP)
2856 return nullptr;
2857
2858 NW = NW.intersectForReassociate(Other: InnerGEP->getNoWrapFlags());
2859 if (InnerGEP->hasAllConstantIndices())
2860 break;
2861
2862 if (!InnerGEP->hasOneUse())
2863 return nullptr;
2864
2865 Skipped.push_back(Elt: InnerGEP);
2866 InnerGEP = dyn_cast<GetElementPtrInst>(Val: InnerGEP->getPointerOperand());
2867 }
2868
2869 // The two constant offset GEPs are directly adjacent: Let normal offset
2870 // merging handle it.
2871 if (Skipped.empty())
2872 return nullptr;
2873
2874 // FIXME: This one-use check is not strictly necessary. Consider relaxing it
2875 // if profitable.
2876 if (!InnerGEP->hasOneUse())
2877 return nullptr;
2878
2879 // Don't bother with vector splats.
2880 Type *Ty = GEP.getType();
2881 if (InnerGEP->getType() != Ty)
2882 return nullptr;
2883
2884 const DataLayout &DL = IC.getDataLayout();
2885 APInt Offset(DL.getIndexTypeSizeInBits(Ty), 0);
2886 if (!GEP.accumulateConstantOffset(DL, Offset) ||
2887 !InnerGEP->accumulateConstantOffset(DL, Offset))
2888 return nullptr;
2889
2890 IC.replaceOperand(I&: *Skipped.back(), OpNum: 0, V: InnerGEP->getPointerOperand());
2891 for (GetElementPtrInst *SkippedGEP : Skipped)
2892 SkippedGEP->setNoWrapFlags(NW);
2893
2894 return IC.replaceInstUsesWith(
2895 I&: GEP,
2896 V: IC.Builder.CreatePtrAdd(Ptr: Skipped.front(), Offset: IC.Builder.getInt(AI: Offset), Name: "",
2897 NW: NW.intersectForOffsetAdd(Other: GEP.getNoWrapFlags())));
2898}
2899
2900Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
2901 GEPOperator *Src) {
2902 // Combine Indices - If the source pointer to this getelementptr instruction
2903 // is a getelementptr instruction with matching element type, combine the
2904 // indices of the two getelementptr instructions into a single instruction.
2905 if (!shouldMergeGEPs(GEP&: *cast<GEPOperator>(Val: &GEP), Src&: *Src))
2906 return nullptr;
2907
2908 if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, IC&: *this))
2909 return I;
2910
2911 if (auto *I = combineConstantOffsets(GEP, IC&: *this))
2912 return I;
2913
2914 if (Src->getResultElementType() != GEP.getSourceElementType())
2915 return nullptr;
2916
2917 // Fold chained GEP with constant base into single GEP:
2918 // gep i8, (gep i8, %base, C1), (select Cond, C2, C3)
2919 // -> gep i8, %base, (select Cond, C1+C2, C1+C3)
2920 if (Src->hasOneUse() && GEP.getNumIndices() == 1 &&
2921 Src->getNumIndices() == 1) {
2922 Value *SrcIdx = *Src->idx_begin();
2923 Value *GEPIdx = *GEP.idx_begin();
2924 const APInt *ConstOffset, *TrueVal, *FalseVal;
2925 Value *Cond;
2926
2927 if ((match(V: SrcIdx, P: m_APInt(Res&: ConstOffset)) &&
2928 match(V: GEPIdx,
2929 P: m_Select(C: m_Value(V&: Cond), L: m_APInt(Res&: TrueVal), R: m_APInt(Res&: FalseVal)))) ||
2930 (match(V: GEPIdx, P: m_APInt(Res&: ConstOffset)) &&
2931 match(V: SrcIdx,
2932 P: m_Select(C: m_Value(V&: Cond), L: m_APInt(Res&: TrueVal), R: m_APInt(Res&: FalseVal))))) {
2933 auto *Select = isa<SelectInst>(Val: GEPIdx) ? cast<SelectInst>(Val: GEPIdx)
2934 : cast<SelectInst>(Val: SrcIdx);
2935
2936 // Make sure the select has only one use.
2937 if (!Select->hasOneUse())
2938 return nullptr;
2939
2940 if (TrueVal->getBitWidth() != ConstOffset->getBitWidth() ||
2941 FalseVal->getBitWidth() != ConstOffset->getBitWidth())
2942 return nullptr;
2943
2944 APInt NewTrueVal = *ConstOffset + *TrueVal;
2945 APInt NewFalseVal = *ConstOffset + *FalseVal;
2946 Constant *NewTrue = ConstantInt::get(Ty: Select->getType(), V: NewTrueVal);
2947 Constant *NewFalse = ConstantInt::get(Ty: Select->getType(), V: NewFalseVal);
2948 Value *NewSelect = Builder.CreateSelect(
2949 C: Cond, True: NewTrue, False: NewFalse, /*Name=*/"",
2950 /*MDFrom=*/(ProfcheckDisableMetadataFixes ? nullptr : Select));
2951 GEPNoWrapFlags Flags =
2952 getMergedGEPNoWrapFlags(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP));
2953 return replaceInstUsesWith(I&: GEP,
2954 V: Builder.CreateGEP(Ty: GEP.getResultElementType(),
2955 Ptr: Src->getPointerOperand(),
2956 IdxList: NewSelect, Name: "", NW: Flags));
2957 }
2958 }
2959
2960 // Find out whether the last index in the source GEP is a sequential idx.
2961 bool EndsWithSequential = false;
2962 for (gep_type_iterator I = gep_type_begin(GEP: *Src), E = gep_type_end(GEP: *Src);
2963 I != E; ++I)
2964 EndsWithSequential = I.isSequential();
2965 if (!EndsWithSequential)
2966 return nullptr;
2967
2968 // Replace: gep (gep %P, long B), long A, ...
2969 // With: T = long A+B; gep %P, T, ...
2970 Value *SO1 = Src->getOperand(i_nocapture: Src->getNumOperands() - 1);
2971 Value *GO1 = GEP.getOperand(i_nocapture: 1);
2972
2973 // If they aren't the same type, then the input hasn't been processed
2974 // by the loop above yet (which canonicalizes sequential index types to
2975 // intptr_t). Just avoid transforming this until the input has been
2976 // normalized.
2977 if (SO1->getType() != GO1->getType())
2978 return nullptr;
2979
2980 Value *Sum =
2981 simplifyAddInst(LHS: GO1, RHS: SO1, IsNSW: false, IsNUW: false, Q: SQ.getWithInstruction(I: &GEP));
2982 // Only do the combine when we are sure the cost after the
2983 // merge is never more than that before the merge.
2984 if (Sum == nullptr)
2985 return nullptr;
2986
2987 SmallVector<Value *, 8> Indices;
2988 Indices.append(in_start: Src->op_begin() + 1, in_end: Src->op_end() - 1);
2989 Indices.push_back(Elt: Sum);
2990 Indices.append(in_start: GEP.op_begin() + 2, in_end: GEP.op_end());
2991
2992 // Don't create GEPs with more than one non-zero index.
2993 unsigned NumNonZeroIndices = count_if(Range&: Indices, P: [](Value *Idx) {
2994 auto *C = dyn_cast<Constant>(Val: Idx);
2995 return !C || !C->isNullValue();
2996 });
2997 if (NumNonZeroIndices > 1)
2998 return nullptr;
2999
3000 return replaceInstUsesWith(
3001 I&: GEP, V: Builder.CreateGEP(
3002 Ty: Src->getSourceElementType(), Ptr: Src->getOperand(i_nocapture: 0), IdxList: Indices, Name: "",
3003 NW: getMergedGEPNoWrapFlags(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP))));
3004}
3005
3006Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
3007 BuilderTy *Builder,
3008 bool &DoesConsume, unsigned Depth) {
3009 static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1));
3010 // ~(~(X)) -> X.
3011 Value *A, *B;
3012 if (match(V, P: m_Not(V: m_Value(V&: A)))) {
3013 DoesConsume = true;
3014 return A;
3015 }
3016
3017 Constant *C;
3018 // Constants can be considered to be not'ed values.
3019 if (match(V, P: m_ImmConstant(C)))
3020 return ConstantExpr::getNot(C);
3021
3022 if (Depth++ >= MaxAnalysisRecursionDepth)
3023 return nullptr;
3024
3025 // The rest of the cases require that we invert all uses so don't bother
3026 // doing the analysis if we know we can't use the result.
3027 if (!WillInvertAllUses)
3028 return nullptr;
3029
3030 // Compares can be inverted if all of their uses are being modified to use
3031 // the ~V.
3032 if (auto *I = dyn_cast<CmpInst>(Val: V)) {
3033 if (Builder != nullptr)
3034 return Builder->CreateCmp(Pred: I->getInversePredicate(), LHS: I->getOperand(i_nocapture: 0),
3035 RHS: I->getOperand(i_nocapture: 1));
3036 return NonNull;
3037 }
3038
3039 // If `V` is of the form `A + B` then `-1 - V` can be folded into
3040 // `(-1 - B) - A` if we are willing to invert all of the uses.
3041 if (match(V, P: m_Add(L: m_Value(V&: A), R: m_Value(V&: B)))) {
3042 if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
3043 DoesConsume, Depth))
3044 return Builder ? Builder->CreateSub(LHS: BV, RHS: A) : NonNull;
3045 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3046 DoesConsume, Depth))
3047 return Builder ? Builder->CreateSub(LHS: AV, RHS: B) : NonNull;
3048 return nullptr;
3049 }
3050
3051 // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded
3052 // into `A ^ B` if we are willing to invert all of the uses.
3053 if (match(V, P: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B)))) {
3054 if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
3055 DoesConsume, Depth))
3056 return Builder ? Builder->CreateXor(LHS: A, RHS: BV) : NonNull;
3057 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3058 DoesConsume, Depth))
3059 return Builder ? Builder->CreateXor(LHS: AV, RHS: B) : NonNull;
3060 return nullptr;
3061 }
3062
3063 // If `V` is of the form `B - A` then `-1 - V` can be folded into
3064 // `A + (-1 - B)` if we are willing to invert all of the uses.
3065 if (match(V, P: m_Sub(L: m_Value(V&: A), R: m_Value(V&: B)))) {
3066 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3067 DoesConsume, Depth))
3068 return Builder ? Builder->CreateAdd(LHS: AV, RHS: B) : NonNull;
3069 return nullptr;
3070 }
3071
3072 // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded
3073 // into `A s>> B` if we are willing to invert all of the uses.
3074 if (match(V, P: m_AShr(L: m_Value(V&: A), R: m_Value(V&: B)))) {
3075 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3076 DoesConsume, Depth))
3077 return Builder ? Builder->CreateAShr(LHS: AV, RHS: B) : NonNull;
3078 return nullptr;
3079 }
3080
3081 Value *Cond;
3082 // LogicOps are special in that we canonicalize them at the cost of an
3083 // instruction.
3084 bool IsSelect = match(V, P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: A), R: m_Value(V&: B))) &&
3085 !shouldAvoidAbsorbingNotIntoSelect(SI: *cast<SelectInst>(Val: V));
3086 // Selects/min/max with invertible operands are freely invertible
3087 if (IsSelect || match(V, P: m_MaxOrMin(Op0: m_Value(V&: A), Op1: m_Value(V&: B)))) {
3088 bool LocalDoesConsume = DoesConsume;
3089 if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder*/ nullptr,
3090 DoesConsume&: LocalDoesConsume, Depth))
3091 return nullptr;
3092 if (Value *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3093 DoesConsume&: LocalDoesConsume, Depth)) {
3094 DoesConsume = LocalDoesConsume;
3095 if (Builder != nullptr) {
3096 Value *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
3097 DoesConsume, Depth);
3098 assert(NotB != nullptr &&
3099 "Unable to build inverted value for known freely invertable op");
3100 if (auto *II = dyn_cast<IntrinsicInst>(Val: V))
3101 return Builder->CreateBinaryIntrinsic(
3102 ID: getInverseMinMaxIntrinsic(MinMaxID: II->getIntrinsicID()), LHS: NotA, RHS: NotB);
3103 return Builder->CreateSelect(
3104 C: Cond, True: NotA, False: NotB, Name: "",
3105 MDFrom: ProfcheckDisableMetadataFixes ? nullptr : cast<Instruction>(Val: V));
3106 }
3107 return NonNull;
3108 }
3109 }
3110
3111 if (PHINode *PN = dyn_cast<PHINode>(Val: V)) {
3112 bool LocalDoesConsume = DoesConsume;
3113 SmallVector<std::pair<Value *, BasicBlock *>, 8> IncomingValues;
3114 for (Use &U : PN->operands()) {
3115 BasicBlock *IncomingBlock = PN->getIncomingBlock(U);
3116 Value *NewIncomingVal = getFreelyInvertedImpl(
3117 V: U.get(), /*WillInvertAllUses=*/false,
3118 /*Builder=*/nullptr, DoesConsume&: LocalDoesConsume, Depth: MaxAnalysisRecursionDepth - 1);
3119 if (NewIncomingVal == nullptr)
3120 return nullptr;
3121 // Make sure that we can safely erase the original PHI node.
3122 if (NewIncomingVal == V)
3123 return nullptr;
3124 if (Builder != nullptr)
3125 IncomingValues.emplace_back(Args&: NewIncomingVal, Args&: IncomingBlock);
3126 }
3127
3128 DoesConsume = LocalDoesConsume;
3129 if (Builder != nullptr) {
3130 IRBuilderBase::InsertPointGuard Guard(*Builder);
3131 Builder->SetInsertPoint(PN);
3132 PHINode *NewPN =
3133 Builder->CreatePHI(Ty: PN->getType(), NumReservedValues: PN->getNumIncomingValues());
3134 for (auto [Val, Pred] : IncomingValues)
3135 NewPN->addIncoming(V: Val, BB: Pred);
3136 return NewPN;
3137 }
3138 return NonNull;
3139 }
3140
3141 if (match(V, P: m_SExtLike(Op: m_Value(V&: A)))) {
3142 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3143 DoesConsume, Depth))
3144 return Builder ? Builder->CreateSExt(V: AV, DestTy: V->getType()) : NonNull;
3145 return nullptr;
3146 }
3147
3148 if (match(V, P: m_Trunc(Op: m_Value(V&: A)))) {
3149 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3150 DoesConsume, Depth))
3151 return Builder ? Builder->CreateTrunc(V: AV, DestTy: V->getType()) : NonNull;
3152 return nullptr;
3153 }
3154
3155 // De Morgan's Laws:
3156 // (~(A | B)) -> (~A & ~B)
3157 // (~(A & B)) -> (~A | ~B)
3158 auto TryInvertAndOrUsingDeMorgan = [&](Instruction::BinaryOps Opcode,
3159 bool IsLogical, Value *A,
3160 Value *B) -> Value * {
3161 bool LocalDoesConsume = DoesConsume;
3162 if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder=*/nullptr,
3163 DoesConsume&: LocalDoesConsume, Depth))
3164 return nullptr;
3165 if (auto *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3166 DoesConsume&: LocalDoesConsume, Depth)) {
3167 auto *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
3168 DoesConsume&: LocalDoesConsume, Depth);
3169 DoesConsume = LocalDoesConsume;
3170 if (IsLogical)
3171 return Builder ? Builder->CreateLogicalOp(Opc: Opcode, Cond1: NotA, Cond2: NotB) : NonNull;
3172 return Builder ? Builder->CreateBinOp(Opc: Opcode, LHS: NotA, RHS: NotB) : NonNull;
3173 }
3174
3175 return nullptr;
3176 };
3177
3178 if (match(V, P: m_Or(L: m_Value(V&: A), R: m_Value(V&: B))))
3179 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/false, A,
3180 B);
3181
3182 if (match(V, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B))))
3183 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/false, A,
3184 B);
3185
3186 if (match(V, P: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B))))
3187 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/true, A,
3188 B);
3189
3190 if (match(V, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B))))
3191 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/true, A,
3192 B);
3193
3194 return nullptr;
3195}
3196
3197/// Return true if we should canonicalize the gep to an i8 ptradd.
3198static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
3199 Value *PtrOp = GEP.getOperand(i_nocapture: 0);
3200 Type *GEPEltType = GEP.getSourceElementType();
3201 if (GEPEltType->isIntegerTy(BitWidth: 8))
3202 return false;
3203
3204 // Canonicalize scalable GEPs to an explicit offset using the llvm.vscale
3205 // intrinsic. This has better support in BasicAA.
3206 if (GEPEltType->isScalableTy())
3207 return true;
3208
3209 // gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two multiplies
3210 // together.
3211 if (GEP.getNumIndices() == 1 &&
3212 match(V: GEP.getOperand(i_nocapture: 1),
3213 P: m_OneUse(SubPattern: m_CombineOr(Ps: m_Mul(L: m_Value(), R: m_ConstantInt()),
3214 Ps: m_Shl(L: m_Value(), R: m_ConstantInt())))))
3215 return true;
3216
3217 // gep (gep %p, C1), %x, C2 is expanded so the two constants can
3218 // possibly be merged together.
3219 auto PtrOpGep = dyn_cast<GEPOperator>(Val: PtrOp);
3220 return PtrOpGep && PtrOpGep->hasAllConstantIndices() &&
3221 any_of(Range: GEP.indices(), P: [](Value *V) {
3222 const APInt *C;
3223 return match(V, P: m_APInt(Res&: C)) && !C->isZero();
3224 });
3225}
3226
3227static Instruction *foldGEPOfPhi(GetElementPtrInst &GEP, PHINode *PN,
3228 IRBuilderBase &Builder) {
3229 auto *Op1 = dyn_cast<GetElementPtrInst>(Val: PN->getOperand(i_nocapture: 0));
3230 if (!Op1)
3231 return nullptr;
3232
3233 // Don't fold a GEP into itself through a PHI node. This can only happen
3234 // through the back-edge of a loop. Folding a GEP into itself means that
3235 // the value of the previous iteration needs to be stored in the meantime,
3236 // thus requiring an additional register variable to be live, but not
3237 // actually achieving anything (the GEP still needs to be executed once per
3238 // loop iteration).
3239 if (Op1 == &GEP)
3240 return nullptr;
3241 GEPNoWrapFlags NW = Op1->getNoWrapFlags();
3242
3243 int DI = -1;
3244
3245 for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
3246 auto *Op2 = dyn_cast<GetElementPtrInst>(Val&: *I);
3247 if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
3248 Op1->getSourceElementType() != Op2->getSourceElementType())
3249 return nullptr;
3250
3251 // As for Op1 above, don't try to fold a GEP into itself.
3252 if (Op2 == &GEP)
3253 return nullptr;
3254
3255 // Keep track of the type as we walk the GEP.
3256 Type *CurTy = nullptr;
3257
3258 for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
3259 if (Op1->getOperand(i_nocapture: J)->getType() != Op2->getOperand(i_nocapture: J)->getType())
3260 return nullptr;
3261
3262 if (Op1->getOperand(i_nocapture: J) != Op2->getOperand(i_nocapture: J)) {
3263 if (DI == -1) {
3264 // We have not seen any differences yet in the GEPs feeding the
3265 // PHI yet, so we record this one if it is allowed to be a
3266 // variable.
3267
3268 // The first two arguments can vary for any GEP, the rest have to be
3269 // static for struct slots
3270 if (J > 1) {
3271 assert(CurTy && "No current type?");
3272 if (CurTy->isStructTy())
3273 return nullptr;
3274 }
3275
3276 DI = J;
3277 } else {
3278 // The GEP is different by more than one input. While this could be
3279 // extended to support GEPs that vary by more than one variable it
3280 // doesn't make sense since it greatly increases the complexity and
3281 // would result in an R+R+R addressing mode which no backend
3282 // directly supports and would need to be broken into several
3283 // simpler instructions anyway.
3284 return nullptr;
3285 }
3286 }
3287
3288 // Sink down a layer of the type for the next iteration.
3289 if (J > 0) {
3290 if (J == 1) {
3291 CurTy = Op1->getSourceElementType();
3292 } else {
3293 CurTy =
3294 GetElementPtrInst::getTypeAtIndex(Ty: CurTy, Idx: Op1->getOperand(i_nocapture: J));
3295 }
3296 }
3297 }
3298
3299 NW &= Op2->getNoWrapFlags();
3300 }
3301
3302 // If not all GEPs are identical we'll have to create a new PHI node.
3303 // Check that the old PHI node has only one use so that it will get
3304 // removed.
3305 if (DI != -1 && !PN->hasOneUse())
3306 return nullptr;
3307
3308 auto *NewGEP = cast<GetElementPtrInst>(Val: Op1->clone());
3309 NewGEP->setNoWrapFlags(NW);
3310
3311 if (DI == -1) {
3312 // All the GEPs feeding the PHI are identical. Clone one down into our
3313 // BB so that it can be merged with the current GEP.
3314 } else {
3315 // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
3316 // into the current block so it can be merged, and create a new PHI to
3317 // set that index.
3318 PHINode *NewPN;
3319 {
3320 IRBuilderBase::InsertPointGuard Guard(Builder);
3321 Builder.SetInsertPoint(PN);
3322 NewPN = Builder.CreatePHI(Ty: Op1->getOperand(i_nocapture: DI)->getType(),
3323 NumReservedValues: PN->getNumOperands());
3324 }
3325
3326 for (auto &I : PN->operands())
3327 NewPN->addIncoming(V: cast<GEPOperator>(Val&: I)->getOperand(i_nocapture: DI),
3328 BB: PN->getIncomingBlock(U: I));
3329
3330 NewGEP->setOperand(i_nocapture: DI, Val_nocapture: NewPN);
3331 }
3332
3333 NewGEP->insertBefore(BB&: *GEP.getParent(), InsertPos: GEP.getParent()->getFirstInsertionPt());
3334 return NewGEP;
3335}
3336
3337Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
3338 Value *PtrOp = GEP.getOperand(i_nocapture: 0);
3339 SmallVector<Value *, 8> Indices(GEP.indices());
3340 Type *GEPType = GEP.getType();
3341 Type *GEPEltType = GEP.getSourceElementType();
3342 if (Value *V =
3343 simplifyGEPInst(SrcTy: GEPEltType, Ptr: PtrOp, Indices, NW: GEP.getNoWrapFlags(),
3344 Q: SQ.getWithInstruction(I: &GEP)))
3345 return replaceInstUsesWith(I&: GEP, V);
3346
3347 // For vector geps, use the generic demanded vector support.
3348 // Skip if GEP return type is scalable. The number of elements is unknown at
3349 // compile-time.
3350 if (auto *GEPFVTy = dyn_cast<FixedVectorType>(Val: GEPType)) {
3351 auto VWidth = GEPFVTy->getNumElements();
3352 APInt PoisonElts(VWidth, 0);
3353 APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth));
3354 if (Value *V = SimplifyDemandedVectorElts(V: &GEP, DemandedElts: AllOnesEltMask,
3355 PoisonElts)) {
3356 if (V != &GEP)
3357 return replaceInstUsesWith(I&: GEP, V);
3358 return &GEP;
3359 }
3360 }
3361
3362 // Eliminate unneeded casts for indices, and replace indices which displace
3363 // by multiples of a zero size type with zero.
3364 bool MadeChange = false;
3365
3366 // Index width may not be the same width as pointer width.
3367 // Data layout chooses the right type based on supported integer types.
3368 Type *NewScalarIndexTy =
3369 DL.getIndexType(PtrTy: GEP.getPointerOperandType()->getScalarType());
3370
3371 gep_type_iterator GTI = gep_type_begin(GEP);
3372 for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
3373 ++I, ++GTI) {
3374 // Skip indices into struct types.
3375 if (GTI.isStruct())
3376 continue;
3377
3378 Type *IndexTy = (*I)->getType();
3379 Type *NewIndexType =
3380 IndexTy->isVectorTy()
3381 ? VectorType::get(ElementType: NewScalarIndexTy,
3382 EC: cast<VectorType>(Val: IndexTy)->getElementCount())
3383 : NewScalarIndexTy;
3384
3385 // If the element type has zero size then any index over it is equivalent
3386 // to an index of zero, so replace it with zero if it is not zero already.
3387 Type *EltTy = GTI.getIndexedType();
3388 if (EltTy->isSized() && DL.getTypeAllocSize(Ty: EltTy).isZero())
3389 if (!isa<Constant>(Val: *I) || !match(V: I->get(), P: m_Zero())) {
3390 *I = Constant::getNullValue(Ty: NewIndexType);
3391 MadeChange = true;
3392 }
3393
3394 if (IndexTy != NewIndexType) {
3395 // If we are using a wider index than needed for this platform, shrink
3396 // it to what we need. If narrower, sign-extend it to what we need.
3397 // This explicit cast can make subsequent optimizations more obvious.
3398 if (IndexTy->getScalarSizeInBits() <
3399 NewIndexType->getScalarSizeInBits()) {
3400 if (GEP.hasNoUnsignedWrap() && GEP.hasNoUnsignedSignedWrap())
3401 *I = Builder.CreateZExt(V: *I, DestTy: NewIndexType, Name: "", /*IsNonNeg=*/true);
3402 else
3403 *I = Builder.CreateSExt(V: *I, DestTy: NewIndexType);
3404 } else {
3405 *I = Builder.CreateTrunc(V: *I, DestTy: NewIndexType, Name: "", IsNUW: GEP.hasNoUnsignedWrap(),
3406 IsNSW: GEP.hasNoUnsignedSignedWrap());
3407 }
3408 MadeChange = true;
3409 }
3410 }
3411 if (MadeChange)
3412 return &GEP;
3413
3414 // Canonicalize constant GEPs to i8 type.
3415 if (!GEPEltType->isIntegerTy(BitWidth: 8) && GEP.hasAllConstantIndices()) {
3416 APInt Offset(DL.getIndexTypeSizeInBits(Ty: GEPType), 0);
3417 if (GEP.accumulateConstantOffset(DL, Offset))
3418 return replaceInstUsesWith(
3419 I&: GEP, V: Builder.CreatePtrAdd(Ptr: PtrOp, Offset: Builder.getInt(AI: Offset), Name: "",
3420 NW: GEP.getNoWrapFlags()));
3421 }
3422
3423 if (shouldCanonicalizeGEPToPtrAdd(GEP)) {
3424 Value *Offset = EmitGEPOffset(GEP: cast<GEPOperator>(Val: &GEP));
3425 Value *NewGEP =
3426 Builder.CreatePtrAdd(Ptr: PtrOp, Offset, Name: "", NW: GEP.getNoWrapFlags());
3427 return replaceInstUsesWith(I&: GEP, V: NewGEP);
3428 }
3429
3430 // Strip trailing zero indices.
3431 auto *LastIdx = dyn_cast<Constant>(Val: Indices.back());
3432 if (LastIdx && LastIdx->isNullValue() && !LastIdx->getType()->isVectorTy()) {
3433 return replaceInstUsesWith(
3434 I&: GEP, V: Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: PtrOp,
3435 IdxList: drop_end(RangeOrContainer&: Indices), Name: "", NW: GEP.getNoWrapFlags()));
3436 }
3437
3438 // Strip leading zero indices.
3439 auto *FirstIdx = dyn_cast<Constant>(Val: Indices.front());
3440 if (FirstIdx && FirstIdx->isNullValue() &&
3441 !FirstIdx->getType()->isVectorTy()) {
3442 gep_type_iterator GTI = gep_type_begin(GEP);
3443 ++GTI;
3444 if (!GTI.isStruct() && GTI.getSequentialElementStride(DL) ==
3445 DL.getTypeAllocSize(Ty: GTI.getIndexedType()))
3446 return replaceInstUsesWith(I&: GEP, V: Builder.CreateGEP(Ty: GTI.getIndexedType(),
3447 Ptr: GEP.getPointerOperand(),
3448 IdxList: drop_begin(RangeOrContainer&: Indices), Name: "",
3449 NW: GEP.getNoWrapFlags()));
3450 }
3451
3452 // Scalarize vector operands; prefer splat-of-gep.as canonical form.
3453 // Note that this looses information about undef lanes; we run it after
3454 // demanded bits to partially mitigate that loss.
3455 if (GEPType->isVectorTy() && llvm::any_of(Range: GEP.operands(), P: [](Value *Op) {
3456 return Op->getType()->isVectorTy() && getSplatValue(V: Op);
3457 })) {
3458 SmallVector<Value *> NewOps;
3459 for (auto &Op : GEP.operands()) {
3460 if (Op->getType()->isVectorTy())
3461 if (Value *Scalar = getSplatValue(V: Op)) {
3462 NewOps.push_back(Elt: Scalar);
3463 continue;
3464 }
3465 NewOps.push_back(Elt: Op);
3466 }
3467
3468 Value *Res = Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: NewOps[0],
3469 IdxList: ArrayRef(NewOps).drop_front(), Name: GEP.getName(),
3470 NW: GEP.getNoWrapFlags());
3471 if (!Res->getType()->isVectorTy()) {
3472 ElementCount EC = cast<VectorType>(Val: GEPType)->getElementCount();
3473 Res = Builder.CreateVectorSplat(EC, V: Res);
3474 }
3475 return replaceInstUsesWith(I&: GEP, V: Res);
3476 }
3477
3478 bool SeenNonZeroIndex = false;
3479 for (auto [IdxNum, Idx] : enumerate(First&: Indices)) {
3480 // Ignore one leading zero index.
3481 auto *C = dyn_cast<Constant>(Val: Idx);
3482 if (C && C->isNullValue() && IdxNum == 0)
3483 continue;
3484
3485 if (!SeenNonZeroIndex) {
3486 SeenNonZeroIndex = true;
3487 continue;
3488 }
3489
3490 // GEP has multiple non-zero indices: Split it.
3491 ArrayRef<Value *> FrontIndices = ArrayRef(Indices).take_front(N: IdxNum);
3492 Value *FrontGEP =
3493 Builder.CreateGEP(Ty: GEPEltType, Ptr: PtrOp, IdxList: FrontIndices,
3494 Name: GEP.getName() + ".split", NW: GEP.getNoWrapFlags());
3495
3496 SmallVector<Value *> BackIndices;
3497 BackIndices.push_back(Elt: Constant::getNullValue(Ty: NewScalarIndexTy));
3498 append_range(C&: BackIndices, R: drop_begin(RangeOrContainer&: Indices, N: IdxNum));
3499 return GetElementPtrInst::Create(
3500 PointeeType: GetElementPtrInst::getIndexedType(Ty: GEPEltType, IdxList: FrontIndices), Ptr: FrontGEP,
3501 IdxList: BackIndices, NW: GEP.getNoWrapFlags());
3502 }
3503
3504 // Canonicalize gep %T to gep [sizeof(%T) x i8]:
3505 auto IsCanonicalType = [](Type *Ty) {
3506 if (auto *AT = dyn_cast<ArrayType>(Val: Ty))
3507 Ty = AT->getElementType();
3508 return Ty->isIntegerTy(BitWidth: 8);
3509 };
3510 if (Indices.size() == 1 && !IsCanonicalType(GEPEltType)) {
3511 TypeSize Scale = DL.getTypeAllocSize(Ty: GEPEltType);
3512 assert(!Scale.isScalable() && "Should have been handled earlier");
3513 Type *NewElemTy = Builder.getInt8Ty();
3514 if (Scale.getFixedValue() != 1)
3515 NewElemTy = ArrayType::get(ElementType: NewElemTy, NumElements: Scale.getFixedValue());
3516 GEP.setSourceElementType(NewElemTy);
3517 GEP.setResultElementType(NewElemTy);
3518 // Don't bother revisiting the GEP after this change.
3519 MadeIRChange = true;
3520 }
3521
3522 // Check to see if the inputs to the PHI node are getelementptr instructions.
3523 if (auto *PN = dyn_cast<PHINode>(Val: PtrOp)) {
3524 if (Value *NewPtrOp = foldGEPOfPhi(GEP, PN, Builder))
3525 return replaceOperand(I&: GEP, OpNum: 0, V: NewPtrOp);
3526 }
3527
3528 if (auto *Src = dyn_cast<GEPOperator>(Val: PtrOp))
3529 if (Instruction *I = visitGEPOfGEP(GEP, Src))
3530 return I;
3531
3532 if (GEP.getNumIndices() == 1) {
3533 unsigned AS = GEP.getPointerAddressSpace();
3534 if (GEP.getOperand(i_nocapture: 1)->getType()->getScalarSizeInBits() ==
3535 DL.getIndexSizeInBits(AS)) {
3536 uint64_t TyAllocSize = DL.getTypeAllocSize(Ty: GEPEltType).getFixedValue();
3537
3538 if (TyAllocSize == 1) {
3539 // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
3540 // but only if the result pointer is only used as if it were an integer.
3541 // (The case where the underlying object is the same is handled by
3542 // InstSimplify.)
3543 Value *X = GEP.getPointerOperand();
3544 Value *Y;
3545 if (match(V: GEP.getOperand(i_nocapture: 1), P: m_Sub(L: m_PtrToIntOrAddr(Op: m_Value(V&: Y)),
3546 R: m_PtrToIntOrAddr(Op: m_Specific(V: X)))) &&
3547 GEPType == Y->getType()) {
3548 bool HasNonAddressBits =
3549 DL.getAddressSizeInBits(AS) != DL.getPointerSizeInBits(AS);
3550 bool Changed = GEP.replaceUsesWithIf(New: Y, ShouldReplace: [&](Use &U) {
3551 return isa<PtrToAddrInst, ICmpInst>(Val: U.getUser()) ||
3552 (!HasNonAddressBits && isa<PtrToIntInst>(Val: U.getUser()));
3553 });
3554 return Changed ? &GEP : nullptr;
3555 }
3556 } else if (auto *ExactIns =
3557 dyn_cast<PossiblyExactOperator>(Val: GEP.getOperand(i_nocapture: 1))) {
3558 // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
3559 Value *V;
3560 if (ExactIns->isExact()) {
3561 if ((has_single_bit(Value: TyAllocSize) &&
3562 match(V: GEP.getOperand(i_nocapture: 1),
3563 P: m_Shr(L: m_Value(V),
3564 R: m_SpecificInt(V: countr_zero(Val: TyAllocSize))))) ||
3565 match(V: GEP.getOperand(i_nocapture: 1),
3566 P: m_IDiv(L: m_Value(V), R: m_SpecificInt(V: TyAllocSize)))) {
3567 return GetElementPtrInst::Create(PointeeType: Builder.getInt8Ty(),
3568 Ptr: GEP.getPointerOperand(), IdxList: V,
3569 NW: GEP.getNoWrapFlags());
3570 }
3571 }
3572 if (ExactIns->isExact() && ExactIns->hasOneUse()) {
3573 // Try to canonicalize non-i8 element type to i8 if the index is an
3574 // exact instruction. If the index is an exact instruction (div/shr)
3575 // with a constant RHS, we can fold the non-i8 element scale into the
3576 // div/shr (similiar to the mul case, just inverted).
3577 const APInt *C;
3578 std::optional<APInt> NewC;
3579 if (has_single_bit(Value: TyAllocSize) &&
3580 match(V: ExactIns, P: m_Shr(L: m_Value(V), R: m_APInt(Res&: C))) &&
3581 C->uge(RHS: countr_zero(Val: TyAllocSize)))
3582 NewC = *C - countr_zero(Val: TyAllocSize);
3583 else if (match(V: ExactIns, P: m_UDiv(L: m_Value(V), R: m_APInt(Res&: C)))) {
3584 APInt Quot;
3585 uint64_t Rem;
3586 APInt::udivrem(LHS: *C, RHS: TyAllocSize, Quotient&: Quot, Remainder&: Rem);
3587 if (Rem == 0)
3588 NewC = Quot;
3589 } else if (match(V: ExactIns, P: m_SDiv(L: m_Value(V), R: m_APInt(Res&: C)))) {
3590 APInt Quot;
3591 int64_t Rem;
3592 APInt::sdivrem(LHS: *C, RHS: TyAllocSize, Quotient&: Quot, Remainder&: Rem);
3593 // For sdiv we need to make sure we arent creating INT_MIN / -1.
3594 if (!Quot.isAllOnes() && Rem == 0)
3595 NewC = Quot;
3596 }
3597
3598 if (NewC.has_value()) {
3599 Value *NewOp = Builder.CreateExactBinOp(
3600 Opc: static_cast<Instruction::BinaryOps>(ExactIns->getOpcode()), LHS: V,
3601 RHS: ConstantInt::get(Ty: V->getType(), V: *NewC), /*IsExact=*/true);
3602 return GetElementPtrInst::Create(PointeeType: Builder.getInt8Ty(),
3603 Ptr: GEP.getPointerOperand(), IdxList: NewOp,
3604 NW: GEP.getNoWrapFlags());
3605 }
3606 }
3607 }
3608 }
3609 }
3610 // We do not handle pointer-vector geps here.
3611 if (GEPType->isVectorTy())
3612 return nullptr;
3613
3614 if (!GEP.isInBounds()) {
3615 unsigned IdxWidth =
3616 DL.getIndexSizeInBits(AS: PtrOp->getType()->getPointerAddressSpace());
3617 APInt BasePtrOffset(IdxWidth, 0);
3618 Value *UnderlyingPtrOp =
3619 PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, Offset&: BasePtrOffset);
3620 bool CanBeNull;
3621 uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes(
3622 DL, CanBeNull, /*CanBeFreed=*/nullptr);
3623 // We can ignore CanBeFreed here, because inbounds is explicitly allowed to
3624 // refer to a deallocated object.
3625 if (!CanBeNull && DerefBytes != 0) {
3626 if (GEP.accumulateConstantOffset(DL, Offset&: BasePtrOffset) &&
3627 BasePtrOffset.isNonNegative()) {
3628 APInt AllocSize(IdxWidth, DerefBytes);
3629 if (BasePtrOffset.ule(RHS: AllocSize)) {
3630 return GetElementPtrInst::CreateInBounds(
3631 PointeeType: GEP.getSourceElementType(), Ptr: PtrOp, IdxList: Indices, NameStr: GEP.getName());
3632 }
3633 }
3634 }
3635 }
3636
3637 // nusw + nneg -> nuw
3638 if (GEP.hasNoUnsignedSignedWrap() && !GEP.hasNoUnsignedWrap() &&
3639 all_of(Range: GEP.indices(), P: [&](Value *Idx) {
3640 return isKnownNonNegative(V: Idx, SQ: SQ.getWithInstruction(I: &GEP));
3641 })) {
3642 GEP.setNoWrapFlags(GEP.getNoWrapFlags() | GEPNoWrapFlags::noUnsignedWrap());
3643 return &GEP;
3644 }
3645
3646 // These rewrites are trying to preserve inbounds/nuw attributes. So we want
3647 // to do this after having tried to derive "nuw" above.
3648 if (GEP.getNumIndices() == 1) {
3649 // Given (gep p, x+y) we want to determine the common nowrap flags for both
3650 // geps if transforming into (gep (gep p, x), y).
3651 auto GetPreservedNoWrapFlags = [&](bool AddIsNUW) {
3652 // We can preserve both "inbounds nuw", "nusw nuw" and "nuw" if we know
3653 // that x + y does not have unsigned wrap.
3654 if (GEP.hasNoUnsignedWrap() && AddIsNUW)
3655 return GEP.getNoWrapFlags();
3656 return GEPNoWrapFlags::none();
3657 };
3658
3659 // Try to replace ADD + GEP with GEP + GEP.
3660 Value *Idx1, *Idx2;
3661 if (match(V: GEP.getOperand(i_nocapture: 1),
3662 P: m_OneUse(SubPattern: m_AddLike(L: m_Value(V&: Idx1), R: m_Value(V&: Idx2))))) {
3663 // %idx = add i64 %idx1, %idx2
3664 // %gep = getelementptr i32, ptr %ptr, i64 %idx
3665 // as:
3666 // %newptr = getelementptr i32, ptr %ptr, i64 %idx1
3667 // %newgep = getelementptr i32, ptr %newptr, i64 %idx2
3668 bool NUW = match(V: GEP.getOperand(i_nocapture: 1), P: m_NUWAddLike(L: m_Value(), R: m_Value()));
3669 GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW);
3670 auto *NewPtr =
3671 Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: GEP.getPointerOperand(),
3672 IdxList: Idx1, Name: "", NW: NWFlags);
3673 return replaceInstUsesWith(I&: GEP,
3674 V: Builder.CreateGEP(Ty: GEP.getSourceElementType(),
3675 Ptr: NewPtr, IdxList: Idx2, Name: "", NW: NWFlags));
3676 }
3677 ConstantInt *C;
3678 if (match(V: GEP.getOperand(i_nocapture: 1), P: m_OneUse(SubPattern: m_SExtLike(Op: m_OneUse(SubPattern: m_NSWAddLike(
3679 L: m_Value(V&: Idx1), R: m_ConstantInt(CI&: C))))))) {
3680 // %add = add nsw i32 %idx1, idx2
3681 // %sidx = sext i32 %add to i64
3682 // %gep = getelementptr i32, ptr %ptr, i64 %sidx
3683 // as:
3684 // %newptr = getelementptr i32, ptr %ptr, i32 %idx1
3685 // %newgep = getelementptr i32, ptr %newptr, i32 idx2
3686 bool NUW = match(V: GEP.getOperand(i_nocapture: 1),
3687 P: m_NNegZExt(Op: m_NUWAddLike(L: m_Value(), R: m_Value())));
3688 GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW);
3689 auto *NewPtr = Builder.CreateGEP(
3690 Ty: GEP.getSourceElementType(), Ptr: GEP.getPointerOperand(),
3691 IdxList: Builder.CreateSExt(V: Idx1, DestTy: GEP.getOperand(i_nocapture: 1)->getType()), Name: "", NW: NWFlags);
3692 return replaceInstUsesWith(
3693 I&: GEP,
3694 V: Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: NewPtr,
3695 IdxList: Builder.CreateSExt(V: C, DestTy: GEP.getOperand(i_nocapture: 1)->getType()),
3696 Name: "", NW: NWFlags));
3697 }
3698 }
3699
3700 if (Instruction *R = foldSelectGEP(GEP, Builder))
3701 return R;
3702
3703 // srem -> (and/urem) for inbounds+nuw GEP
3704 if (Indices.size() == 1 && GEP.isInBounds() && GEP.hasNoUnsignedWrap()) {
3705 Value *X, *Y;
3706
3707 // Match: idx = srem X, Y -- where Y is a power-of-two value.
3708 if (match(V: Indices[0], P: m_OneUse(SubPattern: m_SRem(L: m_Value(V&: X), R: m_Value(V&: Y)))) &&
3709 isKnownToBeAPowerOfTwo(V: Y, /*OrZero=*/true, CxtI: &GEP)) {
3710 // If GEP is inbounds+nuw, the offset cannot be negative
3711 // -> srem by power-of-two can be treated as urem,
3712 // and urem by power-of-two folds to 'and' later.
3713 // OrZero=true is fine here because division by zero is UB.
3714 Instruction *OldIdxI = cast<Instruction>(Val: Indices[0]);
3715 Value *NewIdx = Builder.CreateURem(LHS: X, RHS: Y, Name: OldIdxI->getName());
3716
3717 return GetElementPtrInst::Create(PointeeType: GEPEltType, Ptr: PtrOp, IdxList: {NewIdx},
3718 NW: GEP.getNoWrapFlags());
3719 }
3720 }
3721
3722 return nullptr;
3723}
3724
3725static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
3726 Instruction *AI) {
3727 if (isa<ConstantPointerNull>(Val: V))
3728 return true;
3729 if (auto *LI = dyn_cast<LoadInst>(Val: V))
3730 return isa<GlobalVariable>(Val: LI->getPointerOperand());
3731 // Two distinct allocations will never be equal.
3732 return isAllocLikeFn(V, TLI: &TLI) && V != AI;
3733}
3734
3735/// Given a call CB which uses an address UsedV, return true if we can prove the
3736/// call's only possible effect is storing to V.
3737static bool isRemovableWrite(CallBase &CB, Value *UsedV,
3738 const TargetLibraryInfo &TLI) {
3739 if (!CB.use_empty())
3740 // TODO: add recursion if returned attribute is present
3741 return false;
3742
3743 if (CB.isTerminator())
3744 // TODO: remove implementation restriction
3745 return false;
3746
3747 if (!CB.willReturn() || !CB.doesNotThrow())
3748 return false;
3749
3750 // If the only possible side effect of the call is writing to the alloca,
3751 // and the result isn't used, we can safely remove any reads implied by the
3752 // call including those which might read the alloca itself.
3753 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: &CB, TLI);
3754 return Dest && Dest->Ptr == UsedV;
3755}
3756
3757static std::optional<ModRefInfo>
3758isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<Instruction *> &Users,
3759 const TargetLibraryInfo &TLI, bool KnowInit) {
3760 SmallVector<Instruction*, 4> Worklist;
3761 const std::optional<StringRef> Family = getAllocationFamily(I: AI, TLI: &TLI);
3762 Worklist.push_back(Elt: AI);
3763 ModRefInfo Access = KnowInit ? ModRefInfo::NoModRef : ModRefInfo::Mod;
3764
3765 do {
3766 Instruction *PI = Worklist.pop_back_val();
3767 for (User *U : PI->users()) {
3768 Instruction *I = cast<Instruction>(Val: U);
3769 if (Users.size() >= MaxAllocSiteRemovableUsers)
3770 return std::nullopt;
3771 switch (I->getOpcode()) {
3772 default:
3773 // Give up the moment we see something we can't handle.
3774 return std::nullopt;
3775
3776 case Instruction::AddrSpaceCast:
3777 case Instruction::BitCast:
3778 case Instruction::GetElementPtr:
3779 Users.emplace_back(Args&: I);
3780 Worklist.push_back(Elt: I);
3781 continue;
3782
3783 case Instruction::ICmp: {
3784 ICmpInst *ICI = cast<ICmpInst>(Val: I);
3785 // We can fold eq/ne comparisons with null to false/true, respectively.
3786 // We also fold comparisons in some conditions provided the alloc has
3787 // not escaped (see isNeverEqualToUnescapedAlloc).
3788 if (!ICI->isEquality())
3789 return std::nullopt;
3790 unsigned OtherIndex = (ICI->getOperand(i_nocapture: 0) == PI) ? 1 : 0;
3791 if (!isNeverEqualToUnescapedAlloc(V: ICI->getOperand(i_nocapture: OtherIndex), TLI, AI))
3792 return std::nullopt;
3793
3794 // Do not fold compares to aligned_alloc calls, as they may have to
3795 // return null in case the required alignment cannot be satisfied,
3796 // unless we can prove that both alignment and size are valid.
3797 auto AlignmentAndSizeKnownValid = [](CallBase *CB) {
3798 // Check if alignment and size of a call to aligned_alloc is valid,
3799 // that is alignment is a power-of-2 and the size is a multiple of the
3800 // alignment.
3801 const APInt *Alignment;
3802 const APInt *Size;
3803 return match(V: CB->getArgOperand(i: 0), P: m_APInt(Res&: Alignment)) &&
3804 match(V: CB->getArgOperand(i: 1), P: m_APInt(Res&: Size)) &&
3805 Alignment->isPowerOf2() && Size->urem(RHS: *Alignment).isZero();
3806 };
3807 auto *CB = dyn_cast<CallBase>(Val: AI);
3808 LibFunc TheLibFunc;
3809 if (CB && TLI.getLibFunc(FDecl: *CB->getCalledFunction(), F&: TheLibFunc) &&
3810 TLI.has(F: TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc &&
3811 !AlignmentAndSizeKnownValid(CB))
3812 return std::nullopt;
3813 Users.emplace_back(Args&: I);
3814 continue;
3815 }
3816
3817 case Instruction::Call:
3818 // Ignore no-op and store intrinsics.
3819 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
3820 switch (II->getIntrinsicID()) {
3821 default:
3822 return std::nullopt;
3823
3824 case Intrinsic::memmove:
3825 case Intrinsic::memcpy:
3826 case Intrinsic::memset: {
3827 MemIntrinsic *MI = cast<MemIntrinsic>(Val: II);
3828 if (MI->isVolatile())
3829 return std::nullopt;
3830 // Note: this could also be ModRef, but we can still interpret that
3831 // as just Mod in that case.
3832 ModRefInfo NewAccess =
3833 MI->getRawDest() == PI ? ModRefInfo::Mod : ModRefInfo::Ref;
3834 if ((Access & ~NewAccess) != ModRefInfo::NoModRef)
3835 return std::nullopt;
3836 Access |= NewAccess;
3837 [[fallthrough]];
3838 }
3839 case Intrinsic::assume:
3840 case Intrinsic::invariant_start:
3841 case Intrinsic::invariant_end:
3842 case Intrinsic::lifetime_start:
3843 case Intrinsic::lifetime_end:
3844 case Intrinsic::objectsize:
3845 Users.emplace_back(Args&: I);
3846 continue;
3847 case Intrinsic::launder_invariant_group:
3848 case Intrinsic::strip_invariant_group:
3849 Users.emplace_back(Args&: I);
3850 Worklist.push_back(Elt: I);
3851 continue;
3852 }
3853 }
3854
3855 if (Family && getFreedOperand(CB: cast<CallBase>(Val: I), TLI: &TLI) == PI &&
3856 getAllocationFamily(I, TLI: &TLI) == Family) {
3857 Users.emplace_back(Args&: I);
3858 continue;
3859 }
3860
3861 if (Family && getReallocatedOperand(CB: cast<CallBase>(Val: I)) == PI &&
3862 getAllocationFamily(I, TLI: &TLI) == Family) {
3863 Users.emplace_back(Args&: I);
3864 Worklist.push_back(Elt: I);
3865 continue;
3866 }
3867
3868 if (!isRefSet(MRI: Access) &&
3869 isRemovableWrite(CB&: *cast<CallBase>(Val: I), UsedV: PI, TLI)) {
3870 Access |= ModRefInfo::Mod;
3871 Users.emplace_back(Args&: I);
3872 continue;
3873 }
3874
3875 return std::nullopt;
3876
3877 case Instruction::Store: {
3878 StoreInst *SI = cast<StoreInst>(Val: I);
3879 if (SI->isVolatile() || SI->getPointerOperand() != PI)
3880 return std::nullopt;
3881 if (isRefSet(MRI: Access))
3882 return std::nullopt;
3883 Access |= ModRefInfo::Mod;
3884 Users.emplace_back(Args&: I);
3885 continue;
3886 }
3887
3888 case Instruction::Load: {
3889 LoadInst *LI = cast<LoadInst>(Val: I);
3890 if (LI->isVolatile() || LI->getPointerOperand() != PI)
3891 return std::nullopt;
3892 if (isModSet(MRI: Access))
3893 return std::nullopt;
3894 Access |= ModRefInfo::Ref;
3895 Users.emplace_back(Args&: I);
3896 continue;
3897 }
3898 }
3899 llvm_unreachable("missing a return?");
3900 }
3901 } while (!Worklist.empty());
3902
3903 assert(Access != ModRefInfo::ModRef);
3904 return Access;
3905}
3906
3907Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
3908 assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI));
3909
3910 // If we have a malloc call which is only used in any amount of comparisons to
3911 // null and free calls, delete the calls and replace the comparisons with true
3912 // or false as appropriate.
3913
3914 // This is based on the principle that we can substitute our own allocation
3915 // function (which will never return null) rather than knowledge of the
3916 // specific function being called. In some sense this can change the permitted
3917 // outputs of a program (when we convert a malloc to an alloca, the fact that
3918 // the allocation is now on the stack is potentially visible, for example),
3919 // but we believe in a permissible manner.
3920 //
3921 // Collect into Instruction* first to avoid expensive WeakTrackingVH
3922 // register/unregister overhead; convert to WeakTrackingVH only when the
3923 // site is actually removable.
3924 SmallVector<Instruction *, 64> RawUsers;
3925
3926 // If we are removing an alloca with a dbg.declare, insert dbg.value calls
3927 // before each store.
3928 SmallVector<DbgVariableRecord *, 8> DVRs;
3929 std::unique_ptr<DIBuilder> DIB;
3930 if (isa<AllocaInst>(Val: MI)) {
3931 findDbgUsers(V: &MI, DbgVariableRecords&: DVRs);
3932 DIB.reset(p: new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
3933 }
3934
3935 // Determine what getInitialValueOfAllocation would return without actually
3936 // allocating the result.
3937 bool KnowInitUndef = false;
3938 bool KnowInitZero = false;
3939 Constant *Init =
3940 getInitialValueOfAllocation(V: &MI, TLI: &TLI, Ty: Type::getInt8Ty(C&: MI.getContext()));
3941 if (Init) {
3942 if (isa<UndefValue>(Val: Init))
3943 KnowInitUndef = true;
3944 else if (Init->isNullValue())
3945 KnowInitZero = true;
3946 }
3947 // The various sanitizers don't actually return undef memory, but rather
3948 // memory initialized with special forms of runtime poison
3949 auto &F = *MI.getFunction();
3950 if (F.hasFnAttribute(Kind: Attribute::SanitizeMemory) ||
3951 F.hasFnAttribute(Kind: Attribute::SanitizeAddress))
3952 KnowInitUndef = false;
3953
3954 auto Removable =
3955 isAllocSiteRemovable(AI: &MI, Users&: RawUsers, TLI, KnowInit: KnowInitZero | KnowInitUndef);
3956 if (Removable) {
3957 SmallVector<WeakTrackingVH, 64> Users(RawUsers.begin(), RawUsers.end());
3958 for (WeakTrackingVH &User : Users) {
3959 // Lowering all @llvm.objectsize and MTI calls first because they may use
3960 // a bitcast/GEP of the alloca we are removing.
3961 if (!User)
3962 continue;
3963
3964 Instruction *I = cast<Instruction>(Val: &*User);
3965
3966 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
3967 if (II->getIntrinsicID() == Intrinsic::objectsize) {
3968 SmallVector<Instruction *> InsertedInstructions;
3969 Value *Result = lowerObjectSizeCall(
3970 ObjectSize: II, DL, TLI: &TLI, AA, /*MustSucceed=*/true, InsertedInstructions: &InsertedInstructions);
3971 for (Instruction *Inserted : InsertedInstructions)
3972 Worklist.add(I: Inserted);
3973 replaceInstUsesWith(I&: *I, V: Result);
3974 eraseInstFromFunction(I&: *I);
3975 User = nullptr; // Skip examining in the next loop.
3976 continue;
3977 }
3978 if (auto *MTI = dyn_cast<MemTransferInst>(Val: I)) {
3979 if (KnowInitZero && isRefSet(MRI: *Removable)) {
3980 IRBuilderBase::InsertPointGuard Guard(Builder);
3981 Builder.SetInsertPoint(MTI);
3982 auto *M = Builder.CreateMemSet(
3983 Ptr: MTI->getRawDest(),
3984 Val: ConstantInt::get(Ty: Type::getInt8Ty(C&: MI.getContext()), V: 0),
3985 Size: MTI->getLength(), Align: MTI->getDestAlign());
3986 M->copyMetadata(SrcInst: *MTI);
3987 }
3988 }
3989 }
3990 }
3991 for (WeakTrackingVH &User : Users) {
3992 if (!User)
3993 continue;
3994
3995 Instruction *I = cast<Instruction>(Val: &*User);
3996
3997 if (ICmpInst *C = dyn_cast<ICmpInst>(Val: I)) {
3998 replaceInstUsesWith(
3999 I&: *C, V: ConstantInt::get(Ty: C->getType(), V: C->isFalseWhenEqual()));
4000 } else if (auto *SI = dyn_cast<StoreInst>(Val: I)) {
4001 for (auto *DVR : DVRs)
4002 if (DVR->isAddressOfVariable())
4003 ConvertDebugDeclareToDebugValue(DVR, SI, Builder&: *DIB);
4004 } else {
4005 // Casts, GEP, or anything else: we're about to delete this instruction,
4006 // so it can not have any valid uses.
4007 Constant *Replace;
4008 if (isa<LoadInst>(Val: I)) {
4009 assert(KnowInitZero || KnowInitUndef);
4010 Replace = KnowInitUndef ? UndefValue::get(T: I->getType())
4011 : Constant::getNullValue(Ty: I->getType());
4012 } else
4013 Replace = PoisonValue::get(T: I->getType());
4014 replaceInstUsesWith(I&: *I, V: Replace);
4015 }
4016 eraseInstFromFunction(I&: *I);
4017 }
4018
4019 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &MI)) {
4020 // Replace invoke with a NOP intrinsic to maintain the original CFG
4021 Module *M = II->getModule();
4022 Function *F = Intrinsic::getOrInsertDeclaration(M, id: Intrinsic::donothing);
4023 auto *NewII = InvokeInst::Create(
4024 Func: F, IfNormal: II->getNormalDest(), IfException: II->getUnwindDest(), Args: {}, NameStr: "", InsertBefore: II->getParent());
4025 NewII->setDebugLoc(II->getDebugLoc());
4026 }
4027
4028 // Remove debug intrinsics which describe the value contained within the
4029 // alloca. In addition to removing dbg.{declare,addr} which simply point to
4030 // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
4031 //
4032 // ```
4033 // define void @foo(i32 %0) {
4034 // %a = alloca i32 ; Deleted.
4035 // store i32 %0, i32* %a
4036 // dbg.value(i32 %0, "arg0") ; Not deleted.
4037 // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
4038 // call void @trivially_inlinable_no_op(i32* %a)
4039 // ret void
4040 // }
4041 // ```
4042 //
4043 // This may not be required if we stop describing the contents of allocas
4044 // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
4045 // the LowerDbgDeclare utility.
4046 //
4047 // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
4048 // "arg0" dbg.value may be stale after the call. However, failing to remove
4049 // the DW_OP_deref dbg.value causes large gaps in location coverage.
4050 //
4051 // FIXME: the Assignment Tracking project has now likely made this
4052 // redundant (and it's sometimes harmful).
4053 for (auto *DVR : DVRs)
4054 if (DVR->isAddressOfVariable() || DVR->getExpression()->startsWithDeref())
4055 DVR->eraseFromParent();
4056
4057 return eraseInstFromFunction(I&: MI);
4058 }
4059 return nullptr;
4060}
4061
4062/// Move the call to free before a NULL test.
4063///
4064/// Check if this free is accessed after its argument has been test
4065/// against NULL (property 0).
4066/// If yes, it is legal to move this call in its predecessor block.
4067///
4068/// The move is performed only if the block containing the call to free
4069/// will be removed, i.e.:
4070/// 1. it has only one predecessor P, and P has two successors
4071/// 2. it contains the call, noops, and an unconditional branch
4072/// 3. its successor is the same as its predecessor's successor
4073///
4074/// The profitability is out-of concern here and this function should
4075/// be called only if the caller knows this transformation would be
4076/// profitable (e.g., for code size).
4077static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
4078 const DataLayout &DL) {
4079 Value *Op = FI.getArgOperand(i: 0);
4080 BasicBlock *FreeInstrBB = FI.getParent();
4081 BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
4082
4083 // Validate part of constraint #1: Only one predecessor
4084 // FIXME: We can extend the number of predecessor, but in that case, we
4085 // would duplicate the call to free in each predecessor and it may
4086 // not be profitable even for code size.
4087 if (!PredBB)
4088 return nullptr;
4089
4090 // Validate constraint #2: Does this block contains only the call to
4091 // free, noops, and an unconditional branch?
4092 BasicBlock *SuccBB;
4093 Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
4094 if (!match(V: FreeInstrBBTerminator, P: m_UnconditionalBr(Succ&: SuccBB)))
4095 return nullptr;
4096
4097 // If there are only 2 instructions in the block, at this point,
4098 // this is the call to free and unconditional.
4099 // If there are more than 2 instructions, check that they are noops
4100 // i.e., they won't hurt the performance of the generated code.
4101 if (FreeInstrBB->size() != 2) {
4102 for (const Instruction &Inst : *FreeInstrBB) {
4103 if (&Inst == &FI || &Inst == FreeInstrBBTerminator ||
4104 isa<PseudoProbeInst>(Val: Inst))
4105 continue;
4106 auto *Cast = dyn_cast<CastInst>(Val: &Inst);
4107 if (!Cast || !Cast->isNoopCast(DL))
4108 return nullptr;
4109 }
4110 }
4111 // Validate the rest of constraint #1 by matching on the pred branch.
4112 Instruction *TI = PredBB->getTerminator();
4113 BasicBlock *TrueBB, *FalseBB;
4114 CmpPredicate Pred;
4115 if (!match(V: TI, P: m_Br(C: m_ICmp(Pred,
4116 L: m_CombineOr(Ps: m_Specific(V: Op),
4117 Ps: m_Specific(V: Op->stripPointerCasts())),
4118 R: m_Zero()),
4119 T&: TrueBB, F&: FalseBB)))
4120 return nullptr;
4121 if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
4122 return nullptr;
4123
4124 // Validate constraint #3: Ensure the null case just falls through.
4125 if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
4126 return nullptr;
4127 assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
4128 "Broken CFG: missing edge from predecessor to successor");
4129
4130 // At this point, we know that everything in FreeInstrBB can be moved
4131 // before TI.
4132 for (Instruction &Instr : llvm::make_early_inc_range(Range&: *FreeInstrBB)) {
4133 if (&Instr == FreeInstrBBTerminator)
4134 break;
4135 Instr.moveBeforePreserving(MovePos: TI->getIterator());
4136 }
4137 assert(FreeInstrBB->size() == 1 &&
4138 "Only the branch instruction should remain");
4139
4140 // Now that we've moved the call to free before the NULL check, we have to
4141 // remove any attributes on its parameter that imply it's non-null, because
4142 // those attributes might have only been valid because of the NULL check, and
4143 // we can get miscompiles if we keep them. This is conservative if non-null is
4144 // also implied by something other than the NULL check, but it's guaranteed to
4145 // be correct, and the conservativeness won't matter in practice, since the
4146 // attributes are irrelevant for the call to free itself and the pointer
4147 // shouldn't be used after the call.
4148 AttributeList Attrs = FI.getAttributes();
4149 Attrs = Attrs.removeParamAttribute(C&: FI.getContext(), ArgNo: 0, Kind: Attribute::NonNull);
4150 Attribute Dereferenceable = Attrs.getParamAttr(ArgNo: 0, Kind: Attribute::Dereferenceable);
4151 if (Dereferenceable.isValid()) {
4152 uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
4153 Attrs = Attrs.removeParamAttribute(C&: FI.getContext(), ArgNo: 0,
4154 Kind: Attribute::Dereferenceable);
4155 Attrs = Attrs.addDereferenceableOrNullParamAttr(C&: FI.getContext(), ArgNo: 0, Bytes);
4156 }
4157 FI.setAttributes(Attrs);
4158
4159 return &FI;
4160}
4161
4162Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
4163 // free undef -> unreachable.
4164 if (isa<UndefValue>(Val: Op)) {
4165 // Leave a marker since we can't modify the CFG here.
4166 CreateNonTerminatorUnreachable(InsertAt: &FI);
4167 return eraseInstFromFunction(I&: FI);
4168 }
4169
4170 // If we have 'free null' delete the instruction. This can happen in stl code
4171 // when lots of inlining happens.
4172 if (isa<ConstantPointerNull>(Val: Op))
4173 return eraseInstFromFunction(I&: FI);
4174
4175 // If we had free(realloc(...)) with no intervening uses, then eliminate the
4176 // realloc() entirely.
4177 CallInst *CI = dyn_cast<CallInst>(Val: Op);
4178 if (CI && CI->hasOneUse())
4179 if (Value *ReallocatedOp = getReallocatedOperand(CB: CI))
4180 return eraseInstFromFunction(I&: *replaceInstUsesWith(I&: *CI, V: ReallocatedOp));
4181
4182 // If we optimize for code size, try to move the call to free before the null
4183 // test so that simplify cfg can remove the empty block and dead code
4184 // elimination the branch. I.e., helps to turn something like:
4185 // if (foo) free(foo);
4186 // into
4187 // free(foo);
4188 //
4189 // Note that we can only do this for 'free' and not for any flavor of
4190 // 'operator delete'; there is no 'operator delete' symbol for which we are
4191 // permitted to invent a call, even if we're passing in a null pointer.
4192 if (MinimizeSize) {
4193 LibFunc Func;
4194 if (TLI.getLibFunc(CB: FI, F&: Func) && TLI.has(F: Func) && Func == LibFunc_free)
4195 if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
4196 return I;
4197 }
4198
4199 return nullptr;
4200}
4201
4202Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
4203 Value *RetVal = RI.getReturnValue();
4204 if (!RetVal)
4205 return nullptr;
4206
4207 Function *F = RI.getFunction();
4208 Type *RetTy = RetVal->getType();
4209 if (RetTy->isPointerTy()) {
4210 bool HasDereferenceable =
4211 F->getAttributes().getRetDereferenceableBytes() > 0;
4212 if (F->hasRetAttribute(Kind: Attribute::NonNull) ||
4213 (HasDereferenceable &&
4214 !NullPointerIsDefined(F, AS: RetTy->getPointerAddressSpace()))) {
4215 if (Value *V = simplifyNonNullOperand(V: RetVal, HasDereferenceable))
4216 return replaceOperand(I&: RI, OpNum: 0, V);
4217 }
4218 }
4219
4220 if (!AttributeFuncs::isNoFPClassCompatibleType(Ty: RetTy))
4221 return nullptr;
4222
4223 FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass();
4224 if (ReturnClass == fcNone)
4225 return nullptr;
4226
4227 KnownFPClass KnownClass;
4228 if (SimplifyDemandedFPClass(I: &RI, Op: 0, DemandedMask: ~ReturnClass, Known&: KnownClass,
4229 Q: SQ.getWithInstruction(I: &RI)))
4230 return &RI;
4231
4232 return nullptr;
4233}
4234
4235// WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
4236bool InstCombinerImpl::removeInstructionsBeforeUnreachable(Instruction &I) {
4237 // Try to remove the previous instruction if it must lead to unreachable.
4238 // This includes instructions like stores and "llvm.assume" that may not get
4239 // removed by simple dead code elimination.
4240 bool Changed = false;
4241 while (Instruction *Prev = I.getPrevNode()) {
4242 // While we theoretically can erase EH, that would result in a block that
4243 // used to start with an EH no longer starting with EH, which is invalid.
4244 // To make it valid, we'd need to fixup predecessors to no longer refer to
4245 // this block, but that changes CFG, which is not allowed in InstCombine.
4246 if (Prev->isEHPad())
4247 break; // Can not drop any more instructions. We're done here.
4248
4249 if (!isGuaranteedToTransferExecutionToSuccessor(I: Prev))
4250 break; // Can not drop any more instructions. We're done here.
4251 // Otherwise, this instruction can be freely erased,
4252 // even if it is not side-effect free.
4253
4254 // A value may still have uses before we process it here (for example, in
4255 // another unreachable block), so convert those to poison.
4256 replaceInstUsesWith(I&: *Prev, V: PoisonValue::get(T: Prev->getType()));
4257 eraseInstFromFunction(I&: *Prev);
4258 Changed = true;
4259 }
4260 return Changed;
4261}
4262
4263Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
4264 removeInstructionsBeforeUnreachable(I);
4265 return nullptr;
4266}
4267
4268Instruction *InstCombinerImpl::visitUncondBrInst(UncondBrInst &BI) {
4269 // If this store is the second-to-last instruction in the basic block
4270 // (excluding debug info) and if the block ends with
4271 // an unconditional branch, try to move the store to the successor block.
4272
4273 auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
4274 BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
4275 do {
4276 if (BBI != FirstInstr)
4277 --BBI;
4278 } while (BBI != FirstInstr && BBI->isDebugOrPseudoInst());
4279
4280 return dyn_cast<StoreInst>(Val&: BBI);
4281 };
4282
4283 if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
4284 if (mergeStoreIntoSuccessor(SI&: *SI))
4285 return &BI;
4286
4287 return nullptr;
4288}
4289
4290void InstCombinerImpl::addDeadEdge(BasicBlock *From, BasicBlock *To,
4291 SmallVectorImpl<BasicBlock *> &Worklist) {
4292 if (!DeadEdges.insert(V: {From, To}).second)
4293 return;
4294
4295 // Replace phi node operands in successor with poison.
4296 for (PHINode &PN : To->phis())
4297 for (Use &U : PN.incoming_values())
4298 if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(Val: U)) {
4299 replaceUse(U, NewValue: PoisonValue::get(T: PN.getType()));
4300 addToWorklist(I: &PN);
4301 MadeIRChange = true;
4302 }
4303
4304 Worklist.push_back(Elt: To);
4305}
4306
4307// Under the assumption that I is unreachable, remove it and following
4308// instructions. Changes are reported directly to MadeIRChange.
4309void InstCombinerImpl::handleUnreachableFrom(
4310 Instruction *I, SmallVectorImpl<BasicBlock *> &Worklist) {
4311 BasicBlock *BB = I->getParent();
4312 for (Instruction &Inst : make_early_inc_range(
4313 Range: make_range(x: std::next(x: BB->getTerminator()->getReverseIterator()),
4314 y: std::next(x: I->getReverseIterator())))) {
4315 if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
4316 replaceInstUsesWith(I&: Inst, V: PoisonValue::get(T: Inst.getType()));
4317 MadeIRChange = true;
4318 }
4319 if (Inst.isEHPad() || Inst.getType()->isTokenTy())
4320 continue;
4321 // RemoveDIs: erase debug-info on this instruction manually.
4322 Inst.dropDbgRecords();
4323 eraseInstFromFunction(I&: Inst);
4324 MadeIRChange = true;
4325 }
4326
4327 SmallVector<Value *> Changed;
4328 if (handleUnreachableTerminator(I: BB->getTerminator(), PoisonedValues&: Changed)) {
4329 MadeIRChange = true;
4330 for (Value *V : Changed)
4331 addToWorklist(I: cast<Instruction>(Val: V));
4332 }
4333
4334 // Handle potentially dead successors.
4335 for (BasicBlock *Succ : successors(BB))
4336 addDeadEdge(From: BB, To: Succ, Worklist);
4337}
4338
4339void InstCombinerImpl::handlePotentiallyDeadBlocks(
4340 SmallVectorImpl<BasicBlock *> &Worklist) {
4341 while (!Worklist.empty()) {
4342 BasicBlock *BB = Worklist.pop_back_val();
4343 if (!all_of(Range: predecessors(BB), P: [&](BasicBlock *Pred) {
4344 return DeadEdges.contains(V: {Pred, BB}) || DT.dominates(A: BB, B: Pred);
4345 }))
4346 continue;
4347
4348 handleUnreachableFrom(I: &BB->front(), Worklist);
4349 }
4350}
4351
4352void InstCombinerImpl::handlePotentiallyDeadSuccessors(BasicBlock *BB,
4353 BasicBlock *LiveSucc) {
4354 SmallVector<BasicBlock *> Worklist;
4355 for (BasicBlock *Succ : successors(BB)) {
4356 // The live successor isn't dead.
4357 if (Succ == LiveSucc)
4358 continue;
4359
4360 addDeadEdge(From: BB, To: Succ, Worklist);
4361 }
4362
4363 handlePotentiallyDeadBlocks(Worklist);
4364}
4365
4366Instruction *InstCombinerImpl::visitCondBrInst(CondBrInst &BI) {
4367 // Change br (not X), label True, label False to: br X, label False, True
4368 Value *Cond = BI.getCondition();
4369 Value *X;
4370 if (match(V: Cond, P: m_Not(V: m_Value(V&: X))) && !isa<Constant>(Val: X)) {
4371 // Swap Destinations and condition...
4372 BI.swapSuccessors();
4373 if (BPI)
4374 BPI->swapSuccEdgesProbabilities(Src: BI.getParent());
4375 return replaceOperand(I&: BI, OpNum: 0, V: X);
4376 }
4377
4378 // Canonicalize logical-and-with-invert as logical-or-with-invert.
4379 // This is done by inverting the condition and swapping successors:
4380 // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
4381 Value *Y;
4382 if (isa<SelectInst>(Val: Cond) &&
4383 match(V: Cond,
4384 P: m_OneUse(SubPattern: m_LogicalAnd(L: m_Value(V&: X), R: m_OneUse(SubPattern: m_Not(V: m_Value(V&: Y))))))) {
4385 Value *NotX = Builder.CreateNot(V: X, Name: "not." + X->getName());
4386 Value *Or = Builder.CreateLogicalOr(Cond1: NotX, Cond2: Y);
4387
4388 // Set weights for the new OR select instruction too.
4389 if (!ProfcheckDisableMetadataFixes) {
4390 if (auto *OrInst = dyn_cast<Instruction>(Val: Or)) {
4391 if (auto *CondInst = dyn_cast<Instruction>(Val: Cond)) {
4392 SmallVector<uint32_t> Weights;
4393 if (extractBranchWeights(I: *CondInst, Weights)) {
4394 assert(Weights.size() == 2 &&
4395 "Unexpected number of branch weights!");
4396 std::swap(a&: Weights[0], b&: Weights[1]);
4397 setBranchWeights(I&: *OrInst, Weights, /*IsExpected=*/false);
4398 }
4399 }
4400 }
4401 }
4402 BI.swapSuccessors();
4403 if (BPI)
4404 BPI->swapSuccEdgesProbabilities(Src: BI.getParent());
4405 return replaceOperand(I&: BI, OpNum: 0, V: Or);
4406 }
4407
4408 // If the condition is irrelevant, remove the use so that other
4409 // transforms on the condition become more effective.
4410 if (!isa<ConstantInt>(Val: Cond) && BI.getSuccessor(i: 0) == BI.getSuccessor(i: 1))
4411 return replaceOperand(I&: BI, OpNum: 0, V: ConstantInt::getFalse(Ty: Cond->getType()));
4412
4413 // Canonicalize, for example, fcmp_one -> fcmp_oeq.
4414 CmpPredicate Pred;
4415 if (match(V: Cond, P: m_OneUse(SubPattern: m_FCmp(Pred, L: m_Value(), R: m_Value()))) &&
4416 !isCanonicalPredicate(Pred)) {
4417 // Swap destinations and condition.
4418 auto *Cmp = cast<CmpInst>(Val: Cond);
4419 Cmp->setPredicate(CmpInst::getInversePredicate(pred: Pred));
4420 BI.swapSuccessors();
4421 if (BPI)
4422 BPI->swapSuccEdgesProbabilities(Src: BI.getParent());
4423 Worklist.push(I: Cmp);
4424 return &BI;
4425 }
4426
4427 if (isa<UndefValue>(Val: Cond)) {
4428 handlePotentiallyDeadSuccessors(BB: BI.getParent(), /*LiveSucc*/ nullptr);
4429 return nullptr;
4430 }
4431 if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) {
4432 handlePotentiallyDeadSuccessors(BB: BI.getParent(),
4433 LiveSucc: BI.getSuccessor(i: !CI->getZExtValue()));
4434 return nullptr;
4435 }
4436
4437 // Replace all dominated uses of the condition with true/false
4438 // Ignore constant expressions to avoid iterating over uses on other
4439 // functions.
4440 if (!isa<Constant>(Val: Cond) && BI.getSuccessor(i: 0) != BI.getSuccessor(i: 1)) {
4441 for (auto &U : make_early_inc_range(Range: Cond->uses())) {
4442 BasicBlockEdge Edge0(BI.getParent(), BI.getSuccessor(i: 0));
4443 if (DT.dominates(BBE: Edge0, U)) {
4444 replaceUse(U, NewValue: ConstantInt::getTrue(Ty: Cond->getType()));
4445 addToWorklist(I: cast<Instruction>(Val: U.getUser()));
4446 continue;
4447 }
4448 BasicBlockEdge Edge1(BI.getParent(), BI.getSuccessor(i: 1));
4449 if (DT.dominates(BBE: Edge1, U)) {
4450 replaceUse(U, NewValue: ConstantInt::getFalse(Ty: Cond->getType()));
4451 addToWorklist(I: cast<Instruction>(Val: U.getUser()));
4452 }
4453 }
4454 }
4455
4456 DC.registerBranch(BI: &BI);
4457 return nullptr;
4458}
4459
4460// Replaces (switch (select cond, X, C)/(select cond, C, X)) with (switch X) if
4461// we can prove that both (switch C) and (switch X) go to the default when cond
4462// is false/true.
4463static Value *simplifySwitchOnSelectUsingRanges(SwitchInst &SI,
4464 SelectInst *Select,
4465 bool IsTrueArm) {
4466 unsigned CstOpIdx = IsTrueArm ? 1 : 2;
4467 auto *C = dyn_cast<ConstantInt>(Val: Select->getOperand(i_nocapture: CstOpIdx));
4468 if (!C)
4469 return nullptr;
4470
4471 BasicBlock *CstBB = SI.findCaseValue(C)->getCaseSuccessor();
4472 if (CstBB != SI.getDefaultDest())
4473 return nullptr;
4474 Value *X = Select->getOperand(i_nocapture: 3 - CstOpIdx);
4475 CmpPredicate Pred;
4476 const APInt *RHSC;
4477 if (!match(V: Select->getCondition(),
4478 P: m_ICmp(Pred, L: m_Specific(V: X), R: m_APInt(Res&: RHSC))))
4479 return nullptr;
4480 if (IsTrueArm)
4481 Pred = ICmpInst::getInversePredicate(pred: Pred);
4482
4483 // See whether we can replace the select with X
4484 ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, Other: *RHSC);
4485 for (auto Case : SI.cases())
4486 if (!CR.contains(Val: Case.getCaseValue()->getValue()))
4487 return nullptr;
4488
4489 return X;
4490}
4491
4492Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
4493 Value *Cond = SI.getCondition();
4494 Value *Op0;
4495 const APInt *CondOpC;
4496 using InvertFn = std::function<APInt(const APInt &Case, const APInt &C)>;
4497
4498 auto MaybeInvertible = [&](Value *Cond) -> InvertFn {
4499 if (match(V: Cond, P: m_Add(L: m_Value(V&: Op0), R: m_APInt(Res&: CondOpC))))
4500 // Change 'switch (X+C) case Case:' into 'switch (X) case Case-C'.
4501 return [](const APInt &Case, const APInt &C) { return Case - C; };
4502
4503 if (match(V: Cond, P: m_Sub(L: m_APInt(Res&: CondOpC), R: m_Value(V&: Op0))))
4504 // Change 'switch (C-X) case Case:' into 'switch (X) case C-Case'.
4505 return [](const APInt &Case, const APInt &C) { return C - Case; };
4506
4507 if (match(V: Cond, P: m_Xor(L: m_Value(V&: Op0), R: m_APInt(Res&: CondOpC))) &&
4508 !CondOpC->isMinSignedValue() && !CondOpC->isMaxSignedValue())
4509 // Change 'switch (X^C) case Case:' into 'switch (X) case Case^C'.
4510 // Prevent creation of large case values by excluding extremes.
4511 return [](const APInt &Case, const APInt &C) { return Case ^ C; };
4512
4513 return nullptr;
4514 };
4515
4516 // Attempt to invert and simplify the switch condition, as long as the
4517 // condition is not used further, as it may not be profitable otherwise.
4518 if (auto InvertFn = MaybeInvertible(Cond); InvertFn && Cond->hasOneUse()) {
4519 for (auto &Case : SI.cases()) {
4520 const APInt &New = InvertFn(Case.getCaseValue()->getValue(), *CondOpC);
4521 Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: New));
4522 }
4523 return replaceOperand(I&: SI, OpNum: 0, V: Op0);
4524 }
4525
4526 uint64_t ShiftAmt;
4527 if (match(V: Cond, P: m_Shl(L: m_Value(V&: Op0), R: m_ConstantInt(V&: ShiftAmt))) &&
4528 ShiftAmt < Op0->getType()->getScalarSizeInBits() &&
4529 all_of(Range: SI.cases(), P: [&](const auto &Case) {
4530 return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt;
4531 })) {
4532 // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'.
4533 OverflowingBinaryOperator *Shl = cast<OverflowingBinaryOperator>(Val: Cond);
4534 if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() ||
4535 Shl->hasOneUse()) {
4536 Value *NewCond = Op0;
4537 if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) {
4538 // If the shift may wrap, we need to mask off the shifted bits.
4539 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
4540 NewCond = Builder.CreateAnd(
4541 LHS: Op0, RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - ShiftAmt));
4542 }
4543 for (auto Case : SI.cases()) {
4544 const APInt &CaseVal = Case.getCaseValue()->getValue();
4545 APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt)
4546 : CaseVal.lshr(shiftAmt: ShiftAmt);
4547 Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: ShiftedCase));
4548 }
4549 return replaceOperand(I&: SI, OpNum: 0, V: NewCond);
4550 }
4551 }
4552
4553 // Fold switch(zext/sext(X)) into switch(X) if possible.
4554 if (match(V: Cond, P: m_ZExtOrSExt(Op: m_Value(V&: Op0)))) {
4555 bool IsZExt = isa<ZExtInst>(Val: Cond);
4556 Type *SrcTy = Op0->getType();
4557 unsigned NewWidth = SrcTy->getScalarSizeInBits();
4558
4559 if (all_of(Range: SI.cases(), P: [&](const auto &Case) {
4560 const APInt &CaseVal = Case.getCaseValue()->getValue();
4561 return IsZExt ? CaseVal.isIntN(N: NewWidth)
4562 : CaseVal.isSignedIntN(N: NewWidth);
4563 })) {
4564 for (auto &Case : SI.cases()) {
4565 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth);
4566 Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase));
4567 }
4568 return replaceOperand(I&: SI, OpNum: 0, V: Op0);
4569 }
4570 }
4571
4572 // Fold switch(select cond, X, Y) into switch(X/Y) if possible
4573 if (auto *Select = dyn_cast<SelectInst>(Val: Cond)) {
4574 if (Value *V =
4575 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/true))
4576 return replaceOperand(I&: SI, OpNum: 0, V);
4577 if (Value *V =
4578 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/false))
4579 return replaceOperand(I&: SI, OpNum: 0, V);
4580 }
4581
4582 KnownBits Known = computeKnownBits(V: Cond, CxtI: &SI);
4583 unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
4584 unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
4585
4586 // Compute the number of leading bits we can ignore.
4587 // TODO: A better way to determine this would use ComputeNumSignBits().
4588 for (const auto &C : SI.cases()) {
4589 LeadingKnownZeros =
4590 std::min(a: LeadingKnownZeros, b: C.getCaseValue()->getValue().countl_zero());
4591 LeadingKnownOnes =
4592 std::min(a: LeadingKnownOnes, b: C.getCaseValue()->getValue().countl_one());
4593 }
4594
4595 unsigned NewWidth = Known.getBitWidth() - std::max(a: LeadingKnownZeros, b: LeadingKnownOnes);
4596
4597 // Shrink the condition operand if the new type is smaller than the old type.
4598 // But do not shrink to a non-standard type, because backend can't generate
4599 // good code for that yet.
4600 // TODO: We can make it aggressive again after fixing PR39569.
4601 if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
4602 shouldChangeType(FromWidth: Known.getBitWidth(), ToWidth: NewWidth)) {
4603 IntegerType *Ty = IntegerType::get(C&: SI.getContext(), NumBits: NewWidth);
4604 Builder.SetInsertPoint(&SI);
4605 Value *NewCond = Builder.CreateTrunc(V: Cond, DestTy: Ty, Name: "trunc");
4606
4607 for (auto Case : SI.cases()) {
4608 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth);
4609 Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase));
4610 }
4611 return replaceOperand(I&: SI, OpNum: 0, V: NewCond);
4612 }
4613
4614 if (isa<UndefValue>(Val: Cond)) {
4615 handlePotentiallyDeadSuccessors(BB: SI.getParent(), /*LiveSucc*/ nullptr);
4616 return nullptr;
4617 }
4618 if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) {
4619 handlePotentiallyDeadSuccessors(BB: SI.getParent(),
4620 LiveSucc: SI.findCaseValue(C: CI)->getCaseSuccessor());
4621 return nullptr;
4622 }
4623
4624 return nullptr;
4625}
4626
4627Instruction *
4628InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
4629 auto *WO = dyn_cast<WithOverflowInst>(Val: EV.getAggregateOperand());
4630 if (!WO)
4631 return nullptr;
4632
4633 Intrinsic::ID OvID = WO->getIntrinsicID();
4634 const APInt *C = nullptr;
4635 if (match(V: WO->getRHS(), P: m_APIntAllowPoison(Res&: C))) {
4636 if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
4637 OvID == Intrinsic::umul_with_overflow)) {
4638 // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
4639 if (C->isAllOnes())
4640 return BinaryOperator::CreateNeg(Op: WO->getLHS());
4641 // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
4642 if (C->isPowerOf2()) {
4643 return BinaryOperator::CreateShl(
4644 V1: WO->getLHS(),
4645 V2: ConstantInt::get(Ty: WO->getLHS()->getType(), V: C->logBase2()));
4646 }
4647 }
4648 }
4649
4650 // We're extracting from an overflow intrinsic. See if we're the only user.
4651 // That allows us to simplify multiple result intrinsics to simpler things
4652 // that just get one value.
4653 if (!WO->hasOneUse())
4654 return nullptr;
4655
4656 // Check if we're grabbing only the result of a 'with overflow' intrinsic
4657 // and replace it with a traditional binary instruction.
4658 if (*EV.idx_begin() == 0) {
4659 Instruction::BinaryOps BinOp = WO->getBinaryOp();
4660 Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
4661 // Replace the old instruction's uses with poison.
4662 replaceInstUsesWith(I&: *WO, V: PoisonValue::get(T: WO->getType()));
4663 eraseInstFromFunction(I&: *WO);
4664 return BinaryOperator::Create(Op: BinOp, S1: LHS, S2: RHS);
4665 }
4666
4667 assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
4668
4669 // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
4670 if (OvID == Intrinsic::usub_with_overflow)
4671 return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
4672
4673 // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
4674 // +1 is not possible because we assume signed values.
4675 if (OvID == Intrinsic::smul_with_overflow &&
4676 WO->getLHS()->getType()->isIntOrIntVectorTy(BitWidth: 1))
4677 return BinaryOperator::CreateAnd(V1: WO->getLHS(), V2: WO->getRHS());
4678
4679 // extractvalue (umul_with_overflow X, X), 1 -> X u> 2^(N/2)-1
4680 if (OvID == Intrinsic::umul_with_overflow && WO->getLHS() == WO->getRHS()) {
4681 unsigned BitWidth = WO->getLHS()->getType()->getScalarSizeInBits();
4682 // Only handle even bitwidths for performance reasons.
4683 if (BitWidth % 2 == 0)
4684 return new ICmpInst(
4685 ICmpInst::ICMP_UGT, WO->getLHS(),
4686 ConstantInt::get(Ty: WO->getLHS()->getType(),
4687 V: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth / 2)));
4688 }
4689
4690 // If only the overflow result is used, and the right hand side is a
4691 // constant (or constant splat), we can remove the intrinsic by directly
4692 // checking for overflow.
4693 if (C) {
4694 // Compute the no-wrap range for LHS given RHS=C, then construct an
4695 // equivalent icmp, potentially using an offset.
4696 ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
4697 BinOp: WO->getBinaryOp(), Other: *C, NoWrapKind: WO->getNoWrapKind());
4698
4699 CmpInst::Predicate Pred;
4700 APInt NewRHSC, Offset;
4701 NWR.getEquivalentICmp(Pred, RHS&: NewRHSC, Offset);
4702 auto *OpTy = WO->getRHS()->getType();
4703 auto *NewLHS = WO->getLHS();
4704 if (Offset != 0)
4705 NewLHS = Builder.CreateAdd(LHS: NewLHS, RHS: ConstantInt::get(Ty: OpTy, V: Offset));
4706 return new ICmpInst(ICmpInst::getInversePredicate(pred: Pred), NewLHS,
4707 ConstantInt::get(Ty: OpTy, V: NewRHSC));
4708 }
4709
4710 return nullptr;
4711}
4712
4713static Value *foldFrexpOfSelect(ExtractValueInst &EV, IntrinsicInst *FrexpCall,
4714 SelectInst *SelectInst,
4715 InstCombiner::BuilderTy &Builder) {
4716 // Helper to fold frexp of select to select of frexp.
4717
4718 if (!SelectInst->hasOneUse() || !FrexpCall->hasOneUse())
4719 return nullptr;
4720 Value *Cond = SelectInst->getCondition();
4721 Value *TrueVal = SelectInst->getTrueValue();
4722 Value *FalseVal = SelectInst->getFalseValue();
4723
4724 const APFloat *ConstVal = nullptr;
4725 Value *VarOp = nullptr;
4726 bool ConstIsTrue = false;
4727
4728 if (match(V: TrueVal, P: m_APFloat(Res&: ConstVal))) {
4729 VarOp = FalseVal;
4730 ConstIsTrue = true;
4731 } else if (match(V: FalseVal, P: m_APFloat(Res&: ConstVal))) {
4732 VarOp = TrueVal;
4733 ConstIsTrue = false;
4734 } else {
4735 return nullptr;
4736 }
4737
4738 Builder.SetInsertPoint(&EV);
4739
4740 CallInst *NewFrexp =
4741 Builder.CreateCall(Callee: FrexpCall->getCalledFunction(), Args: {VarOp}, Name: "frexp");
4742 NewFrexp->copyIRFlags(V: FrexpCall);
4743
4744 Value *NewEV = Builder.CreateExtractValue(Agg: NewFrexp, Idxs: 0, Name: "mantissa");
4745
4746 int Exp;
4747 APFloat Mantissa = frexp(X: *ConstVal, Exp, RM: APFloat::rmNearestTiesToEven);
4748
4749 Constant *ConstantMantissa = ConstantFP::get(Ty: TrueVal->getType(), V: Mantissa);
4750
4751 Value *NewSel = Builder.CreateSelectFMF(
4752 C: Cond, True: ConstIsTrue ? ConstantMantissa : NewEV,
4753 False: ConstIsTrue ? NewEV : ConstantMantissa, FMFSource: SelectInst, Name: "select.frexp");
4754 return NewSel;
4755}
4756Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
4757 Value *Agg = EV.getAggregateOperand();
4758
4759 if (!EV.hasIndices())
4760 return replaceInstUsesWith(I&: EV, V: Agg);
4761
4762 if (Value *V = simplifyExtractValueInst(Agg, Idxs: EV.getIndices(),
4763 Q: SQ.getWithInstruction(I: &EV)))
4764 return replaceInstUsesWith(I&: EV, V);
4765
4766 Value *Cond, *TrueVal, *FalseVal;
4767 if (match(V: &EV, P: m_ExtractValue<0>(V: m_Intrinsic<Intrinsic::frexp>(Op0: m_Select(
4768 C: m_Value(V&: Cond), L: m_Value(V&: TrueVal), R: m_Value(V&: FalseVal)))))) {
4769 auto *SelInst =
4770 cast<SelectInst>(Val: cast<IntrinsicInst>(Val: Agg)->getArgOperand(i: 0));
4771 if (Value *Result =
4772 foldFrexpOfSelect(EV, FrexpCall: cast<IntrinsicInst>(Val: Agg), SelectInst: SelInst, Builder))
4773 return replaceInstUsesWith(I&: EV, V: Result);
4774 }
4775 if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Val: Agg)) {
4776 // We're extracting from an insertvalue instruction, compare the indices
4777 const unsigned *exti, *exte, *insi, *inse;
4778 for (exti = EV.idx_begin(), insi = IV->idx_begin(),
4779 exte = EV.idx_end(), inse = IV->idx_end();
4780 exti != exte && insi != inse;
4781 ++exti, ++insi) {
4782 if (*insi != *exti)
4783 // The insert and extract both reference distinctly different elements.
4784 // This means the extract is not influenced by the insert, and we can
4785 // replace the aggregate operand of the extract with the aggregate
4786 // operand of the insert. i.e., replace
4787 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
4788 // %E = extractvalue { i32, { i32 } } %I, 0
4789 // with
4790 // %E = extractvalue { i32, { i32 } } %A, 0
4791 return ExtractValueInst::Create(Agg: IV->getAggregateOperand(),
4792 Idxs: EV.getIndices());
4793 }
4794 if (exti == exte && insi == inse)
4795 // Both iterators are at the end: Index lists are identical. Replace
4796 // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
4797 // %C = extractvalue { i32, { i32 } } %B, 1, 0
4798 // with "i32 42"
4799 return replaceInstUsesWith(I&: EV, V: IV->getInsertedValueOperand());
4800 if (exti == exte) {
4801 // The extract list is a prefix of the insert list. i.e. replace
4802 // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
4803 // %E = extractvalue { i32, { i32 } } %I, 1
4804 // with
4805 // %X = extractvalue { i32, { i32 } } %A, 1
4806 // %E = insertvalue { i32 } %X, i32 42, 0
4807 // by switching the order of the insert and extract (though the
4808 // insertvalue should be left in, since it may have other uses).
4809 Value *NewEV = Builder.CreateExtractValue(Agg: IV->getAggregateOperand(),
4810 Idxs: EV.getIndices());
4811 return InsertValueInst::Create(Agg: NewEV, Val: IV->getInsertedValueOperand(),
4812 Idxs: ArrayRef(insi, inse));
4813 }
4814 if (insi == inse)
4815 // The insert list is a prefix of the extract list
4816 // We can simply remove the common indices from the extract and make it
4817 // operate on the inserted value instead of the insertvalue result.
4818 // i.e., replace
4819 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
4820 // %E = extractvalue { i32, { i32 } } %I, 1, 0
4821 // with
4822 // %E extractvalue { i32 } { i32 42 }, 0
4823 return ExtractValueInst::Create(Agg: IV->getInsertedValueOperand(),
4824 Idxs: ArrayRef(exti, exte));
4825 }
4826
4827 if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
4828 return R;
4829
4830 if (LoadInst *L = dyn_cast<LoadInst>(Val: Agg)) {
4831 // Bail out if the aggregate contains scalable vector type
4832 if (auto *STy = dyn_cast<StructType>(Val: Agg->getType());
4833 STy && STy->isScalableTy())
4834 return nullptr;
4835
4836 // If the (non-volatile) load only has one use, we can rewrite this to a
4837 // load from a GEP. This reduces the size of the load. If a load is used
4838 // only by extractvalue instructions then this either must have been
4839 // optimized before, or it is a struct with padding, in which case we
4840 // don't want to do the transformation as it loses padding knowledge.
4841 if (L->isSimple() && L->hasOneUse()) {
4842 // extractvalue has integer indices, getelementptr has Value*s. Convert.
4843 SmallVector<Value*, 4> Indices;
4844 // Prefix an i32 0 since we need the first element.
4845 Indices.push_back(Elt: Builder.getInt32(C: 0));
4846 for (unsigned Idx : EV.indices())
4847 Indices.push_back(Elt: Builder.getInt32(C: Idx));
4848
4849 // We need to insert these at the location of the old load, not at that of
4850 // the extractvalue.
4851 Builder.SetInsertPoint(L);
4852 Value *GEP = Builder.CreateInBoundsGEP(Ty: L->getType(),
4853 Ptr: L->getPointerOperand(), IdxList: Indices);
4854 Instruction *NL = Builder.CreateLoad(Ty: EV.getType(), Ptr: GEP);
4855 // Whatever aliasing information we had for the orignal load must also
4856 // hold for the smaller load, so propagate the annotations.
4857 NL->setAAMetadata(L->getAAMetadata());
4858 // Returning the load directly will cause the main loop to insert it in
4859 // the wrong spot, so use replaceInstUsesWith().
4860 return replaceInstUsesWith(I&: EV, V: NL);
4861 }
4862 }
4863
4864 if (auto *PN = dyn_cast<PHINode>(Val: Agg))
4865 if (Instruction *Res = foldOpIntoPhi(I&: EV, PN))
4866 return Res;
4867
4868 // Canonicalize extract (select Cond, TV, FV)
4869 // -> select cond, (extract TV), (extract FV)
4870 if (auto *SI = dyn_cast<SelectInst>(Val: Agg))
4871 if (Instruction *R = FoldOpIntoSelect(Op&: EV, SI, /*FoldWithMultiUse=*/true))
4872 return R;
4873
4874 // We could simplify extracts from other values. Note that nested extracts may
4875 // already be simplified implicitly by the above: extract (extract (insert) )
4876 // will be translated into extract ( insert ( extract ) ) first and then just
4877 // the value inserted, if appropriate. Similarly for extracts from single-use
4878 // loads: extract (extract (load)) will be translated to extract (load (gep))
4879 // and if again single-use then via load (gep (gep)) to load (gep).
4880 // However, double extracts from e.g. function arguments or return values
4881 // aren't handled yet.
4882 return nullptr;
4883}
4884
4885/// Return 'true' if the given typeinfo will match anything.
4886static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
4887 switch (Personality) {
4888 case EHPersonality::GNU_C:
4889 case EHPersonality::GNU_C_SjLj:
4890 case EHPersonality::Rust:
4891 // The GCC C EH and Rust personality only exists to support cleanups, so
4892 // it's not clear what the semantics of catch clauses are.
4893 return false;
4894 case EHPersonality::Unknown:
4895 return false;
4896 case EHPersonality::GNU_Ada:
4897 // While __gnat_all_others_value will match any Ada exception, it doesn't
4898 // match foreign exceptions (or didn't, before gcc-4.7).
4899 return false;
4900 case EHPersonality::GNU_CXX:
4901 case EHPersonality::GNU_CXX_SjLj:
4902 case EHPersonality::GNU_ObjC:
4903 case EHPersonality::MSVC_X86SEH:
4904 case EHPersonality::MSVC_TableSEH:
4905 case EHPersonality::MSVC_CXX:
4906 case EHPersonality::CoreCLR:
4907 case EHPersonality::Wasm_CXX:
4908 case EHPersonality::XL_CXX:
4909 case EHPersonality::ZOS_CXX:
4910 return isa<ConstantPointerNull>(Val: TypeInfo);
4911 }
4912 llvm_unreachable("invalid enum");
4913}
4914
4915static bool shorter_filter(const Value *LHS, const Value *RHS) {
4916 return
4917 cast<ArrayType>(Val: LHS->getType())->getNumElements()
4918 <
4919 cast<ArrayType>(Val: RHS->getType())->getNumElements();
4920}
4921
4922Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
4923 // The logic here should be correct for any real-world personality function.
4924 // However if that turns out not to be true, the offending logic can always
4925 // be conditioned on the personality function, like the catch-all logic is.
4926 EHPersonality Personality =
4927 classifyEHPersonality(Pers: LI.getParent()->getParent()->getPersonalityFn());
4928
4929 // Simplify the list of clauses, eg by removing repeated catch clauses
4930 // (these are often created by inlining).
4931 bool MakeNewInstruction = false; // If true, recreate using the following:
4932 SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
4933 bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
4934
4935 SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
4936 for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
4937 bool isLastClause = i + 1 == e;
4938 if (LI.isCatch(Idx: i)) {
4939 // A catch clause.
4940 Constant *CatchClause = LI.getClause(Idx: i);
4941 Constant *TypeInfo = CatchClause->stripPointerCasts();
4942
4943 // If we already saw this clause, there is no point in having a second
4944 // copy of it.
4945 if (AlreadyCaught.insert(Ptr: TypeInfo).second) {
4946 // This catch clause was not already seen.
4947 NewClauses.push_back(Elt: CatchClause);
4948 } else {
4949 // Repeated catch clause - drop the redundant copy.
4950 MakeNewInstruction = true;
4951 }
4952
4953 // If this is a catch-all then there is no point in keeping any following
4954 // clauses or marking the landingpad as having a cleanup.
4955 if (isCatchAll(Personality, TypeInfo)) {
4956 if (!isLastClause)
4957 MakeNewInstruction = true;
4958 CleanupFlag = false;
4959 break;
4960 }
4961 } else {
4962 // A filter clause. If any of the filter elements were already caught
4963 // then they can be dropped from the filter. It is tempting to try to
4964 // exploit the filter further by saying that any typeinfo that does not
4965 // occur in the filter can't be caught later (and thus can be dropped).
4966 // However this would be wrong, since typeinfos can match without being
4967 // equal (for example if one represents a C++ class, and the other some
4968 // class derived from it).
4969 assert(LI.isFilter(i) && "Unsupported landingpad clause!");
4970 Constant *FilterClause = LI.getClause(Idx: i);
4971 ArrayType *FilterType = cast<ArrayType>(Val: FilterClause->getType());
4972 unsigned NumTypeInfos = FilterType->getNumElements();
4973
4974 // An empty filter catches everything, so there is no point in keeping any
4975 // following clauses or marking the landingpad as having a cleanup. By
4976 // dealing with this case here the following code is made a bit simpler.
4977 if (!NumTypeInfos) {
4978 NewClauses.push_back(Elt: FilterClause);
4979 if (!isLastClause)
4980 MakeNewInstruction = true;
4981 CleanupFlag = false;
4982 break;
4983 }
4984
4985 bool MakeNewFilter = false; // If true, make a new filter.
4986 SmallVector<Constant *, 16> NewFilterElts; // New elements.
4987 if (isa<ConstantAggregateZero>(Val: FilterClause)) {
4988 // Not an empty filter - it contains at least one null typeinfo.
4989 assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
4990 Constant *TypeInfo =
4991 Constant::getNullValue(Ty: FilterType->getElementType());
4992 // If this typeinfo is a catch-all then the filter can never match.
4993 if (isCatchAll(Personality, TypeInfo)) {
4994 // Throw the filter away.
4995 MakeNewInstruction = true;
4996 continue;
4997 }
4998
4999 // There is no point in having multiple copies of this typeinfo, so
5000 // discard all but the first copy if there is more than one.
5001 NewFilterElts.push_back(Elt: TypeInfo);
5002 if (NumTypeInfos > 1)
5003 MakeNewFilter = true;
5004 } else {
5005 ConstantArray *Filter = cast<ConstantArray>(Val: FilterClause);
5006 SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
5007 NewFilterElts.reserve(N: NumTypeInfos);
5008
5009 // Remove any filter elements that were already caught or that already
5010 // occurred in the filter. While there, see if any of the elements are
5011 // catch-alls. If so, the filter can be discarded.
5012 bool SawCatchAll = false;
5013 for (unsigned j = 0; j != NumTypeInfos; ++j) {
5014 Constant *Elt = Filter->getOperand(i_nocapture: j);
5015 Constant *TypeInfo = Elt->stripPointerCasts();
5016 if (isCatchAll(Personality, TypeInfo)) {
5017 // This element is a catch-all. Bail out, noting this fact.
5018 SawCatchAll = true;
5019 break;
5020 }
5021
5022 // Even if we've seen a type in a catch clause, we don't want to
5023 // remove it from the filter. An unexpected type handler may be
5024 // set up for a call site which throws an exception of the same
5025 // type caught. In order for the exception thrown by the unexpected
5026 // handler to propagate correctly, the filter must be correctly
5027 // described for the call site.
5028 //
5029 // Example:
5030 //
5031 // void unexpected() { throw 1;}
5032 // void foo() throw (int) {
5033 // std::set_unexpected(unexpected);
5034 // try {
5035 // throw 2.0;
5036 // } catch (int i) {}
5037 // }
5038
5039 // There is no point in having multiple copies of the same typeinfo in
5040 // a filter, so only add it if we didn't already.
5041 if (SeenInFilter.insert(Ptr: TypeInfo).second)
5042 NewFilterElts.push_back(Elt: cast<Constant>(Val: Elt));
5043 }
5044 // A filter containing a catch-all cannot match anything by definition.
5045 if (SawCatchAll) {
5046 // Throw the filter away.
5047 MakeNewInstruction = true;
5048 continue;
5049 }
5050
5051 // If we dropped something from the filter, make a new one.
5052 if (NewFilterElts.size() < NumTypeInfos)
5053 MakeNewFilter = true;
5054 }
5055 if (MakeNewFilter) {
5056 FilterType = ArrayType::get(ElementType: FilterType->getElementType(),
5057 NumElements: NewFilterElts.size());
5058 FilterClause = ConstantArray::get(T: FilterType, V: NewFilterElts);
5059 MakeNewInstruction = true;
5060 }
5061
5062 NewClauses.push_back(Elt: FilterClause);
5063
5064 // If the new filter is empty then it will catch everything so there is
5065 // no point in keeping any following clauses or marking the landingpad
5066 // as having a cleanup. The case of the original filter being empty was
5067 // already handled above.
5068 if (MakeNewFilter && !NewFilterElts.size()) {
5069 assert(MakeNewInstruction && "New filter but not a new instruction!");
5070 CleanupFlag = false;
5071 break;
5072 }
5073 }
5074 }
5075
5076 // If several filters occur in a row then reorder them so that the shortest
5077 // filters come first (those with the smallest number of elements). This is
5078 // advantageous because shorter filters are more likely to match, speeding up
5079 // unwinding, but mostly because it increases the effectiveness of the other
5080 // filter optimizations below.
5081 for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
5082 unsigned j;
5083 // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
5084 for (j = i; j != e; ++j)
5085 if (!isa<ArrayType>(Val: NewClauses[j]->getType()))
5086 break;
5087
5088 // Check whether the filters are already sorted by length. We need to know
5089 // if sorting them is actually going to do anything so that we only make a
5090 // new landingpad instruction if it does.
5091 for (unsigned k = i; k + 1 < j; ++k)
5092 if (shorter_filter(LHS: NewClauses[k+1], RHS: NewClauses[k])) {
5093 // Not sorted, so sort the filters now. Doing an unstable sort would be
5094 // correct too but reordering filters pointlessly might confuse users.
5095 std::stable_sort(first: NewClauses.begin() + i, last: NewClauses.begin() + j,
5096 comp: shorter_filter);
5097 MakeNewInstruction = true;
5098 break;
5099 }
5100
5101 // Look for the next batch of filters.
5102 i = j + 1;
5103 }
5104
5105 // If typeinfos matched if and only if equal, then the elements of a filter L
5106 // that occurs later than a filter F could be replaced by the intersection of
5107 // the elements of F and L. In reality two typeinfos can match without being
5108 // equal (for example if one represents a C++ class, and the other some class
5109 // derived from it) so it would be wrong to perform this transform in general.
5110 // However the transform is correct and useful if F is a subset of L. In that
5111 // case L can be replaced by F, and thus removed altogether since repeating a
5112 // filter is pointless. So here we look at all pairs of filters F and L where
5113 // L follows F in the list of clauses, and remove L if every element of F is
5114 // an element of L. This can occur when inlining C++ functions with exception
5115 // specifications.
5116 for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
5117 // Examine each filter in turn.
5118 Value *Filter = NewClauses[i];
5119 ArrayType *FTy = dyn_cast<ArrayType>(Val: Filter->getType());
5120 if (!FTy)
5121 // Not a filter - skip it.
5122 continue;
5123 unsigned FElts = FTy->getNumElements();
5124 // Examine each filter following this one. Doing this backwards means that
5125 // we don't have to worry about filters disappearing under us when removed.
5126 for (unsigned j = NewClauses.size() - 1; j != i; --j) {
5127 Value *LFilter = NewClauses[j];
5128 ArrayType *LTy = dyn_cast<ArrayType>(Val: LFilter->getType());
5129 if (!LTy)
5130 // Not a filter - skip it.
5131 continue;
5132 // If Filter is a subset of LFilter, i.e. every element of Filter is also
5133 // an element of LFilter, then discard LFilter.
5134 SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
5135 // If Filter is empty then it is a subset of LFilter.
5136 if (!FElts) {
5137 // Discard LFilter.
5138 NewClauses.erase(CI: J);
5139 MakeNewInstruction = true;
5140 // Move on to the next filter.
5141 continue;
5142 }
5143 unsigned LElts = LTy->getNumElements();
5144 // If Filter is longer than LFilter then it cannot be a subset of it.
5145 if (FElts > LElts)
5146 // Move on to the next filter.
5147 continue;
5148 // At this point we know that LFilter has at least one element.
5149 if (isa<ConstantAggregateZero>(Val: LFilter)) { // LFilter only contains zeros.
5150 // Filter is a subset of LFilter iff Filter contains only zeros (as we
5151 // already know that Filter is not longer than LFilter).
5152 if (isa<ConstantAggregateZero>(Val: Filter)) {
5153 assert(FElts <= LElts && "Should have handled this case earlier!");
5154 // Discard LFilter.
5155 NewClauses.erase(CI: J);
5156 MakeNewInstruction = true;
5157 }
5158 // Move on to the next filter.
5159 continue;
5160 }
5161 ConstantArray *LArray = cast<ConstantArray>(Val: LFilter);
5162 if (isa<ConstantAggregateZero>(Val: Filter)) { // Filter only contains zeros.
5163 // Since Filter is non-empty and contains only zeros, it is a subset of
5164 // LFilter iff LFilter contains a zero.
5165 assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
5166 for (unsigned l = 0; l != LElts; ++l)
5167 if (isa<ConstantPointerNull>(Val: LArray->getOperand(i_nocapture: l))) {
5168 // LFilter contains a zero - discard it.
5169 NewClauses.erase(CI: J);
5170 MakeNewInstruction = true;
5171 break;
5172 }
5173 // Move on to the next filter.
5174 continue;
5175 }
5176 // At this point we know that both filters are ConstantArrays. Loop over
5177 // operands to see whether every element of Filter is also an element of
5178 // LFilter. Since filters tend to be short this is probably faster than
5179 // using a method that scales nicely.
5180 ConstantArray *FArray = cast<ConstantArray>(Val: Filter);
5181 bool AllFound = true;
5182 for (unsigned f = 0; f != FElts; ++f) {
5183 Value *FTypeInfo = FArray->getOperand(i_nocapture: f)->stripPointerCasts();
5184 AllFound = false;
5185 for (unsigned l = 0; l != LElts; ++l) {
5186 Value *LTypeInfo = LArray->getOperand(i_nocapture: l)->stripPointerCasts();
5187 if (LTypeInfo == FTypeInfo) {
5188 AllFound = true;
5189 break;
5190 }
5191 }
5192 if (!AllFound)
5193 break;
5194 }
5195 if (AllFound) {
5196 // Discard LFilter.
5197 NewClauses.erase(CI: J);
5198 MakeNewInstruction = true;
5199 }
5200 // Move on to the next filter.
5201 }
5202 }
5203
5204 // If we changed any of the clauses, replace the old landingpad instruction
5205 // with a new one.
5206 if (MakeNewInstruction) {
5207 LandingPadInst *NLI = LandingPadInst::Create(RetTy: LI.getType(),
5208 NumReservedClauses: NewClauses.size());
5209 for (Constant *C : NewClauses)
5210 NLI->addClause(ClauseVal: C);
5211 // A landing pad with no clauses must have the cleanup flag set. It is
5212 // theoretically possible, though highly unlikely, that we eliminated all
5213 // clauses. If so, force the cleanup flag to true.
5214 if (NewClauses.empty())
5215 CleanupFlag = true;
5216 NLI->setCleanup(CleanupFlag);
5217 return NLI;
5218 }
5219
5220 // Even if none of the clauses changed, we may nonetheless have understood
5221 // that the cleanup flag is pointless. Clear it if so.
5222 if (LI.isCleanup() != CleanupFlag) {
5223 assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
5224 LI.setCleanup(CleanupFlag);
5225 return &LI;
5226 }
5227
5228 return nullptr;
5229}
5230
5231Value *
5232InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
5233 // Try to push freeze through instructions that propagate but don't produce
5234 // poison as far as possible. If an operand of freeze follows three
5235 // conditions 1) one-use, 2) does not produce poison, and 3) has all but one
5236 // guaranteed-non-poison operands then push the freeze through to the one
5237 // operand that is not guaranteed non-poison. The actual transform is as
5238 // follows.
5239 // Op1 = ... ; Op1 can be posion
5240 // Op0 = Inst(Op1, NonPoisonOps...) ; Op0 has only one use and only have
5241 // ; single guaranteed-non-poison operands
5242 // ... = Freeze(Op0)
5243 // =>
5244 // Op1 = ...
5245 // Op1.fr = Freeze(Op1)
5246 // ... = Inst(Op1.fr, NonPoisonOps...)
5247 auto *OrigOp = OrigFI.getOperand(i_nocapture: 0);
5248 auto *OrigOpInst = dyn_cast<Instruction>(Val: OrigOp);
5249
5250 // While we could change the other users of OrigOp to use freeze(OrigOp), that
5251 // potentially reduces their optimization potential, so let's only do this iff
5252 // the OrigOp is only used by the freeze.
5253 if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(Val: OrigOp))
5254 return nullptr;
5255
5256 // We can't push the freeze through an instruction which can itself create
5257 // poison. If the only source of new poison is flags, we can simply
5258 // strip them (since we know the only use is the freeze and nothing can
5259 // benefit from them.)
5260 if (canCreateUndefOrPoison(Op: cast<Operator>(Val: OrigOp),
5261 /*ConsiderFlagsAndMetadata*/ false))
5262 return nullptr;
5263
5264 // If operand is guaranteed not to be poison, there is no need to add freeze
5265 // to the operand. So we first find the operand that is not guaranteed to be
5266 // poison.
5267 Value *MaybePoisonOperand = nullptr;
5268 for (Value *V : OrigOpInst->operands()) {
5269 if (isa<MetadataAsValue>(Val: V) || isGuaranteedNotToBeUndefOrPoison(V) ||
5270 // Treat identical operands as a single operand.
5271 (MaybePoisonOperand && MaybePoisonOperand == V))
5272 continue;
5273 if (!MaybePoisonOperand)
5274 MaybePoisonOperand = V;
5275 else
5276 return nullptr;
5277 }
5278
5279 OrigOpInst->dropPoisonGeneratingAnnotations();
5280
5281 // If all operands are guaranteed to be non-poison, we can drop freeze.
5282 if (!MaybePoisonOperand)
5283 return OrigOp;
5284
5285 Builder.SetInsertPoint(OrigOpInst);
5286 Value *FrozenMaybePoisonOperand = Builder.CreateFreeze(
5287 V: MaybePoisonOperand, Name: MaybePoisonOperand->getName() + ".fr");
5288
5289 OrigOpInst->replaceUsesOfWith(From: MaybePoisonOperand, To: FrozenMaybePoisonOperand);
5290 return OrigOp;
5291}
5292
5293Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
5294 PHINode *PN) {
5295 // Detect whether this is a recurrence with a start value and some number of
5296 // backedge values. We'll check whether we can push the freeze through the
5297 // backedge values (possibly dropping poison flags along the way) until we
5298 // reach the phi again. In that case, we can move the freeze to the start
5299 // value.
5300 Use *StartU = nullptr;
5301 SmallVector<Value *> Worklist;
5302 for (Use &U : PN->incoming_values()) {
5303 if (DT.dominates(A: PN->getParent(), B: PN->getIncomingBlock(U))) {
5304 // Add backedge value to worklist.
5305 Worklist.push_back(Elt: U.get());
5306 continue;
5307 }
5308
5309 // Don't bother handling multiple start values.
5310 if (StartU)
5311 return nullptr;
5312 StartU = &U;
5313 }
5314
5315 if (!StartU || Worklist.empty())
5316 return nullptr; // Not a recurrence.
5317
5318 Value *StartV = StartU->get();
5319 BasicBlock *StartBB = PN->getIncomingBlock(U: *StartU);
5320 bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(V: StartV);
5321 // We can't insert freeze if the start value is the result of the
5322 // terminator (e.g. an invoke).
5323 if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
5324 return nullptr;
5325
5326 SmallPtrSet<Value *, 32> Visited;
5327 SmallVector<Instruction *> DropFlags;
5328 while (!Worklist.empty()) {
5329 Value *V = Worklist.pop_back_val();
5330 if (!Visited.insert(Ptr: V).second)
5331 continue;
5332
5333 if (Visited.size() > 32)
5334 return nullptr; // Limit the total number of values we inspect.
5335
5336 // Assume that PN is non-poison, because it will be after the transform.
5337 if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
5338 continue;
5339
5340 Instruction *I = dyn_cast<Instruction>(Val: V);
5341 if (!I || canCreateUndefOrPoison(Op: cast<Operator>(Val: I),
5342 /*ConsiderFlagsAndMetadata*/ false))
5343 return nullptr;
5344
5345 DropFlags.push_back(Elt: I);
5346 append_range(C&: Worklist, R: I->operands());
5347 }
5348
5349 for (Instruction *I : DropFlags)
5350 I->dropPoisonGeneratingAnnotations();
5351
5352 if (StartNeedsFreeze) {
5353 Builder.SetInsertPoint(StartBB->getTerminator());
5354 Value *FrozenStartV = Builder.CreateFreeze(V: StartV,
5355 Name: StartV->getName() + ".fr");
5356 replaceUse(U&: *StartU, NewValue: FrozenStartV);
5357 }
5358 return replaceInstUsesWith(I&: FI, V: PN);
5359}
5360
5361bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) {
5362 Value *Op = FI.getOperand(i_nocapture: 0);
5363
5364 if (isa<Constant>(Val: Op) || Op->hasOneUse())
5365 return false;
5366
5367 // Move the freeze directly after the definition of its operand, so that
5368 // it dominates the maximum number of uses. Note that it may not dominate
5369 // *all* uses if the operand is an invoke/callbr and the use is in a phi on
5370 // the normal/default destination. This is why the domination check in the
5371 // replacement below is still necessary.
5372 BasicBlock::iterator MoveBefore;
5373 if (isa<Argument>(Val: Op)) {
5374 MoveBefore =
5375 FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
5376 } else {
5377 auto MoveBeforeOpt = cast<Instruction>(Val: Op)->getInsertionPointAfterDef();
5378 if (!MoveBeforeOpt)
5379 return false;
5380 MoveBefore = *MoveBeforeOpt;
5381 }
5382
5383 // Re-point iterator to come after any debug-info records.
5384 MoveBefore.setHeadBit(false);
5385
5386 bool Changed = false;
5387 if (&FI != &*MoveBefore) {
5388 FI.moveBefore(BB&: *MoveBefore->getParent(), I: MoveBefore);
5389 Changed = true;
5390 }
5391
5392 SmallVector<User *> Users;
5393 Changed |= Op->replaceUsesWithIf(New: &FI, ShouldReplace: [&](Use &U) -> bool {
5394 if (!DT.dominates(Def: &FI, U))
5395 return false;
5396
5397 Users.push_back(Elt: U.getUser());
5398 return true;
5399 });
5400
5401 for (auto *U : Users) {
5402 // Re-queue U and its users: freezing U's operand can expose a fold on a
5403 // user of U (e.g. a freeze of U can now be pushed through it) that would
5404 // otherwise only fire on a later iteration, tripping the fixpoint verifier.
5405 auto *UI = cast<Instruction>(Val: U);
5406 Worklist.pushUsersToWorkList(I&: *UI);
5407 Worklist.push(I: UI);
5408 }
5409
5410 return Changed;
5411}
5412
5413// Check if any direct or bitcast user of this value is a shuffle instruction.
5414static bool isUsedWithinShuffleVector(Value *V) {
5415 for (auto *U : V->users()) {
5416 if (isa<ShuffleVectorInst>(Val: U))
5417 return true;
5418 else if (match(V: U, P: m_BitCast(Op: m_Specific(V))) && isUsedWithinShuffleVector(V: U))
5419 return true;
5420 }
5421 return false;
5422}
5423
5424Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
5425 Value *Op0 = I.getOperand(i_nocapture: 0);
5426
5427 if (Value *V = simplifyFreezeInst(Op: Op0, Q: SQ.getWithInstruction(I: &I)))
5428 return replaceInstUsesWith(I, V);
5429
5430 // freeze (phi const, x) --> phi const, (freeze x)
5431 if (auto *PN = dyn_cast<PHINode>(Val: Op0)) {
5432 if (Instruction *NV = foldOpIntoPhi(I, PN))
5433 return NV;
5434 if (Instruction *NV = foldFreezeIntoRecurrence(FI&: I, PN))
5435 return NV;
5436 }
5437
5438 if (Value *NI = pushFreezeToPreventPoisonFromPropagating(OrigFI&: I))
5439 return replaceInstUsesWith(I, V: NI);
5440
5441 // If I is freeze(undef), check its uses and fold it to a fixed constant.
5442 // - or: pick -1
5443 // - select's condition: if the true value is constant, choose it by making
5444 // the condition true.
5445 // - phi: pick the common constant across operands
5446 // - default: pick 0
5447 //
5448 // Note that this transform is intentionally done here rather than
5449 // via an analysis in InstSimplify or at individual user sites. That is
5450 // because we must produce the same value for all uses of the freeze -
5451 // it's the reason "freeze" exists!
5452 //
5453 // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
5454 // duplicating logic for binops at least.
5455 auto getUndefReplacement = [&](Type *Ty) {
5456 auto pickCommonConstantFromPHI = [](PHINode &PN) -> Value * {
5457 // phi(freeze(undef), C, C). Choose C for freeze so the PHI can be
5458 // removed.
5459 Constant *BestValue = nullptr;
5460 for (Value *V : PN.incoming_values()) {
5461 if (match(V, P: m_Freeze(Op: m_Undef())))
5462 continue;
5463
5464 Constant *C = dyn_cast<Constant>(Val: V);
5465 if (!C)
5466 return nullptr;
5467
5468 if (!isGuaranteedNotToBeUndefOrPoison(V: C))
5469 return nullptr;
5470
5471 if (BestValue && BestValue != C)
5472 return nullptr;
5473
5474 BestValue = C;
5475 }
5476 return BestValue;
5477 };
5478
5479 Value *NullValue = Constant::getNullValue(Ty);
5480 Value *BestValue = nullptr;
5481 for (auto *U : I.users()) {
5482 Value *V = NullValue;
5483 if (match(V: U, P: m_Or(L: m_Value(), R: m_Value())))
5484 V = ConstantInt::getAllOnesValue(Ty);
5485 else if (match(V: U, P: m_Select(C: m_Specific(V: &I), L: m_Constant(), R: m_Value())))
5486 V = ConstantInt::getTrue(Ty);
5487 else if (match(V: U, P: m_c_Select(L: m_Specific(V: &I), R: m_Value(V)))) {
5488 if (V == &I || !isGuaranteedNotToBeUndefOrPoison(V, AC: &AC, CtxI: &I, DT: &DT))
5489 V = NullValue;
5490 } else if (auto *PHI = dyn_cast<PHINode>(Val: U)) {
5491 if (Value *MaybeV = pickCommonConstantFromPHI(*PHI))
5492 V = MaybeV;
5493 }
5494
5495 if (!BestValue)
5496 BestValue = V;
5497 else if (BestValue != V)
5498 BestValue = NullValue;
5499 }
5500 assert(BestValue && "Must have at least one use");
5501 assert(BestValue != &I && "Cannot replace with itself");
5502 return BestValue;
5503 };
5504
5505 if (match(V: Op0, P: m_Undef())) {
5506 // Don't fold freeze(undef/poison) if it's used as a vector operand in
5507 // a shuffle. This may improve codegen for shuffles that allow
5508 // unspecified inputs.
5509 if (isUsedWithinShuffleVector(V: &I))
5510 return nullptr;
5511 return replaceInstUsesWith(I, V: getUndefReplacement(I.getType()));
5512 }
5513
5514 auto getFreezeVectorReplacement = [](Constant *C) -> Constant * {
5515 Type *Ty = C->getType();
5516 auto *VTy = dyn_cast<FixedVectorType>(Val: Ty);
5517 if (!VTy)
5518 return nullptr;
5519 Constant *BestValue;
5520 if (!match(V: C, P: m_ContainsMatchingVectorElement(SubPattern: m_CombineAnd(
5521 Ps: m_Unless(M: m_Undef()), Ps: m_Constant(C&: BestValue)))))
5522 BestValue = Constant::getNullValue(Ty: VTy->getScalarType());
5523 return Constant::replaceUndefsWith(C, Replacement: BestValue);
5524 };
5525
5526 Constant *C;
5527 if (match(V: Op0, P: m_Constant(C)) && C->containsUndefOrPoisonElement() &&
5528 !C->containsConstantExpression()) {
5529 if (Constant *Repl = getFreezeVectorReplacement(C))
5530 return replaceInstUsesWith(I, V: Repl);
5531 }
5532
5533 // Replace uses of Op with freeze(Op).
5534 if (freezeOtherUses(FI&: I))
5535 return &I;
5536
5537 return nullptr;
5538}
5539
5540/// Check for case where the call writes to an otherwise dead alloca. This
5541/// shows up for unused out-params in idiomatic C/C++ code. Note that this
5542/// helper *only* analyzes the write; doesn't check any other legality aspect.
5543static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
5544 auto *CB = dyn_cast<CallBase>(Val: I);
5545 if (!CB)
5546 // TODO: handle e.g. store to alloca here - only worth doing if we extend
5547 // to allow reload along used path as described below. Otherwise, this
5548 // is simply a store to a dead allocation which will be removed.
5549 return false;
5550 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: CB, TLI);
5551 if (!Dest)
5552 return false;
5553 auto *AI = dyn_cast<AllocaInst>(Val: getUnderlyingObject(V: Dest->Ptr));
5554 if (!AI)
5555 // TODO: allow malloc?
5556 return false;
5557 // TODO: allow memory access dominated by move point? Note that since AI
5558 // could have a reference to itself captured by the call, we would need to
5559 // account for cycles in doing so.
5560 SmallVector<const User *> AllocaUsers;
5561 SmallPtrSet<const User *, 4> Visited;
5562 auto pushUsers = [&](const Instruction &I) {
5563 for (const User *U : I.users()) {
5564 if (Visited.insert(Ptr: U).second)
5565 AllocaUsers.push_back(Elt: U);
5566 }
5567 };
5568 pushUsers(*AI);
5569 while (!AllocaUsers.empty()) {
5570 auto *UserI = cast<Instruction>(Val: AllocaUsers.pop_back_val());
5571 if (isa<GetElementPtrInst>(Val: UserI) || isa<AddrSpaceCastInst>(Val: UserI)) {
5572 pushUsers(*UserI);
5573 continue;
5574 }
5575 if (UserI == CB)
5576 continue;
5577 // TODO: support lifetime.start/end here
5578 return false;
5579 }
5580 return true;
5581}
5582
5583/// Try to move the specified instruction from its current block into the
5584/// beginning of DestBlock, which can only happen if it's safe to move the
5585/// instruction past all of the instructions between it and the end of its
5586/// block.
5587bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
5588 BasicBlock *DestBlock) {
5589 BasicBlock *SrcBlock = I->getParent();
5590
5591 // Cannot move control-flow-involving, volatile loads, vaarg, etc.
5592 if (isa<PHINode>(Val: I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
5593 I->isTerminator())
5594 return false;
5595
5596 // Do not sink static or dynamic alloca instructions. Static allocas must
5597 // remain in the entry block, and dynamic allocas must not be sunk in between
5598 // a stacksave / stackrestore pair, which would incorrectly shorten its
5599 // lifetime.
5600 if (isa<AllocaInst>(Val: I))
5601 return false;
5602
5603 // Do not sink into catchswitch blocks.
5604 if (isa<CatchSwitchInst>(Val: DestBlock->getTerminator()))
5605 return false;
5606
5607 // Do not sink convergent call instructions.
5608 if (auto *CI = dyn_cast<CallInst>(Val: I)) {
5609 if (CI->isConvergent())
5610 return false;
5611 }
5612
5613 // Unless we can prove that the memory write isn't visibile except on the
5614 // path we're sinking to, we must bail.
5615 if (I->mayWriteToMemory()) {
5616 if (!SoleWriteToDeadLocal(I, TLI))
5617 return false;
5618 }
5619
5620 // We can only sink load instructions if there is nothing between the load and
5621 // the end of block that could change the value.
5622 if (I->mayReadFromMemory() &&
5623 !I->hasMetadata(KindID: LLVMContext::MD_invariant_load)) {
5624 // We don't want to do any sophisticated alias analysis, so we only check
5625 // the instructions after I in I's parent block if we try to sink to its
5626 // successor block.
5627 if (DestBlock->getUniquePredecessor() != I->getParent())
5628 return false;
5629 for (BasicBlock::iterator Scan = std::next(x: I->getIterator()),
5630 E = I->getParent()->end();
5631 Scan != E; ++Scan)
5632 if (Scan->mayWriteToMemory())
5633 return false;
5634 }
5635
5636 I->dropDroppableUses(ShouldDrop: [&](const Use *U) {
5637 auto *I = dyn_cast<Instruction>(Val: U->getUser());
5638 if (I && I->getParent() != DestBlock) {
5639 Worklist.add(I);
5640 return true;
5641 }
5642 return false;
5643 });
5644 /// FIXME: We could remove droppable uses that are not dominated by
5645 /// the new position.
5646
5647 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
5648 I->moveBefore(BB&: *DestBlock, I: InsertPos);
5649 ++NumSunkInst;
5650
5651 // Also sink all related debug uses from the source basic block. Otherwise we
5652 // get debug use before the def. Attempt to salvage debug uses first, to
5653 // maximise the range variables have location for. If we cannot salvage, then
5654 // mark the location undef: we know it was supposed to receive a new location
5655 // here, but that computation has been sunk.
5656 SmallVector<DbgVariableRecord *, 2> DbgVariableRecords;
5657 findDbgUsers(V: I, DbgVariableRecords);
5658 if (!DbgVariableRecords.empty())
5659 tryToSinkInstructionDbgVariableRecords(I, InsertPos, SrcBlock, DestBlock,
5660 DPUsers&: DbgVariableRecords);
5661
5662 // PS: there are numerous flaws with this behaviour, not least that right now
5663 // assignments can be re-ordered past other assignments to the same variable
5664 // if they use different Values. Creating more undef assignements can never be
5665 // undone. And salvaging all users outside of this block can un-necessarily
5666 // alter the lifetime of the live-value that the variable refers to.
5667 // Some of these things can be resolved by tolerating debug use-before-defs in
5668 // LLVM-IR, however it depends on the instruction-referencing CodeGen backend
5669 // being used for more architectures.
5670
5671 return true;
5672}
5673
5674void InstCombinerImpl::tryToSinkInstructionDbgVariableRecords(
5675 Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
5676 BasicBlock *DestBlock,
5677 SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords) {
5678 // For all debug values in the destination block, the sunk instruction
5679 // will still be available, so they do not need to be dropped.
5680
5681 // Fetch all DbgVariableRecords not already in the destination.
5682 SmallVector<DbgVariableRecord *, 2> DbgVariableRecordsToSalvage;
5683 for (auto &DVR : DbgVariableRecords)
5684 if (DVR->getParent() != DestBlock)
5685 DbgVariableRecordsToSalvage.push_back(Elt: DVR);
5686
5687 // Fetch a second collection, of DbgVariableRecords in the source block that
5688 // we're going to sink.
5689 SmallVector<DbgVariableRecord *> DbgVariableRecordsToSink;
5690 for (DbgVariableRecord *DVR : DbgVariableRecordsToSalvage)
5691 if (DVR->getParent() == SrcBlock)
5692 DbgVariableRecordsToSink.push_back(Elt: DVR);
5693
5694 // Sort DbgVariableRecords according to their position in the block. This is a
5695 // partial order: DbgVariableRecords attached to different instructions will
5696 // be ordered by the instruction order, but DbgVariableRecords attached to the
5697 // same instruction won't have an order.
5698 auto Order = [](DbgVariableRecord *A, DbgVariableRecord *B) -> bool {
5699 return B->getInstruction()->comesBefore(Other: A->getInstruction());
5700 };
5701 llvm::stable_sort(Range&: DbgVariableRecordsToSink, C: Order);
5702
5703 // If there are two assignments to the same variable attached to the same
5704 // instruction, the ordering between the two assignments is important. Scan
5705 // for this (rare) case and establish which is the last assignment.
5706 using InstVarPair = std::pair<const Instruction *, DebugVariable>;
5707 SmallDenseMap<InstVarPair, DbgVariableRecord *> FilterOutMap;
5708 if (DbgVariableRecordsToSink.size() > 1) {
5709 SmallDenseMap<InstVarPair, unsigned> CountMap;
5710 // Count how many assignments to each variable there is per instruction.
5711 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
5712 DebugVariable DbgUserVariable =
5713 DebugVariable(DVR->getVariable(), DVR->getExpression(),
5714 DVR->getDebugLoc()->getInlinedAt());
5715 CountMap[std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable)] += 1;
5716 }
5717
5718 // If there are any instructions with two assignments, add them to the
5719 // FilterOutMap to record that they need extra filtering.
5720 SmallPtrSet<const Instruction *, 4> DupSet;
5721 for (auto It : CountMap) {
5722 if (It.second > 1) {
5723 FilterOutMap[It.first] = nullptr;
5724 DupSet.insert(Ptr: It.first.first);
5725 }
5726 }
5727
5728 // For all instruction/variable pairs needing extra filtering, find the
5729 // latest assignment.
5730 for (const Instruction *Inst : DupSet) {
5731 for (DbgVariableRecord &DVR :
5732 llvm::reverse(C: filterDbgVars(R: Inst->getDbgRecordRange()))) {
5733 DebugVariable DbgUserVariable =
5734 DebugVariable(DVR.getVariable(), DVR.getExpression(),
5735 DVR.getDebugLoc()->getInlinedAt());
5736 auto FilterIt =
5737 FilterOutMap.find(Val: std::make_pair(x&: Inst, y&: DbgUserVariable));
5738 if (FilterIt == FilterOutMap.end())
5739 continue;
5740 if (FilterIt->second != nullptr)
5741 continue;
5742 FilterIt->second = &DVR;
5743 }
5744 }
5745 }
5746
5747 // Perform cloning of the DbgVariableRecords that we plan on sinking, filter
5748 // out any duplicate assignments identified above.
5749 SmallVector<DbgVariableRecord *, 2> DVRClones;
5750 SmallSet<DebugVariable, 4> SunkVariables;
5751 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
5752 if (DVR->Type == DbgVariableRecord::LocationType::Declare)
5753 continue;
5754
5755 DebugVariable DbgUserVariable =
5756 DebugVariable(DVR->getVariable(), DVR->getExpression(),
5757 DVR->getDebugLoc()->getInlinedAt());
5758
5759 // For any variable where there were multiple assignments in the same place,
5760 // ignore all but the last assignment.
5761 if (!FilterOutMap.empty()) {
5762 InstVarPair IVP = std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable);
5763 auto It = FilterOutMap.find(Val: IVP);
5764
5765 // Filter out.
5766 if (It != FilterOutMap.end() && It->second != DVR)
5767 continue;
5768 }
5769
5770 if (!SunkVariables.insert(V: DbgUserVariable).second)
5771 continue;
5772
5773 if (DVR->isDbgAssign())
5774 continue;
5775
5776 DVRClones.emplace_back(Args: DVR->clone());
5777 LLVM_DEBUG(dbgs() << "CLONE: " << *DVRClones.back() << '\n');
5778 }
5779
5780 // Perform salvaging without the clones, then sink the clones.
5781 if (DVRClones.empty())
5782 return;
5783
5784 salvageDebugInfoForDbgValues(I&: *I, DPInsns: DbgVariableRecordsToSalvage);
5785
5786 // The clones are in reverse order of original appearance. Assert that the
5787 // head bit is set on the iterator as we _should_ have received it via
5788 // getFirstInsertionPt. Inserting like this will reverse the clone order as
5789 // we'll repeatedly insert at the head, such as:
5790 // DVR-3 (third insertion goes here)
5791 // DVR-2 (second insertion goes here)
5792 // DVR-1 (first insertion goes here)
5793 // Any-Prior-DVRs
5794 // InsertPtInst
5795 assert(InsertPos.getHeadBit());
5796 for (DbgVariableRecord *DVRClone : DVRClones) {
5797 InsertPos->getParent()->insertDbgRecordBefore(DR: DVRClone, Here: InsertPos);
5798 LLVM_DEBUG(dbgs() << "SINK: " << *DVRClone << '\n');
5799 }
5800}
5801
5802bool InstCombinerImpl::run() {
5803 while (!Worklist.isEmpty()) {
5804 // Walk deferred instructions in reverse order, and push them to the
5805 // worklist, which means they'll end up popped from the worklist in-order.
5806 while (Instruction *I = Worklist.popDeferred()) {
5807 // Check to see if we can DCE the instruction. We do this already here to
5808 // reduce the number of uses and thus allow other folds to trigger.
5809 // Note that eraseInstFromFunction() may push additional instructions on
5810 // the deferred worklist, so this will DCE whole instruction chains.
5811 if (isInstructionTriviallyDead(I, TLI: &TLI)) {
5812 eraseInstFromFunction(I&: *I);
5813 ++NumDeadInst;
5814 continue;
5815 }
5816
5817 Worklist.push(I);
5818 }
5819
5820 Instruction *I = Worklist.removeOne();
5821 if (I == nullptr) continue; // skip null values.
5822
5823 // Check to see if we can DCE the instruction.
5824 if (isInstructionTriviallyDead(I, TLI: &TLI)) {
5825 eraseInstFromFunction(I&: *I);
5826 ++NumDeadInst;
5827 continue;
5828 }
5829
5830 if (!DebugCounter::shouldExecute(Counter&: VisitCounter))
5831 continue;
5832
5833 // See if we can trivially sink this instruction to its user if we can
5834 // prove that the successor is not executed more frequently than our block.
5835 // Return the UserBlock if successful.
5836 auto getOptionalSinkBlockForInst =
5837 [this](Instruction *I) -> std::optional<BasicBlock *> {
5838 if (!EnableCodeSinking)
5839 return std::nullopt;
5840
5841 BasicBlock *BB = I->getParent();
5842 BasicBlock *UserParent = nullptr;
5843 unsigned NumUsers = 0;
5844
5845 for (Use &U : I->uses()) {
5846 User *User = U.getUser();
5847 if (User->isDroppable()) {
5848 // Do not sink if there are dereferenceable assumes that would be
5849 // removed.
5850 auto II = dyn_cast<IntrinsicInst>(Val: User);
5851 if (II->getIntrinsicID() != Intrinsic::assume ||
5852 !II->getOperandBundle(Name: "dereferenceable"))
5853 continue;
5854 }
5855
5856 if (NumUsers > MaxSinkNumUsers)
5857 return std::nullopt;
5858
5859 Instruction *UserInst = cast<Instruction>(Val: User);
5860 // Special handling for Phi nodes - get the block the use occurs in.
5861 BasicBlock *UserBB = UserInst->getParent();
5862 if (PHINode *PN = dyn_cast<PHINode>(Val: UserInst))
5863 UserBB = PN->getIncomingBlock(U);
5864 // Bail out if we have uses in different blocks. We don't do any
5865 // sophisticated analysis (i.e finding NearestCommonDominator of these
5866 // use blocks).
5867 if (UserParent && UserParent != UserBB)
5868 return std::nullopt;
5869 UserParent = UserBB;
5870
5871 // Make sure these checks are done only once, naturally we do the checks
5872 // the first time we get the userparent, this will save compile time.
5873 if (NumUsers == 0) {
5874 // Try sinking to another block. If that block is unreachable, then do
5875 // not bother. SimplifyCFG should handle it.
5876 if (UserParent == BB || !DT.isReachableFromEntry(A: UserParent))
5877 return std::nullopt;
5878
5879 auto *Term = UserParent->getTerminator();
5880 // See if the user is one of our successors that has only one
5881 // predecessor, so that we don't have to split the critical edge.
5882 // Another option where we can sink is a block that ends with a
5883 // terminator that does not pass control to other block (such as
5884 // return or unreachable or resume). In this case:
5885 // - I dominates the User (by SSA form);
5886 // - the User will be executed at most once.
5887 // So sinking I down to User is always profitable or neutral.
5888 if (UserParent->getUniquePredecessor() != BB && !succ_empty(I: Term))
5889 return std::nullopt;
5890
5891 assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
5892 }
5893
5894 NumUsers++;
5895 }
5896
5897 // No user or only has droppable users.
5898 if (!UserParent)
5899 return std::nullopt;
5900
5901 return UserParent;
5902 };
5903
5904 auto OptBB = getOptionalSinkBlockForInst(I);
5905 if (OptBB) {
5906 auto *UserParent = *OptBB;
5907 // Okay, the CFG is simple enough, try to sink this instruction.
5908 if (tryToSinkInstruction(I, DestBlock: UserParent)) {
5909 LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
5910 MadeIRChange = true;
5911 // We'll add uses of the sunk instruction below, but since
5912 // sinking can expose opportunities for it's *operands* add
5913 // them to the worklist
5914 for (Use &U : I->operands())
5915 if (Instruction *OpI = dyn_cast<Instruction>(Val: U.get()))
5916 Worklist.push(I: OpI);
5917 }
5918 }
5919
5920 // Now that we have an instruction, try combining it to simplify it.
5921 Builder.SetInsertPoint(I);
5922 Builder.SetCurrentDebugLocation(I->getDebugLoc());
5923 // Used by our IRBuilder inserter to copy annotation metadata.
5924 AnnotationMetadataSource = I;
5925
5926#ifndef NDEBUG
5927 std::string OrigI;
5928#endif
5929 LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS););
5930 LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
5931
5932 if (Instruction *Result = visit(I&: *I)) {
5933 ++NumCombined;
5934 // Should we replace the old instruction with a new one?
5935 if (Result != I) {
5936 LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
5937 << " New = " << *Result << '\n');
5938
5939 // We copy the old instruction's DebugLoc to the new instruction, unless
5940 // InstCombine already assigned a DebugLoc to it, in which case we
5941 // should trust the more specifically selected DebugLoc.
5942 Result->setDebugLoc(Result->getDebugLoc().orElse(Other: I->getDebugLoc()));
5943 // We also copy annotation metadata to the new instruction.
5944 Result->copyMetadata(SrcInst: *I, WL: LLVMContext::MD_annotation);
5945 // Everything uses the new instruction now.
5946 I->replaceAllUsesWith(V: Result);
5947
5948 // Move the name to the new instruction first.
5949 Result->takeName(V: I);
5950
5951 // Insert the new instruction into the basic block...
5952 BasicBlock *InstParent = I->getParent();
5953 BasicBlock::iterator InsertPos = I->getIterator();
5954
5955 // Are we replace a PHI with something that isn't a PHI, or vice versa?
5956 if (isa<PHINode>(Val: Result) != isa<PHINode>(Val: I)) {
5957 // We need to fix up the insertion point.
5958 if (isa<PHINode>(Val: I)) // PHI -> Non-PHI
5959 InsertPos = InstParent->getFirstInsertionPt();
5960 else // Non-PHI -> PHI
5961 InsertPos = InstParent->getFirstNonPHIIt();
5962 }
5963
5964 Result->insertInto(ParentBB: InstParent, It: InsertPos);
5965
5966 // Register newly created assumptions.
5967 if (auto *Assume = dyn_cast<AssumeInst>(Val: Result))
5968 AC.registerAssumption(CI: Assume);
5969
5970 // Push the new instruction and any users onto the worklist.
5971 Worklist.pushUsersToWorkList(I&: *Result);
5972 Worklist.push(I: Result);
5973
5974 eraseInstFromFunction(I&: *I);
5975 } else {
5976 LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
5977 << " New = " << *I << '\n');
5978
5979 // If the instruction was modified, it's possible that it is now dead.
5980 // if so, remove it.
5981 if (isInstructionTriviallyDead(I, TLI: &TLI)) {
5982 eraseInstFromFunction(I&: *I);
5983 } else {
5984 Worklist.pushUsersToWorkList(I&: *I);
5985 Worklist.push(I);
5986 }
5987 }
5988 MadeIRChange = true;
5989 }
5990 }
5991
5992 Worklist.zap();
5993 return MadeIRChange;
5994}
5995
5996// Track the scopes used by !alias.scope and !noalias. In a function, a
5997// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
5998// by both sets. If not, the declaration of the scope can be safely omitted.
5999// The MDNode of the scope can be omitted as well for the instructions that are
6000// part of this function. We do not do that at this point, as this might become
6001// too time consuming to do.
6002class AliasScopeTracker {
6003 SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
6004 SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
6005
6006public:
6007 void analyse(Instruction *I) {
6008 // This seems to be faster than checking 'mayReadOrWriteMemory()'.
6009 if (!I->hasMetadataOtherThanDebugLoc())
6010 return;
6011
6012 auto Track = [](Metadata *ScopeList, auto &Container) {
6013 const auto *MDScopeList = dyn_cast_or_null<MDNode>(Val: ScopeList);
6014 if (!MDScopeList || !Container.insert(MDScopeList).second)
6015 return;
6016 for (const auto &MDOperand : MDScopeList->operands())
6017 if (auto *MDScope = dyn_cast<MDNode>(Val: MDOperand))
6018 Container.insert(MDScope);
6019 };
6020
6021 Track(I->getMetadata(KindID: LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
6022 Track(I->getMetadata(KindID: LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
6023 }
6024
6025 bool isNoAliasScopeDeclDead(Instruction *Inst) {
6026 NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: Inst);
6027 if (!Decl)
6028 return false;
6029
6030 assert(Decl->use_empty() &&
6031 "llvm.experimental.noalias.scope.decl in use ?");
6032 const MDNode *MDSL = Decl->getScopeList();
6033 assert(MDSL->getNumOperands() == 1 &&
6034 "llvm.experimental.noalias.scope should refer to a single scope");
6035 auto &MDOperand = MDSL->getOperand(I: 0);
6036 if (auto *MD = dyn_cast<MDNode>(Val: MDOperand))
6037 return !UsedAliasScopesAndLists.contains(Ptr: MD) ||
6038 !UsedNoAliasScopesAndLists.contains(Ptr: MD);
6039
6040 // Not an MDNode ? throw away.
6041 return true;
6042 }
6043};
6044
6045/// Populate the IC worklist from a function, by walking it in reverse
6046/// post-order and adding all reachable code to the worklist.
6047///
6048/// This has a couple of tricks to make the code faster and more powerful. In
6049/// particular, we constant fold and DCE instructions as we go, to avoid adding
6050/// them to the worklist (this significantly speeds up instcombine on code where
6051/// many instructions are dead or constant). Additionally, if we find a branch
6052/// whose condition is a known constant, we only visit the reachable successors.
6053bool InstCombinerImpl::prepareWorklist(Function &F) {
6054 bool MadeIRChange = false;
6055 SmallPtrSet<BasicBlock *, 32> LiveBlocks;
6056 SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
6057 DenseMap<Constant *, Constant *> FoldedConstants;
6058 AliasScopeTracker SeenAliasScopes;
6059
6060 auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) {
6061 for (BasicBlock *Succ : successors(BB))
6062 if (Succ != LiveSucc && DeadEdges.insert(V: {BB, Succ}).second)
6063 for (PHINode &PN : Succ->phis())
6064 for (Use &U : PN.incoming_values())
6065 if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(Val: U)) {
6066 U.set(PoisonValue::get(T: PN.getType()));
6067 MadeIRChange = true;
6068 }
6069 };
6070
6071 for (BasicBlock *BB : RPOT) {
6072 if (!BB->isEntryBlock() && all_of(Range: predecessors(BB), P: [&](BasicBlock *Pred) {
6073 return DeadEdges.contains(V: {Pred, BB}) || DT.dominates(A: BB, B: Pred);
6074 })) {
6075 HandleOnlyLiveSuccessor(BB, nullptr);
6076 continue;
6077 }
6078 LiveBlocks.insert(Ptr: BB);
6079
6080 for (Instruction &Inst : llvm::make_early_inc_range(Range&: *BB)) {
6081 // ConstantProp instruction if trivially constant.
6082 if (!Inst.use_empty() &&
6083 (Inst.getNumOperands() == 0 || isa<Constant>(Val: Inst.getOperand(i: 0))))
6084 if (Constant *C = ConstantFoldInstruction(I: &Inst, DL, TLI: &TLI)) {
6085 LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
6086 << '\n');
6087 Inst.replaceAllUsesWith(V: C);
6088 ++NumConstProp;
6089 if (isInstructionTriviallyDead(I: &Inst, TLI: &TLI))
6090 Inst.eraseFromParent();
6091 MadeIRChange = true;
6092 continue;
6093 }
6094
6095 // See if we can constant fold its operands.
6096 for (Use &U : Inst.operands()) {
6097 if (!isa<ConstantVector>(Val: U) && !isa<ConstantExpr>(Val: U))
6098 continue;
6099
6100 auto *C = cast<Constant>(Val&: U);
6101 Constant *&FoldRes = FoldedConstants[C];
6102 if (!FoldRes)
6103 FoldRes = ConstantFoldConstant(C, DL, TLI: &TLI);
6104
6105 if (FoldRes != C) {
6106 LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
6107 << "\n Old = " << *C
6108 << "\n New = " << *FoldRes << '\n');
6109 U = FoldRes;
6110 MadeIRChange = true;
6111 }
6112 }
6113
6114 // Skip processing debug and pseudo intrinsics in InstCombine. Processing
6115 // these call instructions consumes non-trivial amount of time and
6116 // provides no value for the optimization.
6117 if (!Inst.isDebugOrPseudoInst()) {
6118 InstrsForInstructionWorklist.push_back(Elt: &Inst);
6119 SeenAliasScopes.analyse(I: &Inst);
6120 }
6121 }
6122
6123 // If this is a branch or switch on a constant, mark only the single
6124 // live successor. Otherwise assume all successors are live.
6125 Instruction *TI = BB->getTerminator();
6126 if (CondBrInst *BI = dyn_cast<CondBrInst>(Val: TI)) {
6127 if (isa<UndefValue>(Val: BI->getCondition())) {
6128 // Branch on undef is UB.
6129 HandleOnlyLiveSuccessor(BB, nullptr);
6130 continue;
6131 }
6132 if (auto *Cond = dyn_cast<ConstantInt>(Val: BI->getCondition())) {
6133 bool CondVal = Cond->getZExtValue();
6134 HandleOnlyLiveSuccessor(BB, BI->getSuccessor(i: !CondVal));
6135 continue;
6136 }
6137 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
6138 if (isa<UndefValue>(Val: SI->getCondition())) {
6139 // Switch on undef is UB.
6140 HandleOnlyLiveSuccessor(BB, nullptr);
6141 continue;
6142 }
6143 if (auto *Cond = dyn_cast<ConstantInt>(Val: SI->getCondition())) {
6144 HandleOnlyLiveSuccessor(BB,
6145 SI->findCaseValue(C: Cond)->getCaseSuccessor());
6146 continue;
6147 }
6148 }
6149 }
6150
6151 // Remove instructions inside unreachable blocks. This prevents the
6152 // instcombine code from having to deal with some bad special cases, and
6153 // reduces use counts of instructions.
6154 for (BasicBlock &BB : F) {
6155 if (LiveBlocks.count(Ptr: &BB))
6156 continue;
6157
6158 unsigned NumDeadInstInBB;
6159 NumDeadInstInBB = removeAllNonTerminatorAndEHPadInstructions(BB: &BB);
6160
6161 MadeIRChange |= NumDeadInstInBB != 0;
6162 NumDeadInst += NumDeadInstInBB;
6163 }
6164
6165 // Once we've found all of the instructions to add to instcombine's worklist,
6166 // add them in reverse order. This way instcombine will visit from the top
6167 // of the function down. This jives well with the way that it adds all uses
6168 // of instructions to the worklist after doing a transformation, thus avoiding
6169 // some N^2 behavior in pathological cases.
6170 Worklist.reserve(Size: InstrsForInstructionWorklist.size());
6171 for (Instruction *Inst : reverse(C&: InstrsForInstructionWorklist)) {
6172 // DCE instruction if trivially dead. As we iterate in reverse program
6173 // order here, we will clean up whole chains of dead instructions.
6174 if (isInstructionTriviallyDead(I: Inst, TLI: &TLI) ||
6175 SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
6176 ++NumDeadInst;
6177 LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
6178 salvageDebugInfo(I&: *Inst);
6179 Inst->eraseFromParent();
6180 MadeIRChange = true;
6181 continue;
6182 }
6183
6184 Worklist.push(I: Inst);
6185 }
6186
6187 return MadeIRChange;
6188}
6189
6190void InstCombiner::computeBackEdges() {
6191 // Collect backedges.
6192 SmallVector<bool> Visited(F.getMaxBlockNumber());
6193 for (BasicBlock *BB : RPOT) {
6194 Visited[BB->getNumber()] = true;
6195 for (BasicBlock *Succ : successors(BB))
6196 if (Visited[Succ->getNumber()])
6197 BackEdges.insert(V: {BB, Succ});
6198 }
6199 ComputedBackEdges = true;
6200}
6201
6202static bool combineInstructionsOverFunction(
6203 Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
6204 AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
6205 DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
6206 BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI,
6207 const InstCombineOptions &Opts) {
6208 auto &DL = F.getDataLayout();
6209 bool VerifyFixpoint = Opts.VerifyFixpoint &&
6210 !F.hasFnAttribute(Kind: "instcombine-no-verify-fixpoint");
6211
6212 ReversePostOrderTraversal<BasicBlock *> RPOT(&F.front());
6213
6214 // Lower dbg.declare intrinsics otherwise their value may be clobbered
6215 // by instcombiner.
6216 bool MadeIRChange = false;
6217 if (ShouldLowerDbgDeclare)
6218 MadeIRChange = LowerDbgDeclare(F);
6219
6220 // Iterate while there is work to do.
6221 unsigned Iteration = 0;
6222 while (true) {
6223 if (Iteration >= Opts.MaxIterations && !VerifyFixpoint) {
6224 LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations
6225 << " on " << F.getName()
6226 << " reached; stopping without verifying fixpoint\n");
6227 break;
6228 }
6229
6230 ++Iteration;
6231 ++NumWorklistIterations;
6232 LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
6233 << F.getName() << "\n");
6234
6235 InstCombinerImpl IC(Worklist, F, AA, AC, TLI, TTI, DT, ORE, BFI, BPI, PSI,
6236 DL, RPOT);
6237 IC.MaxArraySizeForCombine = MaxArraySize;
6238 bool MadeChangeInThisIteration = IC.prepareWorklist(F);
6239 MadeChangeInThisIteration |= IC.run();
6240 if (!MadeChangeInThisIteration)
6241 break;
6242
6243 MadeIRChange = true;
6244 if (Iteration > Opts.MaxIterations) {
6245 reportFatalUsageError(
6246 reason: "Instruction Combining on " + Twine(F.getName()) +
6247 " did not reach a fixpoint after " + Twine(Opts.MaxIterations) +
6248 " iterations. " +
6249 "Use 'instcombine<no-verify-fixpoint>' or function attribute "
6250 "'instcombine-no-verify-fixpoint' to suppress this error.");
6251 }
6252 }
6253
6254 if (Iteration == 1)
6255 ++NumOneIteration;
6256 else if (Iteration == 2)
6257 ++NumTwoIterations;
6258 else if (Iteration == 3)
6259 ++NumThreeIterations;
6260 else
6261 ++NumFourOrMoreIterations;
6262
6263 return MadeIRChange;
6264}
6265
6266InstCombinePass::InstCombinePass(InstCombineOptions Opts) : Options(Opts) {}
6267
6268void InstCombinePass::printPipeline(
6269 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
6270 static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline(
6271 OS, MapClassName2PassName);
6272 OS << '<';
6273 OS << "max-iterations=" << Options.MaxIterations << ";";
6274 OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint";
6275 OS << '>';
6276}
6277
6278char InstCombinePass::ID = 0;
6279
6280PreservedAnalyses InstCombinePass::run(Function &F,
6281 FunctionAnalysisManager &AM) {
6282 auto &LRT = AM.getResult<LastRunTrackingAnalysis>(IR&: F);
6283 // No changes since last InstCombine pass, exit early.
6284 if (LRT.shouldSkip(ID: &ID))
6285 return PreservedAnalyses::all();
6286
6287 auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F);
6288 auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
6289 auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F);
6290 auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
6291 auto &TTI = AM.getResult<TargetIRAnalysis>(IR&: F);
6292
6293 auto *AA = &AM.getResult<AAManager>(IR&: F);
6294 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F);
6295 ProfileSummaryInfo *PSI =
6296 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(IR&: *F.getParent());
6297 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
6298 &AM.getResult<BlockFrequencyAnalysis>(IR&: F) : nullptr;
6299 auto *BPI = AM.getCachedResult<BranchProbabilityAnalysis>(IR&: F);
6300
6301 if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
6302 BFI, BPI, PSI, Opts: Options)) {
6303 // No changes, all analyses are preserved.
6304 LRT.update(ID: &ID, /*Changed=*/false);
6305 return PreservedAnalyses::all();
6306 }
6307
6308 // Mark all the analyses that instcombine updates as preserved.
6309 PreservedAnalyses PA;
6310 LRT.update(ID: &ID, /*Changed=*/true);
6311 PA.preserve<LastRunTrackingAnalysis>();
6312 PA.preserveSet<CFGAnalyses>();
6313 return PA;
6314}
6315
6316void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
6317 AU.setPreservesCFG();
6318 AU.addRequired<AAResultsWrapperPass>();
6319 AU.addRequired<AssumptionCacheTracker>();
6320 AU.addRequired<TargetLibraryInfoWrapperPass>();
6321 AU.addRequired<TargetTransformInfoWrapperPass>();
6322 AU.addRequired<DominatorTreeWrapperPass>();
6323 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
6324 AU.addPreserved<DominatorTreeWrapperPass>();
6325 AU.addPreserved<AAResultsWrapperPass>();
6326 AU.addPreserved<BasicAAWrapperPass>();
6327 AU.addPreserved<GlobalsAAWrapperPass>();
6328 AU.addRequired<ProfileSummaryInfoWrapperPass>();
6329 LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
6330}
6331
6332bool InstructionCombiningPass::runOnFunction(Function &F) {
6333 if (skipFunction(F))
6334 return false;
6335
6336 // Required analyses.
6337 auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
6338 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
6339 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
6340 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
6341 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
6342 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
6343
6344 // Optional analyses.
6345 ProfileSummaryInfo *PSI =
6346 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
6347 BlockFrequencyInfo *BFI =
6348 (PSI && PSI->hasProfileSummary()) ?
6349 &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
6350 nullptr;
6351 BranchProbabilityInfo *BPI = nullptr;
6352 if (auto *WrapperPass =
6353 getAnalysisIfAvailable<BranchProbabilityInfoWrapperPass>())
6354 BPI = &WrapperPass->getBPI();
6355
6356 return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
6357 BFI, BPI, PSI, Opts: InstCombineOptions());
6358}
6359
6360char InstructionCombiningPass::ID = 0;
6361
6362InstructionCombiningPass::InstructionCombiningPass() : FunctionPass(ID) {}
6363
6364INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
6365 "Combine redundant instructions", false, false)
6366INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
6367INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
6368INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
6369INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
6370INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
6371INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
6372INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
6373INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
6374INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
6375INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
6376 "Combine redundant instructions", false, false)
6377
6378// Initialization Routines.
6379void llvm::initializeInstCombine(PassRegistry &Registry) {
6380 initializeInstructionCombiningPassPass(Registry);
6381}
6382
6383FunctionPass *llvm::createInstructionCombiningPass() {
6384 return new InstructionCombiningPass();
6385}
6386