1//===- InstructionCombining.cpp - Combine multiple instructions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// InstructionCombining - Combine instructions to form fewer, simple
10// instructions. This pass does not modify the CFG. This pass is where
11// algebraic simplification happens.
12//
13// This pass combines things like:
14// %Y = add i32 %X, 1
15// %Z = add i32 %Y, 1
16// into:
17// %Z = add i32 %X, 2
18//
19// This is a simple worklist driven algorithm.
20//
21// This pass guarantees that the following canonicalizations are performed on
22// the program:
23// 1. If a binary operator has a constant operand, it is moved to the RHS
24// 2. Bitwise operators with constant operands are always grouped so that
25// shifts are performed first, then or's, then and's, then xor's.
26// 3. Compare instructions are converted from <,>,<=,>= to ==,!= if possible
27// 4. All cmp instructions on boolean values are replaced with logical ops
28// 5. add X, X is represented as (X*2) => (X << 1)
29// 6. Multiplies with a power-of-two constant argument are transformed into
30// shifts.
31// ... etc.
32//
33//===----------------------------------------------------------------------===//
34
35#include "InstCombineInternal.h"
36#include "llvm/ADT/APFloat.h"
37#include "llvm/ADT/APInt.h"
38#include "llvm/ADT/ArrayRef.h"
39#include "llvm/ADT/DenseMap.h"
40#include "llvm/ADT/SmallPtrSet.h"
41#include "llvm/ADT/SmallVector.h"
42#include "llvm/ADT/Statistic.h"
43#include "llvm/Analysis/AliasAnalysis.h"
44#include "llvm/Analysis/AssumptionCache.h"
45#include "llvm/Analysis/BasicAliasAnalysis.h"
46#include "llvm/Analysis/BlockFrequencyInfo.h"
47#include "llvm/Analysis/CFG.h"
48#include "llvm/Analysis/ConstantFolding.h"
49#include "llvm/Analysis/GlobalsModRef.h"
50#include "llvm/Analysis/InstructionSimplify.h"
51#include "llvm/Analysis/LastRunTrackingAnalysis.h"
52#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
53#include "llvm/Analysis/MemoryBuiltins.h"
54#include "llvm/Analysis/OptimizationRemarkEmitter.h"
55#include "llvm/Analysis/ProfileSummaryInfo.h"
56#include "llvm/Analysis/TargetFolder.h"
57#include "llvm/Analysis/TargetLibraryInfo.h"
58#include "llvm/Analysis/TargetTransformInfo.h"
59#include "llvm/Analysis/Utils/Local.h"
60#include "llvm/Analysis/ValueTracking.h"
61#include "llvm/Analysis/VectorUtils.h"
62#include "llvm/IR/BasicBlock.h"
63#include "llvm/IR/CFG.h"
64#include "llvm/IR/Constant.h"
65#include "llvm/IR/Constants.h"
66#include "llvm/IR/DIBuilder.h"
67#include "llvm/IR/DataLayout.h"
68#include "llvm/IR/DebugInfo.h"
69#include "llvm/IR/DerivedTypes.h"
70#include "llvm/IR/Dominators.h"
71#include "llvm/IR/EHPersonalities.h"
72#include "llvm/IR/Function.h"
73#include "llvm/IR/GetElementPtrTypeIterator.h"
74#include "llvm/IR/IRBuilder.h"
75#include "llvm/IR/InstrTypes.h"
76#include "llvm/IR/Instruction.h"
77#include "llvm/IR/Instructions.h"
78#include "llvm/IR/IntrinsicInst.h"
79#include "llvm/IR/Intrinsics.h"
80#include "llvm/IR/Metadata.h"
81#include "llvm/IR/Operator.h"
82#include "llvm/IR/PassManager.h"
83#include "llvm/IR/PatternMatch.h"
84#include "llvm/IR/Type.h"
85#include "llvm/IR/Use.h"
86#include "llvm/IR/User.h"
87#include "llvm/IR/Value.h"
88#include "llvm/IR/ValueHandle.h"
89#include "llvm/InitializePasses.h"
90#include "llvm/Support/Casting.h"
91#include "llvm/Support/CommandLine.h"
92#include "llvm/Support/Compiler.h"
93#include "llvm/Support/Debug.h"
94#include "llvm/Support/DebugCounter.h"
95#include "llvm/Support/ErrorHandling.h"
96#include "llvm/Support/KnownBits.h"
97#include "llvm/Support/KnownFPClass.h"
98#include "llvm/Support/raw_ostream.h"
99#include "llvm/Transforms/InstCombine/InstCombine.h"
100#include "llvm/Transforms/Utils/BasicBlockUtils.h"
101#include "llvm/Transforms/Utils/Local.h"
102#include <algorithm>
103#include <cassert>
104#include <cstdint>
105#include <memory>
106#include <optional>
107#include <string>
108#include <utility>
109
110#define DEBUG_TYPE "instcombine"
111#include "llvm/Transforms/Utils/InstructionWorklist.h"
112#include <optional>
113
114using namespace llvm;
115using namespace llvm::PatternMatch;
116
117STATISTIC(NumWorklistIterations,
118 "Number of instruction combining iterations performed");
119STATISTIC(NumOneIteration, "Number of functions with one iteration");
120STATISTIC(NumTwoIterations, "Number of functions with two iterations");
121STATISTIC(NumThreeIterations, "Number of functions with three iterations");
122STATISTIC(NumFourOrMoreIterations,
123 "Number of functions with four or more iterations");
124
125STATISTIC(NumCombined , "Number of insts combined");
126STATISTIC(NumConstProp, "Number of constant folds");
127STATISTIC(NumDeadInst , "Number of dead inst eliminated");
128STATISTIC(NumSunkInst , "Number of instructions sunk");
129STATISTIC(NumExpand, "Number of expansions");
130STATISTIC(NumFactor , "Number of factorizations");
131STATISTIC(NumReassoc , "Number of reassociations");
132DEBUG_COUNTER(VisitCounter, "instcombine-visit",
133 "Controls which instructions are visited");
134
135static cl::opt<bool> EnableCodeSinking("instcombine-code-sinking",
136 cl::desc("Enable code sinking"),
137 cl::init(Val: true));
138
139static cl::opt<unsigned> MaxSinkNumUsers(
140 "instcombine-max-sink-users", cl::init(Val: 32),
141 cl::desc("Maximum number of undroppable users for instruction sinking"));
142
143static cl::opt<unsigned>
144MaxArraySize("instcombine-maxarray-size", cl::init(Val: 1024),
145 cl::desc("Maximum array size considered when doing a combine"));
146
147namespace llvm {
148extern cl::opt<bool> ProfcheckDisableMetadataFixes;
149} // end namespace llvm
150
151// FIXME: Remove this flag when it is no longer necessary to convert
152// llvm.dbg.declare to avoid inaccurate debug info. Setting this to false
153// increases variable availability at the cost of accuracy. Variables that
154// cannot be promoted by mem2reg or SROA will be described as living in memory
155// for their entire lifetime. However, passes like DSE and instcombine can
156// delete stores to the alloca, leading to misleading and inaccurate debug
157// information. This flag can be removed when those passes are fixed.
158static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
159 cl::Hidden, cl::init(Val: true));
160
161std::optional<Instruction *>
162InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) {
163 // Handle target specific intrinsics
164 if (II.getCalledFunction()->isTargetIntrinsic()) {
165 return TTIForTargetIntrinsicsOnly.instCombineIntrinsic(IC&: *this, II);
166 }
167 return std::nullopt;
168}
169
170std::optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
171 IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
172 bool &KnownBitsComputed) {
173 // Handle target specific intrinsics
174 if (II.getCalledFunction()->isTargetIntrinsic()) {
175 return TTIForTargetIntrinsicsOnly.simplifyDemandedUseBitsIntrinsic(
176 IC&: *this, II, DemandedMask, Known, KnownBitsComputed);
177 }
178 return std::nullopt;
179}
180
181std::optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
182 IntrinsicInst &II, APInt DemandedElts, APInt &PoisonElts,
183 APInt &PoisonElts2, APInt &PoisonElts3,
184 std::function<void(Instruction *, unsigned, APInt, APInt &)>
185 SimplifyAndSetOp) {
186 // Handle target specific intrinsics
187 if (II.getCalledFunction()->isTargetIntrinsic()) {
188 return TTIForTargetIntrinsicsOnly.simplifyDemandedVectorEltsIntrinsic(
189 IC&: *this, II, DemandedElts, UndefElts&: PoisonElts, UndefElts2&: PoisonElts2, UndefElts3&: PoisonElts3,
190 SimplifyAndSetOp);
191 }
192 return std::nullopt;
193}
194
195bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
196 // Approved exception for TTI use: This queries a legality property of the
197 // target, not an profitability heuristic. Ideally this should be part of
198 // DataLayout instead.
199 return TTIForTargetIntrinsicsOnly.isValidAddrSpaceCast(FromAS, ToAS);
200}
201
202Value *InstCombinerImpl::EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP) {
203 if (!RewriteGEP)
204 return llvm::emitGEPOffset(Builder: &Builder, DL, GEP);
205
206 IRBuilderBase::InsertPointGuard Guard(Builder);
207 auto *Inst = dyn_cast<Instruction>(Val: GEP);
208 if (Inst)
209 Builder.SetInsertPoint(Inst);
210
211 Value *Offset = EmitGEPOffset(GEP);
212 // Rewrite non-trivial GEPs to avoid duplicating the offset arithmetic.
213 if (Inst && !GEP->hasAllConstantIndices() &&
214 !GEP->getSourceElementType()->isIntegerTy(Bitwidth: 8)) {
215 replaceInstUsesWith(
216 I&: *Inst, V: Builder.CreateGEP(Ty: Builder.getInt8Ty(), Ptr: GEP->getPointerOperand(),
217 IdxList: Offset, Name: "", NW: GEP->getNoWrapFlags()));
218 eraseInstFromFunction(I&: *Inst);
219 }
220 return Offset;
221}
222
223Value *InstCombinerImpl::EmitGEPOffsets(ArrayRef<GEPOperator *> GEPs,
224 GEPNoWrapFlags NW, Type *IdxTy,
225 bool RewriteGEPs) {
226 auto Add = [&](Value *Sum, Value *Offset) -> Value * {
227 if (Sum)
228 return Builder.CreateAdd(LHS: Sum, RHS: Offset, Name: "", HasNUW: NW.hasNoUnsignedWrap(),
229 HasNSW: NW.isInBounds());
230 else
231 return Offset;
232 };
233
234 Value *Sum = nullptr;
235 Value *OneUseSum = nullptr;
236 Value *OneUseBase = nullptr;
237 GEPNoWrapFlags OneUseFlags = GEPNoWrapFlags::all();
238 for (GEPOperator *GEP : reverse(C&: GEPs)) {
239 Value *Offset;
240 {
241 // Expand the offset at the point of the previous GEP to enable rewriting.
242 // However, use the original insertion point for calculating Sum.
243 IRBuilderBase::InsertPointGuard Guard(Builder);
244 auto *Inst = dyn_cast<Instruction>(Val: GEP);
245 if (RewriteGEPs && Inst)
246 Builder.SetInsertPoint(Inst);
247
248 Offset = llvm::emitGEPOffset(Builder: &Builder, DL, GEP);
249 if (Offset->getType() != IdxTy)
250 Offset = Builder.CreateVectorSplat(
251 EC: cast<VectorType>(Val: IdxTy)->getElementCount(), V: Offset);
252 if (GEP->hasOneUse()) {
253 // Offsets of one-use GEPs will be merged into the next multi-use GEP.
254 OneUseSum = Add(OneUseSum, Offset);
255 OneUseFlags = OneUseFlags.intersectForOffsetAdd(Other: GEP->getNoWrapFlags());
256 if (!OneUseBase)
257 OneUseBase = GEP->getPointerOperand();
258 continue;
259 }
260
261 if (OneUseSum)
262 Offset = Add(OneUseSum, Offset);
263
264 // Rewrite the GEP to reuse the computed offset. This also includes
265 // offsets from preceding one-use GEPs.
266 if (RewriteGEPs && Inst &&
267 !(GEP->getSourceElementType()->isIntegerTy(Bitwidth: 8) &&
268 GEP->getOperand(i_nocapture: 1) == Offset)) {
269 replaceInstUsesWith(
270 I&: *Inst,
271 V: Builder.CreatePtrAdd(
272 Ptr: OneUseBase ? OneUseBase : GEP->getPointerOperand(), Offset, Name: "",
273 NW: OneUseFlags.intersectForOffsetAdd(Other: GEP->getNoWrapFlags())));
274 eraseInstFromFunction(I&: *Inst);
275 }
276 }
277
278 Sum = Add(Sum, Offset);
279 OneUseSum = OneUseBase = nullptr;
280 OneUseFlags = GEPNoWrapFlags::all();
281 }
282 if (OneUseSum)
283 Sum = Add(Sum, OneUseSum);
284 if (!Sum)
285 return Constant::getNullValue(Ty: IdxTy);
286 return Sum;
287}
288
289/// Legal integers and common types are considered desirable. This is used to
290/// avoid creating instructions with types that may not be supported well by the
291/// the backend.
292/// NOTE: This treats i8, i16 and i32 specially because they are common
293/// types in frontend languages.
294bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
295 switch (BitWidth) {
296 case 8:
297 case 16:
298 case 32:
299 return true;
300 default:
301 return DL.isLegalInteger(Width: BitWidth);
302 }
303}
304
305/// Return true if it is desirable to convert an integer computation from a
306/// given bit width to a new bit width.
307/// We don't want to convert from a legal or desirable type (like i8) to an
308/// illegal type or from a smaller to a larger illegal type. A width of '1'
309/// is always treated as a desirable type because i1 is a fundamental type in
310/// IR, and there are many specialized optimizations for i1 types.
311/// Common/desirable widths are equally treated as legal to convert to, in
312/// order to open up more combining opportunities.
313bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
314 unsigned ToWidth) const {
315 bool FromLegal = FromWidth == 1 || DL.isLegalInteger(Width: FromWidth);
316 bool ToLegal = ToWidth == 1 || DL.isLegalInteger(Width: ToWidth);
317
318 // Convert to desirable widths even if they are not legal types.
319 // Only shrink types, to prevent infinite loops.
320 if (ToWidth < FromWidth && isDesirableIntType(BitWidth: ToWidth))
321 return true;
322
323 // If this is a legal or desiable integer from type, and the result would be
324 // an illegal type, don't do the transformation.
325 if ((FromLegal || isDesirableIntType(BitWidth: FromWidth)) && !ToLegal)
326 return false;
327
328 // Otherwise, if both are illegal, do not increase the size of the result. We
329 // do allow things like i160 -> i64, but not i64 -> i160.
330 if (!FromLegal && !ToLegal && ToWidth > FromWidth)
331 return false;
332
333 return true;
334}
335
336/// Return true if it is desirable to convert a computation from 'From' to 'To'.
337/// We don't want to convert from a legal to an illegal type or from a smaller
338/// to a larger illegal type. i1 is always treated as a legal type because it is
339/// a fundamental type in IR, and there are many specialized optimizations for
340/// i1 types.
341bool InstCombinerImpl::shouldChangeType(Type *From, Type *To) const {
342 // TODO: This could be extended to allow vectors. Datalayout changes might be
343 // needed to properly support that.
344 if (!From->isIntegerTy() || !To->isIntegerTy())
345 return false;
346
347 unsigned FromWidth = From->getPrimitiveSizeInBits();
348 unsigned ToWidth = To->getPrimitiveSizeInBits();
349 return shouldChangeType(FromWidth, ToWidth);
350}
351
352// Return true, if No Signed Wrap should be maintained for I.
353// The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C",
354// where both B and C should be ConstantInts, results in a constant that does
355// not overflow. This function only handles the Add/Sub/Mul opcodes. For
356// all other opcodes, the function conservatively returns false.
357static bool maintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
358 auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I);
359 if (!OBO || !OBO->hasNoSignedWrap())
360 return false;
361
362 const APInt *BVal, *CVal;
363 if (!match(V: B, P: m_APInt(Res&: BVal)) || !match(V: C, P: m_APInt(Res&: CVal)))
364 return false;
365
366 // We reason about Add/Sub/Mul Only.
367 bool Overflow = false;
368 switch (I.getOpcode()) {
369 case Instruction::Add:
370 (void)BVal->sadd_ov(RHS: *CVal, Overflow);
371 break;
372 case Instruction::Sub:
373 (void)BVal->ssub_ov(RHS: *CVal, Overflow);
374 break;
375 case Instruction::Mul:
376 (void)BVal->smul_ov(RHS: *CVal, Overflow);
377 break;
378 default:
379 // Conservatively return false for other opcodes.
380 return false;
381 }
382 return !Overflow;
383}
384
385static bool hasNoUnsignedWrap(BinaryOperator &I) {
386 auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I);
387 return OBO && OBO->hasNoUnsignedWrap();
388}
389
390static bool hasNoSignedWrap(BinaryOperator &I) {
391 auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: &I);
392 return OBO && OBO->hasNoSignedWrap();
393}
394
395/// Conservatively clears subclassOptionalData after a reassociation or
396/// commutation. We preserve fast-math flags when applicable as they can be
397/// preserved.
398static void ClearSubclassDataAfterReassociation(BinaryOperator &I) {
399 FPMathOperator *FPMO = dyn_cast<FPMathOperator>(Val: &I);
400 if (!FPMO) {
401 I.clearSubclassOptionalData();
402 return;
403 }
404
405 FastMathFlags FMF = I.getFastMathFlags();
406 I.clearSubclassOptionalData();
407 I.setFastMathFlags(FMF);
408}
409
410/// Combine constant operands of associative operations either before or after a
411/// cast to eliminate one of the associative operations:
412/// (op (cast (op X, C2)), C1) --> (cast (op X, op (C1, C2)))
413/// (op (cast (op X, C2)), C1) --> (op (cast X), op (C1, C2))
414static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
415 InstCombinerImpl &IC) {
416 auto *Cast = dyn_cast<CastInst>(Val: BinOp1->getOperand(i_nocapture: 0));
417 if (!Cast || !Cast->hasOneUse())
418 return false;
419
420 // TODO: Enhance logic for other casts and remove this check.
421 auto CastOpcode = Cast->getOpcode();
422 if (CastOpcode != Instruction::ZExt)
423 return false;
424
425 // TODO: Enhance logic for other BinOps and remove this check.
426 if (!BinOp1->isBitwiseLogicOp())
427 return false;
428
429 auto AssocOpcode = BinOp1->getOpcode();
430 auto *BinOp2 = dyn_cast<BinaryOperator>(Val: Cast->getOperand(i_nocapture: 0));
431 if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
432 return false;
433
434 Constant *C1, *C2;
435 if (!match(V: BinOp1->getOperand(i_nocapture: 1), P: m_Constant(C&: C1)) ||
436 !match(V: BinOp2->getOperand(i_nocapture: 1), P: m_Constant(C&: C2)))
437 return false;
438
439 // TODO: This assumes a zext cast.
440 // Eg, if it was a trunc, we'd cast C1 to the source type because casting C2
441 // to the destination type might lose bits.
442
443 // Fold the constants together in the destination type:
444 // (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
445 const DataLayout &DL = IC.getDataLayout();
446 Type *DestTy = C1->getType();
447 Constant *CastC2 = ConstantFoldCastOperand(Opcode: CastOpcode, C: C2, DestTy, DL);
448 if (!CastC2)
449 return false;
450 Constant *FoldedC = ConstantFoldBinaryOpOperands(Opcode: AssocOpcode, LHS: C1, RHS: CastC2, DL);
451 if (!FoldedC)
452 return false;
453
454 IC.replaceOperand(I&: *Cast, OpNum: 0, V: BinOp2->getOperand(i_nocapture: 0));
455 IC.replaceOperand(I&: *BinOp1, OpNum: 1, V: FoldedC);
456 BinOp1->dropPoisonGeneratingFlags();
457 Cast->dropPoisonGeneratingFlags();
458 return true;
459}
460
461// Simplifies IntToPtr/PtrToInt RoundTrip Cast.
462// inttoptr ( ptrtoint (x) ) --> x
463Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
464 auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
465 if (IntToPtr && DL.getTypeSizeInBits(Ty: IntToPtr->getDestTy()) ==
466 DL.getTypeSizeInBits(Ty: IntToPtr->getSrcTy())) {
467 auto *PtrToInt = dyn_cast<PtrToIntInst>(Val: IntToPtr->getOperand(i_nocapture: 0));
468 Type *CastTy = IntToPtr->getDestTy();
469 if (PtrToInt &&
470 CastTy->getPointerAddressSpace() ==
471 PtrToInt->getSrcTy()->getPointerAddressSpace() &&
472 DL.getTypeSizeInBits(Ty: PtrToInt->getSrcTy()) ==
473 DL.getTypeSizeInBits(Ty: PtrToInt->getDestTy()))
474 return PtrToInt->getOperand(i_nocapture: 0);
475 }
476 return nullptr;
477}
478
479/// This performs a few simplifications for operators that are associative or
480/// commutative:
481///
482/// Commutative operators:
483///
484/// 1. Order operands such that they are listed from right (least complex) to
485/// left (most complex). This puts constants before unary operators before
486/// binary operators.
487///
488/// Associative operators:
489///
490/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
491/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
492///
493/// Associative and commutative operators:
494///
495/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
496/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
497/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
498/// if C1 and C2 are constants.
// Canonicalizes and reassociates the operands of I in place, looping until
// none of the transforms below applies. Returns the Changed flag, which the
// transforms below set when they modify I.
499bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
500 Instruction::BinaryOps Opcode = I.getOpcode();
501 bool Changed = false;
502
 // Every successful transform ends in `continue`, which restarts the loop
 // body on the updated instruction; the only exit is the return at the end.
503 do {
504 // Order operands such that they are listed from right (least complex) to
505 // left (most complex). This puts constants before unary operators before
506 // binary operators.
 // NOTE(review): the result of swapOperands() is negated, so it presumably
 // returns true on failure - confirm against its declaration.
507 if (I.isCommutative() && getComplexity(V: I.getOperand(i_nocapture: 0)) <
508 getComplexity(V: I.getOperand(i_nocapture: 1)))
509 Changed = !I.swapOperands();
510
 // If matchSymmetricPair yields a pair for the two operands, install that
 // pair as the new operands.
511 if (I.isCommutative()) {
512 if (auto Pair = matchSymmetricPair(LHS: I.getOperand(i_nocapture: 0), RHS: I.getOperand(i_nocapture: 1))) {
513 replaceOperand(I, OpNum: 0, V: Pair->first);
514 replaceOperand(I, OpNum: 1, V: Pair->second);
515 Changed = true;
516 }
517 }
518
 // Null when the corresponding operand is not itself a binary operator.
519 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: I.getOperand(i_nocapture: 0));
520 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: I.getOperand(i_nocapture: 1));
521
522 if (I.isAssociative()) {
523 // Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies.
524 if (Op0 && Op0->getOpcode() == Opcode) {
525 Value *A = Op0->getOperand(i_nocapture: 0);
526 Value *B = Op0->getOperand(i_nocapture: 1);
527 Value *C = I.getOperand(i_nocapture: 1);
528
529 // Does "B op C" simplify?
530 if (Value *V = simplifyBinOp(Opcode, LHS: B, RHS: C, Q: SQ.getWithInstruction(I: &I))) {
531 // It simplifies to V. Form "A op V".
532 replaceOperand(I, OpNum: 0, V: A);
533 replaceOperand(I, OpNum: 1, V);
 // The flags are read here, before they are cleared below. nuw is kept
 // when both I and Op0 had it; nsw additionally requires that
 // maintainNoSignedWrap accepts the constants B and C.
534 bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(I&: *Op0);
535 bool IsNSW = maintainNoSignedWrap(I, B, C) && hasNoSignedWrap(I&: *Op0);
536
537 // Conservatively clear all optional flags since they may not be
538 // preserved by the reassociation. Reset nsw/nuw based on the above
539 // analysis.
540 ClearSubclassDataAfterReassociation(I);
541
542 // Note: this is only valid because SimplifyBinOp doesn't look at
543 // the operands to Op0.
544 if (IsNUW)
545 I.setHasNoUnsignedWrap(true);
546
547 if (IsNSW)
548 I.setHasNoSignedWrap(true);
549
550 Changed = true;
551 ++NumReassoc;
552 continue;
553 }
554 }
555
556 // Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies.
557 if (Op1 && Op1->getOpcode() == Opcode) {
558 Value *A = I.getOperand(i_nocapture: 0);
559 Value *B = Op1->getOperand(i_nocapture: 0);
560 Value *C = Op1->getOperand(i_nocapture: 1);
561
562 // Does "A op B" simplify?
563 if (Value *V = simplifyBinOp(Opcode, LHS: A, RHS: B, Q: SQ.getWithInstruction(I: &I))) {
564 // It simplifies to V. Form "V op C".
565 replaceOperand(I, OpNum: 0, V);
566 replaceOperand(I, OpNum: 1, V: C);
567 // Conservatively clear the optional flags, since they may not be
568 // preserved by the reassociation.
569 ClearSubclassDataAfterReassociation(I);
570 Changed = true;
571 ++NumReassoc;
572 continue;
573 }
574 }
575 }
576
577 if (I.isAssociative() && I.isCommutative()) {
 // Try folding constants across a cast first (see simplifyAssocCastAssoc).
578 if (simplifyAssocCastAssoc(BinOp1: &I, IC&: *this)) {
579 Changed = true;
580 ++NumReassoc;
581 continue;
582 }
583
584 // Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies.
585 if (Op0 && Op0->getOpcode() == Opcode) {
586 Value *A = Op0->getOperand(i_nocapture: 0);
587 Value *B = Op0->getOperand(i_nocapture: 1);
588 Value *C = I.getOperand(i_nocapture: 1);
589
590 // Does "C op A" simplify?
591 if (Value *V = simplifyBinOp(Opcode, LHS: C, RHS: A, Q: SQ.getWithInstruction(I: &I))) {
592 // It simplifies to V. Form "V op B".
593 replaceOperand(I, OpNum: 0, V);
594 replaceOperand(I, OpNum: 1, V: B);
595 // Conservatively clear the optional flags, since they may not be
596 // preserved by the reassociation.
597 ClearSubclassDataAfterReassociation(I);
598 Changed = true;
599 ++NumReassoc;
600 continue;
601 }
602 }
603
604 // Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies.
605 if (Op1 && Op1->getOpcode() == Opcode) {
606 Value *A = I.getOperand(i_nocapture: 0);
607 Value *B = Op1->getOperand(i_nocapture: 0);
608 Value *C = Op1->getOperand(i_nocapture: 1);
609
610 // Does "C op A" simplify?
611 if (Value *V = simplifyBinOp(Opcode, LHS: C, RHS: A, Q: SQ.getWithInstruction(I: &I))) {
612 // It simplifies to V. Form "B op V".
613 replaceOperand(I, OpNum: 0, V: B);
614 replaceOperand(I, OpNum: 1, V);
615 // Conservatively clear the optional flags, since they may not be
616 // preserved by the reassociation.
617 ClearSubclassDataAfterReassociation(I);
618 Changed = true;
619 ++NumReassoc;
620 continue;
621 }
622 }
623
624 // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
625 // if C1 and C2 are constants.
 // Both inner operations must have a single use and "C1 op C2" must
 // constant-fold. Unlike the transforms above, this one creates a new
 // instruction and does not increment NumReassoc.
626 Value *A, *B;
627 Constant *C1, *C2, *CRes;
628 if (Op0 && Op1 &&
629 Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
630 match(V: Op0, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: A), R: m_Constant(C&: C1)))) &&
631 match(V: Op1, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: B), R: m_Constant(C&: C2)))) &&
632 (CRes = ConstantFoldBinaryOpOperands(Opcode, LHS: C1, RHS: C2, DL))) {
 // nuw survives only if all three original operations carried it.
633 bool IsNUW = hasNoUnsignedWrap(I) &&
634 hasNoUnsignedWrap(I&: *Op0) &&
635 hasNoUnsignedWrap(I&: *Op1);
 // The new inner operation is created with nuw only for Add.
636 BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
637 BinaryOperator::CreateNUW(Opc: Opcode, V1: A, V2: B) :
638 BinaryOperator::Create(Op: Opcode, S1: A, S2: B);
639
 // For FP operations, use the intersection of the three sets of
 // fast-math flags.
640 if (isa<FPMathOperator>(Val: NewBO)) {
641 FastMathFlags Flags = I.getFastMathFlags() &
642 Op0->getFastMathFlags() &
643 Op1->getFastMathFlags();
644 NewBO->setFastMathFlags(Flags);
645 }
646 InsertNewInstWith(New: NewBO, Old: I.getIterator());
647 NewBO->takeName(V: Op1);
648 replaceOperand(I, OpNum: 0, V: NewBO);
649 replaceOperand(I, OpNum: 1, V: CRes);
650 // Conservatively clear the optional flags, since they may not be
651 // preserved by the reassociation.
652 ClearSubclassDataAfterReassociation(I);
653 if (IsNUW)
654 I.setHasNoUnsignedWrap(true);
655
656 Changed = true;
657 continue;
658 }
659 }
660
661 // No further simplifications.
662 return Changed;
663 } while (true);
664}
665
666/// Return whether "X LOp (Y ROp Z)" is always equal to
667/// "(X LOp Y) ROp (X LOp Z)".
668static bool leftDistributesOverRight(Instruction::BinaryOps LOp,
669 Instruction::BinaryOps ROp) {
670 // X & (Y | Z) <--> (X & Y) | (X & Z)
671 // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
672 if (LOp == Instruction::And)
673 return ROp == Instruction::Or || ROp == Instruction::Xor;
674
675 // X | (Y & Z) <--> (X | Y) & (X | Z)
676 if (LOp == Instruction::Or)
677 return ROp == Instruction::And;
678
679 // X * (Y + Z) <--> (X * Y) + (X * Z)
680 // X * (Y - Z) <--> (X * Y) - (X * Z)
681 if (LOp == Instruction::Mul)
682 return ROp == Instruction::Add || ROp == Instruction::Sub;
683
684 return false;
685}
686
687/// Return whether "(X LOp Y) ROp Z" is always equal to
688/// "(X ROp Z) LOp (Y ROp Z)".
689static bool rightDistributesOverLeft(Instruction::BinaryOps LOp,
690 Instruction::BinaryOps ROp) {
691 if (Instruction::isCommutative(Opcode: ROp))
692 return leftDistributesOverRight(LOp: ROp, ROp: LOp);
693
694 // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
695 return Instruction::isBitwiseLogicOp(Opcode: LOp) && Instruction::isShift(Opcode: ROp);
696
697 // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
698 // but this requires knowing that the addition does not overflow and other
699 // such subtleties.
700}
701
702/// This function returns identity value for given opcode, which can be used to
703/// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
704static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
705 if (isa<Constant>(Val: V))
706 return nullptr;
707
708 return ConstantExpr::getBinOpIdentity(Opcode, Ty: V->getType());
709}
710
711/// This function predicates factorization using distributive laws. By default,
712/// it just returns the 'Op' inputs. But for special-cases like
713/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
714/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
715/// allow more factorization opportunities.
716static Instruction::BinaryOps
717getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
718 Value *&LHS, Value *&RHS, BinaryOperator *OtherOp) {
719 assert(Op && "Expected a binary operator");
720 LHS = Op->getOperand(i_nocapture: 0);
721 RHS = Op->getOperand(i_nocapture: 1);
722 if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
723 Constant *C;
724 if (match(V: Op, P: m_Shl(L: m_Value(), R: m_ImmConstant(C)))) {
725 // X << C --> X * (1 << C)
726 RHS = ConstantFoldBinaryInstruction(
727 Opcode: Instruction::Shl, V1: ConstantInt::get(Ty: Op->getType(), V: 1), V2: C);
728 assert(RHS && "Constant folding of immediate constants failed");
729 return Instruction::Mul;
730 }
731 // TODO: We can add other conversions e.g. shr => div etc.
732 }
733 if (Instruction::isBitwiseLogicOp(Opcode: TopOpcode)) {
734 if (OtherOp && OtherOp->getOpcode() == Instruction::AShr &&
735 match(V: Op, P: m_LShr(L: m_NonNegative(), R: m_Value()))) {
736 // lshr nneg C, X --> ashr nneg C, X
737 return Instruction::AShr;
738 }
739 }
740 return Op->getOpcode();
741}
742
743/// This tries to simplify binary operations by factorizing out common terms
744/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
745static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
746 InstCombiner::BuilderTy &Builder,
747 Instruction::BinaryOps InnerOpcode, Value *A,
748 Value *B, Value *C, Value *D) {
749 assert(A && B && C && D && "All values must be provided");
750
751 Value *V = nullptr;
752 Value *RetVal = nullptr;
753 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
754 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
755
756 // Does "X op' Y" always equal "Y op' X"?
757 bool InnerCommutative = Instruction::isCommutative(Opcode: InnerOpcode);
758
759 // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
760 if (leftDistributesOverRight(LOp: InnerOpcode, ROp: TopLevelOpcode)) {
761 // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
762 // commutative case, "(A op' B) op (C op' A)"?
763 if (A == C || (InnerCommutative && A == D)) {
764 if (A != C)
765 std::swap(a&: C, b&: D);
766 // Consider forming "A op' (B op D)".
767 // If "B op D" simplifies then it can be formed with no cost.
768 V = simplifyBinOp(Opcode: TopLevelOpcode, LHS: B, RHS: D, Q: SQ.getWithInstruction(I: &I));
769
770 // If "B op D" doesn't simplify then only go on if one of the existing
771 // operations "A op' B" and "C op' D" will be zapped as no longer used.
772 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
773 V = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: B, RHS: D, Name: RHS->getName());
774 if (V)
775 RetVal = Builder.CreateBinOp(Opc: InnerOpcode, LHS: A, RHS: V);
776 }
777 }
778
779 // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
780 if (!RetVal && rightDistributesOverLeft(LOp: TopLevelOpcode, ROp: InnerOpcode)) {
781 // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
782 // commutative case, "(A op' B) op (B op' D)"?
783 if (B == D || (InnerCommutative && B == C)) {
784 if (B != D)
785 std::swap(a&: C, b&: D);
786 // Consider forming "(A op C) op' B".
787 // If "A op C" simplifies then it can be formed with no cost.
788 V = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQ.getWithInstruction(I: &I));
789
790 // If "A op C" doesn't simplify then only go on if one of the existing
791 // operations "A op' B" and "C op' D" will be zapped as no longer used.
792 if (!V && (LHS->hasOneUse() || RHS->hasOneUse()))
793 V = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C, Name: LHS->getName());
794 if (V)
795 RetVal = Builder.CreateBinOp(Opc: InnerOpcode, LHS: V, RHS: B);
796 }
797 }
798
799 if (!RetVal)
800 return nullptr;
801
802 ++NumFactor;
803 RetVal->takeName(V: &I);
804
805 // Try to add no-overflow flags to the final value.
806 if (isa<BinaryOperator>(Val: RetVal)) {
807 bool HasNSW = false;
808 bool HasNUW = false;
809 if (isa<OverflowingBinaryOperator>(Val: &I)) {
810 HasNSW = I.hasNoSignedWrap();
811 HasNUW = I.hasNoUnsignedWrap();
812 }
813 if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(Val: LHS)) {
814 HasNSW &= LOBO->hasNoSignedWrap();
815 HasNUW &= LOBO->hasNoUnsignedWrap();
816 }
817
818 if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(Val: RHS)) {
819 HasNSW &= ROBO->hasNoSignedWrap();
820 HasNUW &= ROBO->hasNoUnsignedWrap();
821 }
822
823 if (TopLevelOpcode == Instruction::Add && InnerOpcode == Instruction::Mul) {
824 // We can propagate 'nsw' if we know that
825 // %Y = mul nsw i16 %X, C
826 // %Z = add nsw i16 %Y, %X
827 // =>
828 // %Z = mul nsw i16 %X, C+1
829 //
830 // iff C+1 isn't INT_MIN
831 const APInt *CInt;
832 if (match(V, P: m_APInt(Res&: CInt)) && !CInt->isMinSignedValue())
833 cast<Instruction>(Val: RetVal)->setHasNoSignedWrap(HasNSW);
834
835 // nuw can be propagated with any constant or nuw value.
836 cast<Instruction>(Val: RetVal)->setHasNoUnsignedWrap(HasNUW);
837 }
838 }
839 return RetVal;
840}
841
842// If `I` has one Const operand and the other matches `(ctpop (not x))`,
843// replace `(ctpop (not x))` with `(sub nuw nsw BitWidth(x), (ctpop x))`.
844// This is only useful is the new subtract can fold so we only handle the
845// following cases:
846// 1) (add/sub/disjoint_or C, (ctpop (not x))
847// -> (add/sub/disjoint_or C', (ctpop x))
848// 1) (cmp pred C, (ctpop (not x))
849// -> (cmp pred C', (ctpop x))
850Instruction *InstCombinerImpl::tryFoldInstWithCtpopWithNot(Instruction *I) {
851 unsigned Opc = I->getOpcode();
852 unsigned ConstIdx = 1;
853 switch (Opc) {
854 default:
855 return nullptr;
856 // (ctpop (not x)) <-> (sub nuw nsw BitWidth(x) - (ctpop x))
857 // We can fold the BitWidth(x) with add/sub/icmp as long the other operand
858 // is constant.
859 case Instruction::Sub:
860 ConstIdx = 0;
861 break;
862 case Instruction::ICmp:
863 // Signed predicates aren't correct in some edge cases like for i2 types, as
864 // well since (ctpop x) is known [0, log2(BitWidth(x))] almost all signed
865 // comparisons against it are simplfied to unsigned.
866 if (cast<ICmpInst>(Val: I)->isSigned())
867 return nullptr;
868 break;
869 case Instruction::Or:
870 if (!match(V: I, P: m_DisjointOr(L: m_Value(), R: m_Value())))
871 return nullptr;
872 [[fallthrough]];
873 case Instruction::Add:
874 break;
875 }
876
877 Value *Op;
878 // Find ctpop.
879 if (!match(V: I->getOperand(i: 1 - ConstIdx),
880 P: m_OneUse(SubPattern: m_Intrinsic<Intrinsic::ctpop>(Op0: m_Value(V&: Op)))))
881 return nullptr;
882
883 Constant *C;
884 // Check other operand is ImmConstant.
885 if (!match(V: I->getOperand(i: ConstIdx), P: m_ImmConstant(C)))
886 return nullptr;
887
888 Type *Ty = Op->getType();
889 Constant *BitWidthC = ConstantInt::get(Ty, V: Ty->getScalarSizeInBits());
890 // Need extra check for icmp. Note if this check is true, it generally means
891 // the icmp will simplify to true/false.
892 if (Opc == Instruction::ICmp && !cast<ICmpInst>(Val: I)->isEquality()) {
893 Constant *Cmp =
894 ConstantFoldCompareInstOperands(Predicate: ICmpInst::ICMP_UGT, LHS: C, RHS: BitWidthC, DL);
895 if (!Cmp || !Cmp->isZeroValue())
896 return nullptr;
897 }
898
899 // Check we can invert `(not x)` for free.
900 bool Consumes = false;
901 if (!isFreeToInvert(V: Op, WillInvertAllUses: Op->hasOneUse(), DoesConsume&: Consumes) || !Consumes)
902 return nullptr;
903 Value *NotOp = getFreelyInverted(V: Op, WillInvertAllUses: Op->hasOneUse(), Builder: &Builder);
904 assert(NotOp != nullptr &&
905 "Desync between isFreeToInvert and getFreelyInverted");
906
907 Value *CtpopOfNotOp = Builder.CreateIntrinsic(RetTy: Ty, ID: Intrinsic::ctpop, Args: NotOp);
908
909 Value *R = nullptr;
910
911 // Do the transformation here to avoid potentially introducing an infinite
912 // loop.
913 switch (Opc) {
914 case Instruction::Sub:
915 R = Builder.CreateAdd(LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: C, C2: BitWidthC));
916 break;
917 case Instruction::Or:
918 case Instruction::Add:
919 R = Builder.CreateSub(LHS: ConstantExpr::getAdd(C1: C, C2: BitWidthC), RHS: CtpopOfNotOp);
920 break;
921 case Instruction::ICmp:
922 R = Builder.CreateICmp(P: cast<ICmpInst>(Val: I)->getSwappedPredicate(),
923 LHS: CtpopOfNotOp, RHS: ConstantExpr::getSub(C1: BitWidthC, C2: C));
924 break;
925 default:
926 llvm_unreachable("Unhandled Opcode");
927 }
928 assert(R != nullptr);
929 return replaceInstUsesWith(I&: *I, V: R);
930}
931
932// (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
933// IFF
934// 1) the logic_shifts match
935// 2) either both binops are binops and one is `and` or
936// BinOp1 is `and`
937// (logic_shift (inv_logic_shift C1, C), C) == C1 or
938//
939// -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
940//
941// (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
942// IFF
943// 1) the logic_shifts match
944// 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`).
945//
946// -> (BinOp (logic_shift (BinOp X, Y)), Mask)
947//
948// (Binop1 (Binop2 (arithmetic_shift X, Amt), Mask), (arithmetic_shift Y, Amt))
949// IFF
950// 1) Binop1 is bitwise logical operator `and`, `or` or `xor`
951// 2) Binop2 is `not`
952//
953// -> (arithmetic_shift Binop1((not X), Y), Amt)
954
955Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) {
956 const DataLayout &DL = I.getDataLayout();
957 auto IsValidBinOpc = [](unsigned Opc) {
958 switch (Opc) {
959 default:
960 return false;
961 case Instruction::And:
962 case Instruction::Or:
963 case Instruction::Xor:
964 case Instruction::Add:
965 // Skip Sub as we only match constant masks which will canonicalize to use
966 // add.
967 return true;
968 }
969 };
970
971 // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra
972 // constraints.
973 auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
974 unsigned ShOpc) {
975 assert(ShOpc != Instruction::AShr);
976 return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
977 ShOpc == Instruction::Shl;
978 };
979
980 auto GetInvShift = [](unsigned ShOpc) {
981 assert(ShOpc != Instruction::AShr);
982 return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
983 };
984
985 auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2,
986 unsigned ShOpc, Constant *CMask,
987 Constant *CShift) {
988 // If the BinOp1 is `and` we don't need to check the mask.
989 if (BinOpc1 == Instruction::And)
990 return true;
991
992 // For all other possible transfers we need complete distributable
993 // binop/shift (anything but `add` + `lshr`).
994 if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc))
995 return false;
996
997 // If BinOp2 is `and`, any mask works (this only really helps for non-splat
998 // vecs, otherwise the mask will be simplified and the following check will
999 // handle it).
1000 if (BinOpc2 == Instruction::And)
1001 return true;
1002
1003 // Otherwise, need mask that meets the below requirement.
1004 // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
1005 Constant *MaskInvShift =
1006 ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL);
1007 return ConstantFoldBinaryOpOperands(Opcode: ShOpc, LHS: MaskInvShift, RHS: CShift, DL) ==
1008 CMask;
1009 };
1010
1011 auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
1012 Constant *CMask, *CShift;
1013 Value *X, *Y, *ShiftedX, *Mask, *Shift;
1014 if (!match(V: I.getOperand(i_nocapture: ShOpnum),
1015 P: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: Y), R: m_Value(V&: Shift)))))
1016 return nullptr;
1017 if (!match(V: I.getOperand(i_nocapture: 1 - ShOpnum),
1018 P: m_c_BinOp(L: m_CombineAnd(
1019 L: m_OneUse(SubPattern: m_Shift(L: m_Value(V&: X), R: m_Specific(V: Shift))),
1020 R: m_Value(V&: ShiftedX)),
1021 R: m_Value(V&: Mask))))
1022 return nullptr;
1023 // Make sure we are matching instruction shifts and not ConstantExpr
1024 auto *IY = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: ShOpnum));
1025 auto *IX = dyn_cast<Instruction>(Val: ShiftedX);
1026 if (!IY || !IX)
1027 return nullptr;
1028
1029 // LHS and RHS need same shift opcode
1030 unsigned ShOpc = IY->getOpcode();
1031 if (ShOpc != IX->getOpcode())
1032 return nullptr;
1033
1034 // Make sure binop is real instruction and not ConstantExpr
1035 auto *BO2 = dyn_cast<Instruction>(Val: I.getOperand(i_nocapture: 1 - ShOpnum));
1036 if (!BO2)
1037 return nullptr;
1038
1039 unsigned BinOpc = BO2->getOpcode();
1040 // Make sure we have valid binops.
1041 if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
1042 return nullptr;
1043
1044 if (ShOpc == Instruction::AShr) {
1045 if (Instruction::isBitwiseLogicOp(Opcode: I.getOpcode()) &&
1046 BinOpc == Instruction::Xor && match(V: Mask, P: m_AllOnes())) {
1047 Value *NotX = Builder.CreateNot(V: X);
1048 Value *NewBinOp = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NotX);
1049 return BinaryOperator::Create(
1050 Op: static_cast<Instruction::BinaryOps>(ShOpc), S1: NewBinOp, S2: Shift);
1051 }
1052
1053 return nullptr;
1054 }
1055
1056 // If BinOp1 == BinOp2 and it's bitwise or shl with add, then just
1057 // distribute to drop the shift irrelevant of constants.
1058 if (BinOpc == I.getOpcode() &&
1059 IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) {
1060 Value *NewBinOp2 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: X, RHS: Y);
1061 Value *NewBinOp1 = Builder.CreateBinOp(
1062 Opc: static_cast<Instruction::BinaryOps>(ShOpc), LHS: NewBinOp2, RHS: Shift);
1063 return BinaryOperator::Create(Op: I.getOpcode(), S1: NewBinOp1, S2: Mask);
1064 }
1065
1066 // Otherwise we can only distribute by constant shifting the mask, so
1067 // ensure we have constants.
1068 if (!match(V: Shift, P: m_ImmConstant(C&: CShift)))
1069 return nullptr;
1070 if (!match(V: Mask, P: m_ImmConstant(C&: CMask)))
1071 return nullptr;
1072
1073 // Check if we can distribute the binops.
1074 if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
1075 return nullptr;
1076
1077 Constant *NewCMask =
1078 ConstantFoldBinaryOpOperands(Opcode: GetInvShift(ShOpc), LHS: CMask, RHS: CShift, DL);
1079 Value *NewBinOp2 = Builder.CreateBinOp(
1080 Opc: static_cast<Instruction::BinaryOps>(BinOpc), LHS: X, RHS: NewCMask);
1081 Value *NewBinOp1 = Builder.CreateBinOp(Opc: I.getOpcode(), LHS: Y, RHS: NewBinOp2);
1082 return BinaryOperator::Create(Op: static_cast<Instruction::BinaryOps>(ShOpc),
1083 S1: NewBinOp1, S2: CShift);
1084 };
1085
1086 if (Instruction *R = MatchBinOp(0))
1087 return R;
1088 return MatchBinOp(1);
1089}
1090
1091// (Binop (zext C), (select C, T, F))
1092// -> (select C, (binop 1, T), (binop 0, F))
1093//
1094// (Binop (sext C), (select C, T, F))
1095// -> (select C, (binop -1, T), (binop 0, F))
1096//
1097// Attempt to simplify binary operations into a select with folded args, when
1098// one operand of the binop is a select instruction and the other operand is a
1099// zext/sext extension, whose value is the select condition.
1100Instruction *
1101InstCombinerImpl::foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I) {
1102 // TODO: this simplification may be extended to any speculatable instruction,
1103 // not just binops, and would possibly be handled better in FoldOpIntoSelect.
1104 Instruction::BinaryOps Opc = I.getOpcode();
1105 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
1106 Value *A, *CondVal, *TrueVal, *FalseVal;
1107 Value *CastOp;
1108
1109 auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) {
1110 return match(V: CastOp, P: m_ZExtOrSExt(Op: m_Value(V&: A))) &&
1111 A->getType()->getScalarSizeInBits() == 1 &&
1112 match(V: SelectOp, P: m_Select(C: m_Value(V&: CondVal), L: m_Value(V&: TrueVal),
1113 R: m_Value(V&: FalseVal)));
1114 };
1115
1116 // Make sure one side of the binop is a select instruction, and the other is a
1117 // zero/sign extension operating on a i1.
1118 if (MatchSelectAndCast(LHS, RHS))
1119 CastOp = LHS;
1120 else if (MatchSelectAndCast(RHS, LHS))
1121 CastOp = RHS;
1122 else
1123 return nullptr;
1124
1125 SelectInst *SI = ProfcheckDisableMetadataFixes
1126 ? nullptr
1127 : cast<SelectInst>(Val: CastOp == LHS ? RHS : LHS);
1128
1129 auto NewFoldedConst = [&](bool IsTrueArm, Value *V) {
1130 bool IsCastOpRHS = (CastOp == RHS);
1131 bool IsZExt = isa<ZExtInst>(Val: CastOp);
1132 Constant *C;
1133
1134 if (IsTrueArm) {
1135 C = Constant::getNullValue(Ty: V->getType());
1136 } else if (IsZExt) {
1137 unsigned BitWidth = V->getType()->getScalarSizeInBits();
1138 C = Constant::getIntegerValue(Ty: V->getType(), V: APInt(BitWidth, 1));
1139 } else {
1140 C = Constant::getAllOnesValue(Ty: V->getType());
1141 }
1142
1143 return IsCastOpRHS ? Builder.CreateBinOp(Opc, LHS: V, RHS: C)
1144 : Builder.CreateBinOp(Opc, LHS: C, RHS: V);
1145 };
1146
1147 // If the value used in the zext/sext is the select condition, or the negated
1148 // of the select condition, the binop can be simplified.
1149 if (CondVal == A) {
1150 Value *NewTrueVal = NewFoldedConst(false, TrueVal);
1151 return SelectInst::Create(C: CondVal, S1: NewTrueVal,
1152 S2: NewFoldedConst(true, FalseVal), NameStr: "", InsertBefore: nullptr, MDFrom: SI);
1153 }
1154
1155 if (match(V: A, P: m_Not(V: m_Specific(V: CondVal)))) {
1156 Value *NewTrueVal = NewFoldedConst(true, TrueVal);
1157 return SelectInst::Create(C: CondVal, S1: NewTrueVal,
1158 S2: NewFoldedConst(false, FalseVal), NameStr: "", InsertBefore: nullptr, MDFrom: SI);
1159 }
1160
1161 return nullptr;
1162}
1163
1164Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) {
1165 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
1166 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS);
1167 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS);
1168 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1169 Value *A, *B, *C, *D;
1170 Instruction::BinaryOps LHSOpcode, RHSOpcode;
1171
1172 if (Op0)
1173 LHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op0, LHS&: A, RHS&: B, OtherOp: Op1);
1174 if (Op1)
1175 RHSOpcode = getBinOpsForFactorization(TopOpcode: TopLevelOpcode, Op: Op1, LHS&: C, RHS&: D, OtherOp: Op0);
1176
1177 // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
1178 // a common term.
1179 if (Op0 && Op1 && LHSOpcode == RHSOpcode)
1180 if (Value *V = tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C, D))
1181 return V;
1182
1183 // The instruction has the form "(A op' B) op (C)". Try to factorize common
1184 // term.
1185 if (Op0)
1186 if (Value *Ident = getIdentityValue(Opcode: LHSOpcode, V: RHS))
1187 if (Value *V =
1188 tryFactorization(I, SQ, Builder, InnerOpcode: LHSOpcode, A, B, C: RHS, D: Ident))
1189 return V;
1190
1191 // The instruction has the form "(B) op (C op' D)". Try to factorize common
1192 // term.
1193 if (Op1)
1194 if (Value *Ident = getIdentityValue(Opcode: RHSOpcode, V: LHS))
1195 if (Value *V =
1196 tryFactorization(I, SQ, Builder, InnerOpcode: RHSOpcode, A: LHS, B: Ident, C, D))
1197 return V;
1198
1199 return nullptr;
1200}
1201
1202/// This tries to simplify binary operations which some other binary operation
1203/// distributes over either by factorizing out common terms
1204/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in
1205/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win).
1206/// Returns the simplified value, or null if it didn't simplify.
1207Value *InstCombinerImpl::foldUsingDistributiveLaws(BinaryOperator &I) {
1208 Value *LHS = I.getOperand(i_nocapture: 0), *RHS = I.getOperand(i_nocapture: 1);
1209 BinaryOperator *Op0 = dyn_cast<BinaryOperator>(Val: LHS);
1210 BinaryOperator *Op1 = dyn_cast<BinaryOperator>(Val: RHS);
1211 Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
1212
1213 // Factorization.
1214 if (Value *R = tryFactorizationFolds(I))
1215 return R;
1216
1217 // Expansion.
1218 if (Op0 && rightDistributesOverLeft(LOp: Op0->getOpcode(), ROp: TopLevelOpcode)) {
1219 // The instruction has the form "(A op' B) op C". See if expanding it out
1220 // to "(A op C) op' (B op C)" results in simplifications.
1221 Value *A = Op0->getOperand(i_nocapture: 0), *B = Op0->getOperand(i_nocapture: 1), *C = RHS;
1222 Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
1223
1224 // Disable the use of undef because it's not safe to distribute undef.
1225 auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef();
1226 Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive);
1227 Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: B, RHS: C, Q: SQDistributive);
1228
1229 // Do "A op C" and "B op C" both simplify?
1230 if (L && R) {
1231 // They do! Return "L op' R".
1232 ++NumExpand;
1233 C = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R);
1234 C->takeName(V: &I);
1235 return C;
1236 }
1237
1238 // Does "A op C" simplify to the identity value for the inner opcode?
1239 if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) {
1240 // They do! Return "B op C".
1241 ++NumExpand;
1242 C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: B, RHS: C);
1243 C->takeName(V: &I);
1244 return C;
1245 }
1246
1247 // Does "B op C" simplify to the identity value for the inner opcode?
1248 if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) {
1249 // They do! Return "A op C".
1250 ++NumExpand;
1251 C = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C);
1252 C->takeName(V: &I);
1253 return C;
1254 }
1255 }
1256
1257 if (Op1 && leftDistributesOverRight(LOp: TopLevelOpcode, ROp: Op1->getOpcode())) {
1258 // The instruction has the form "A op (B op' C)". See if expanding it out
1259 // to "(A op B) op' (A op C)" results in simplifications.
1260 Value *A = LHS, *B = Op1->getOperand(i_nocapture: 0), *C = Op1->getOperand(i_nocapture: 1);
1261 Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'
1262
1263 // Disable the use of undef because it's not safe to distribute undef.
1264 auto SQDistributive = SQ.getWithInstruction(I: &I).getWithoutUndef();
1265 Value *L = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: B, Q: SQDistributive);
1266 Value *R = simplifyBinOp(Opcode: TopLevelOpcode, LHS: A, RHS: C, Q: SQDistributive);
1267
1268 // Do "A op B" and "A op C" both simplify?
1269 if (L && R) {
1270 // They do! Return "L op' R".
1271 ++NumExpand;
1272 A = Builder.CreateBinOp(Opc: InnerOpcode, LHS: L, RHS: R);
1273 A->takeName(V: &I);
1274 return A;
1275 }
1276
1277 // Does "A op B" simplify to the identity value for the inner opcode?
1278 if (L && L == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: L->getType())) {
1279 // They do! Return "A op C".
1280 ++NumExpand;
1281 A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: C);
1282 A->takeName(V: &I);
1283 return A;
1284 }
1285
1286 // Does "A op C" simplify to the identity value for the inner opcode?
1287 if (R && R == ConstantExpr::getBinOpIdentity(Opcode: InnerOpcode, Ty: R->getType())) {
1288 // They do! Return "A op B".
1289 ++NumExpand;
1290 A = Builder.CreateBinOp(Opc: TopLevelOpcode, LHS: A, RHS: B);
1291 A->takeName(V: &I);
1292 return A;
1293 }
1294 }
1295
1296 return SimplifySelectsFeedingBinaryOp(I, LHS, RHS);
1297}
1298
1299static std::optional<std::pair<Value *, Value *>>
1300matchSymmetricPhiNodesPair(PHINode *LHS, PHINode *RHS) {
1301 if (LHS->getParent() != RHS->getParent())
1302 return std::nullopt;
1303
1304 if (LHS->getNumIncomingValues() < 2)
1305 return std::nullopt;
1306
1307 if (!equal(LRange: LHS->blocks(), RRange: RHS->blocks()))
1308 return std::nullopt;
1309
1310 Value *L0 = LHS->getIncomingValue(i: 0);
1311 Value *R0 = RHS->getIncomingValue(i: 0);
1312
1313 for (unsigned I = 1, E = LHS->getNumIncomingValues(); I != E; ++I) {
1314 Value *L1 = LHS->getIncomingValue(i: I);
1315 Value *R1 = RHS->getIncomingValue(i: I);
1316
1317 if ((L0 == L1 && R0 == R1) || (L0 == R1 && R0 == L1))
1318 continue;
1319
1320 return std::nullopt;
1321 }
1322
1323 return std::optional(std::pair(L0, R0));
1324}
1325
1326std::optional<std::pair<Value *, Value *>>
1327InstCombinerImpl::matchSymmetricPair(Value *LHS, Value *RHS) {
1328 Instruction *LHSInst = dyn_cast<Instruction>(Val: LHS);
1329 Instruction *RHSInst = dyn_cast<Instruction>(Val: RHS);
1330 if (!LHSInst || !RHSInst || LHSInst->getOpcode() != RHSInst->getOpcode())
1331 return std::nullopt;
1332 switch (LHSInst->getOpcode()) {
1333 case Instruction::PHI:
1334 return matchSymmetricPhiNodesPair(LHS: cast<PHINode>(Val: LHS), RHS: cast<PHINode>(Val: RHS));
1335 case Instruction::Select: {
1336 Value *Cond = LHSInst->getOperand(i: 0);
1337 Value *TrueVal = LHSInst->getOperand(i: 1);
1338 Value *FalseVal = LHSInst->getOperand(i: 2);
1339 if (Cond == RHSInst->getOperand(i: 0) && TrueVal == RHSInst->getOperand(i: 2) &&
1340 FalseVal == RHSInst->getOperand(i: 1))
1341 return std::pair(TrueVal, FalseVal);
1342 return std::nullopt;
1343 }
1344 case Instruction::Call: {
1345 // Match min(a, b) and max(a, b)
1346 MinMaxIntrinsic *LHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: LHSInst);
1347 MinMaxIntrinsic *RHSMinMax = dyn_cast<MinMaxIntrinsic>(Val: RHSInst);
1348 if (LHSMinMax && RHSMinMax &&
1349 LHSMinMax->getPredicate() ==
1350 ICmpInst::getSwappedPredicate(pred: RHSMinMax->getPredicate()) &&
1351 ((LHSMinMax->getLHS() == RHSMinMax->getLHS() &&
1352 LHSMinMax->getRHS() == RHSMinMax->getRHS()) ||
1353 (LHSMinMax->getLHS() == RHSMinMax->getRHS() &&
1354 LHSMinMax->getRHS() == RHSMinMax->getLHS())))
1355 return std::pair(LHSMinMax->getLHS(), LHSMinMax->getRHS());
1356 return std::nullopt;
1357 }
1358 default:
1359 return std::nullopt;
1360 }
1361}
1362
1363Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
1364 Value *LHS,
1365 Value *RHS) {
1366 Value *A, *B, *C, *D, *E, *F;
1367 bool LHSIsSelect = match(V: LHS, P: m_Select(C: m_Value(V&: A), L: m_Value(V&: B), R: m_Value(V&: C)));
1368 bool RHSIsSelect = match(V: RHS, P: m_Select(C: m_Value(V&: D), L: m_Value(V&: E), R: m_Value(V&: F)));
1369 if (!LHSIsSelect && !RHSIsSelect)
1370 return nullptr;
1371
1372 SelectInst *SI = ProfcheckDisableMetadataFixes
1373 ? nullptr
1374 : cast<SelectInst>(Val: LHSIsSelect ? LHS : RHS);
1375
1376 FastMathFlags FMF;
1377 BuilderTy::FastMathFlagGuard Guard(Builder);
1378 if (const auto *FPOp = dyn_cast<FPMathOperator>(Val: &I)) {
1379 FMF = FPOp->getFastMathFlags();
1380 Builder.setFastMathFlags(FMF);
1381 }
1382
1383 Instruction::BinaryOps Opcode = I.getOpcode();
1384 SimplifyQuery Q = SQ.getWithInstruction(I: &I);
1385
1386 Value *Cond, *True = nullptr, *False = nullptr;
1387
1388 // Special-case for add/negate combination. Replace the zero in the negation
1389 // with the trailing add operand:
1390 // (Cond ? TVal : -N) + Z --> Cond ? True : (Z - N)
1391 // (Cond ? -N : FVal) + Z --> Cond ? (Z - N) : False
1392 auto foldAddNegate = [&](Value *TVal, Value *FVal, Value *Z) -> Value * {
1393 // We need an 'add' and exactly 1 arm of the select to have been simplified.
1394 if (Opcode != Instruction::Add || (!True && !False) || (True && False))
1395 return nullptr;
1396 Value *N;
1397 if (True && match(V: FVal, P: m_Neg(V: m_Value(V&: N)))) {
1398 Value *Sub = Builder.CreateSub(LHS: Z, RHS: N);
1399 return Builder.CreateSelect(C: Cond, True, False: Sub, Name: I.getName(), MDFrom: SI);
1400 }
1401 if (False && match(V: TVal, P: m_Neg(V: m_Value(V&: N)))) {
1402 Value *Sub = Builder.CreateSub(LHS: Z, RHS: N);
1403 return Builder.CreateSelect(C: Cond, True: Sub, False, Name: I.getName(), MDFrom: SI);
1404 }
1405 return nullptr;
1406 };
1407
1408 if (LHSIsSelect && RHSIsSelect && A == D) {
1409 // (A ? B : C) op (A ? E : F) -> A ? (B op E) : (C op F)
1410 Cond = A;
1411 True = simplifyBinOp(Opcode, LHS: B, RHS: E, FMF, Q);
1412 False = simplifyBinOp(Opcode, LHS: C, RHS: F, FMF, Q);
1413
1414 if (LHS->hasOneUse() && RHS->hasOneUse()) {
1415 if (False && !True)
1416 True = Builder.CreateBinOp(Opc: Opcode, LHS: B, RHS: E);
1417 else if (True && !False)
1418 False = Builder.CreateBinOp(Opc: Opcode, LHS: C, RHS: F);
1419 }
1420 } else if (LHSIsSelect && LHS->hasOneUse()) {
1421 // (A ? B : C) op Y -> A ? (B op Y) : (C op Y)
1422 Cond = A;
1423 True = simplifyBinOp(Opcode, LHS: B, RHS, FMF, Q);
1424 False = simplifyBinOp(Opcode, LHS: C, RHS, FMF, Q);
1425 if (Value *NewSel = foldAddNegate(B, C, RHS))
1426 return NewSel;
1427 } else if (RHSIsSelect && RHS->hasOneUse()) {
1428 // X op (D ? E : F) -> D ? (X op E) : (X op F)
1429 Cond = D;
1430 True = simplifyBinOp(Opcode, LHS, RHS: E, FMF, Q);
1431 False = simplifyBinOp(Opcode, LHS, RHS: F, FMF, Q);
1432 if (Value *NewSel = foldAddNegate(E, F, LHS))
1433 return NewSel;
1434 }
1435
1436 if (!True || !False)
1437 return nullptr;
1438
1439 Value *NewSI = Builder.CreateSelect(C: Cond, True, False, Name: I.getName(), MDFrom: SI);
1440 NewSI->takeName(V: &I);
1441 return NewSI;
1442}
1443
1444/// Freely adapt every user of V as-if V was changed to !V.
1445/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
1446void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) {
1447 assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
1448 for (User *U : make_early_inc_range(Range: I->users())) {
1449 if (U == IgnoredUser)
1450 continue; // Don't consider this user.
1451 switch (cast<Instruction>(Val: U)->getOpcode()) {
1452 case Instruction::Select: {
1453 auto *SI = cast<SelectInst>(Val: U);
1454 SI->swapValues();
1455 SI->swapProfMetadata();
1456 break;
1457 }
1458 case Instruction::Br: {
1459 BranchInst *BI = cast<BranchInst>(Val: U);
1460 BI->swapSuccessors(); // swaps prof metadata too
1461 if (BPI)
1462 BPI->swapSuccEdgesProbabilities(Src: BI->getParent());
1463 break;
1464 }
1465 case Instruction::Xor:
1466 replaceInstUsesWith(I&: cast<Instruction>(Val&: *U), V: I);
1467 // Add to worklist for DCE.
1468 addToWorklist(I: cast<Instruction>(Val: U));
1469 break;
1470 default:
1471 llvm_unreachable("Got unexpected user - out of sync with "
1472 "canFreelyInvertAllUsersOf() ?");
1473 }
1474 }
1475
1476 // Update pre-existing debug value uses.
1477 SmallVector<DbgVariableRecord *, 4> DbgVariableRecords;
1478 llvm::findDbgValues(V: I, DbgVariableRecords);
1479
1480 for (DbgVariableRecord *DbgVal : DbgVariableRecords) {
1481 SmallVector<uint64_t, 1> Ops = {dwarf::DW_OP_not};
1482 for (unsigned Idx = 0, End = DbgVal->getNumVariableLocationOps();
1483 Idx != End; ++Idx)
1484 if (DbgVal->getVariableLocationOp(OpIdx: Idx) == I)
1485 DbgVal->setExpression(
1486 DIExpression::appendOpsToArg(Expr: DbgVal->getExpression(), Ops, ArgNo: Idx));
1487 }
1488}
1489
1490/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a
1491/// constant zero (which is the 'negate' form).
1492Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
1493 Value *NegV;
1494 if (match(V, P: m_Neg(V: m_Value(V&: NegV))))
1495 return NegV;
1496
1497 // Constants can be considered to be negated values if they can be folded.
1498 if (ConstantInt *C = dyn_cast<ConstantInt>(Val: V))
1499 return ConstantExpr::getNeg(C);
1500
1501 if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(Val: V))
1502 if (C->getType()->getElementType()->isIntegerTy())
1503 return ConstantExpr::getNeg(C);
1504
1505 if (ConstantVector *CV = dyn_cast<ConstantVector>(Val: V)) {
1506 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1507 Constant *Elt = CV->getAggregateElement(Elt: i);
1508 if (!Elt)
1509 return nullptr;
1510
1511 if (isa<UndefValue>(Val: Elt))
1512 continue;
1513
1514 if (!isa<ConstantInt>(Val: Elt))
1515 return nullptr;
1516 }
1517 return ConstantExpr::getNeg(C: CV);
1518 }
1519
1520 // Negate integer vector splats.
1521 if (auto *CV = dyn_cast<Constant>(Val: V))
1522 if (CV->getType()->isVectorTy() &&
1523 CV->getType()->getScalarType()->isIntegerTy() && CV->getSplatValue())
1524 return ConstantExpr::getNeg(C: CV);
1525
1526 return nullptr;
1527}
1528
1529// Try to fold:
1530// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1531// -> ({s|u}itofp (int_binop x, y))
1532// 2) (fp_binop ({s|u}itofp x), FpC)
1533// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1534//
1535// Assuming the sign of the cast for x/y is `OpsFromSigned`.
1536Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
1537 BinaryOperator &BO, bool OpsFromSigned, std::array<Value *, 2> IntOps,
1538 Constant *Op1FpC, SmallVectorImpl<WithCache<const Value *>> &OpsKnown) {
1539
1540 Type *FPTy = BO.getType();
1541 Type *IntTy = IntOps[0]->getType();
1542
1543 unsigned IntSz = IntTy->getScalarSizeInBits();
1544 // This is the maximum number of inuse bits by the integer where the int -> fp
1545 // casts are exact.
1546 unsigned MaxRepresentableBits =
1547 APFloat::semanticsPrecision(FPTy->getScalarType()->getFltSemantics());
1548
1549 // Preserve known number of leading bits. This can allow us to trivial nsw/nuw
1550 // checks later on.
1551 unsigned NumUsedLeadingBits[2] = {IntSz, IntSz};
1552
1553 // NB: This only comes up if OpsFromSigned is true, so there is no need to
1554 // cache if between calls to `foldFBinOpOfIntCastsFromSign`.
1555 auto IsNonZero = [&](unsigned OpNo) -> bool {
1556 if (OpsKnown[OpNo].hasKnownBits() &&
1557 OpsKnown[OpNo].getKnownBits(Q: SQ).isNonZero())
1558 return true;
1559 return isKnownNonZero(V: IntOps[OpNo], Q: SQ);
1560 };
1561
1562 auto IsNonNeg = [&](unsigned OpNo) -> bool {
1563 // NB: This matches the impl in ValueTracking, we just try to use cached
1564 // knownbits here. If we ever start supporting WithCache for
1565 // `isKnownNonNegative`, change this to an explicit call.
1566 return OpsKnown[OpNo].getKnownBits(Q: SQ).isNonNegative();
1567 };
1568
1569 // Check if we know for certain that ({s|u}itofp op) is exact.
1570 auto IsValidPromotion = [&](unsigned OpNo) -> bool {
1571 // Can we treat this operand as the desired sign?
1572 if (OpsFromSigned != isa<SIToFPInst>(Val: BO.getOperand(i_nocapture: OpNo)) &&
1573 !IsNonNeg(OpNo))
1574 return false;
1575
1576 // If fp precision >= bitwidth(op) then its exact.
1577 // NB: This is slightly conservative for `sitofp`. For signed conversion, we
1578 // can handle `MaxRepresentableBits == IntSz - 1` as the sign bit will be
1579 // handled specially. We can't, however, increase the bound arbitrarily for
1580 // `sitofp` as for larger sizes, it won't sign extend.
1581 if (MaxRepresentableBits < IntSz) {
1582 // Otherwise if its signed cast check that fp precisions >= bitwidth(op) -
1583 // numSignBits(op).
1584 // TODO: If we add support for `WithCache` in `ComputeNumSignBits`, change
1585 // `IntOps[OpNo]` arguments to `KnownOps[OpNo]`.
1586 if (OpsFromSigned)
1587 NumUsedLeadingBits[OpNo] = IntSz - ComputeNumSignBits(Op: IntOps[OpNo]);
1588 // Finally for unsigned check that fp precision >= bitwidth(op) -
1589 // numLeadingZeros(op).
1590 else {
1591 NumUsedLeadingBits[OpNo] =
1592 IntSz - OpsKnown[OpNo].getKnownBits(Q: SQ).countMinLeadingZeros();
1593 }
1594 }
1595 // NB: We could also check if op is known to be a power of 2 or zero (which
1596 // will always be representable). Its unlikely, however, that is we are
1597 // unable to bound op in any way we will be able to pass the overflow checks
1598 // later on.
1599
1600 if (MaxRepresentableBits < NumUsedLeadingBits[OpNo])
1601 return false;
1602 // Signed + Mul also requires that op is non-zero to avoid -0 cases.
1603 return !OpsFromSigned || BO.getOpcode() != Instruction::FMul ||
1604 IsNonZero(OpNo);
1605 };
1606
1607 // If we have a constant rhs, see if we can losslessly convert it to an int.
1608 if (Op1FpC != nullptr) {
1609 // Signed + Mul req non-zero
1610 if (OpsFromSigned && BO.getOpcode() == Instruction::FMul &&
1611 !match(V: Op1FpC, P: m_NonZeroFP()))
1612 return nullptr;
1613
1614 Constant *Op1IntC = ConstantFoldCastOperand(
1615 Opcode: OpsFromSigned ? Instruction::FPToSI : Instruction::FPToUI, C: Op1FpC,
1616 DestTy: IntTy, DL);
1617 if (Op1IntC == nullptr)
1618 return nullptr;
1619 if (ConstantFoldCastOperand(Opcode: OpsFromSigned ? Instruction::SIToFP
1620 : Instruction::UIToFP,
1621 C: Op1IntC, DestTy: FPTy, DL) != Op1FpC)
1622 return nullptr;
1623
1624 // First try to keep sign of cast the same.
1625 IntOps[1] = Op1IntC;
1626 }
1627
1628 // Ensure lhs/rhs integer types match.
1629 if (IntTy != IntOps[1]->getType())
1630 return nullptr;
1631
1632 if (Op1FpC == nullptr) {
1633 if (!IsValidPromotion(1))
1634 return nullptr;
1635 }
1636 if (!IsValidPromotion(0))
1637 return nullptr;
1638
  // Finally, we check that the integer version of the binop will not overflow.
1640 BinaryOperator::BinaryOps IntOpc;
1641 // Because of the precision check, we can often rule out overflows.
1642 bool NeedsOverflowCheck = true;
1643 // Try to conservatively rule out overflow based on the already done precision
1644 // checks.
1645 unsigned OverflowMaxOutputBits = OpsFromSigned ? 2 : 1;
1646 unsigned OverflowMaxCurBits =
1647 std::max(a: NumUsedLeadingBits[0], b: NumUsedLeadingBits[1]);
1648 bool OutputSigned = OpsFromSigned;
1649 switch (BO.getOpcode()) {
1650 case Instruction::FAdd:
1651 IntOpc = Instruction::Add;
1652 OverflowMaxOutputBits += OverflowMaxCurBits;
1653 break;
1654 case Instruction::FSub:
1655 IntOpc = Instruction::Sub;
1656 OverflowMaxOutputBits += OverflowMaxCurBits;
1657 break;
1658 case Instruction::FMul:
1659 IntOpc = Instruction::Mul;
1660 OverflowMaxOutputBits += OverflowMaxCurBits * 2;
1661 break;
1662 default:
1663 llvm_unreachable("Unsupported binop");
1664 }
1665 // The precision check may have already ruled out overflow.
1666 if (OverflowMaxOutputBits < IntSz) {
1667 NeedsOverflowCheck = false;
1668 // We can bound unsigned overflow from sub to in range signed value (this is
1669 // what allows us to avoid the overflow check for sub).
1670 if (IntOpc == Instruction::Sub)
1671 OutputSigned = true;
1672 }
1673
1674 // Precision check did not rule out overflow, so need to check.
1675 // TODO: If we add support for `WithCache` in `willNotOverflow`, change
1676 // `IntOps[...]` arguments to `KnownOps[...]`.
1677 if (NeedsOverflowCheck &&
1678 !willNotOverflow(Opcode: IntOpc, LHS: IntOps[0], RHS: IntOps[1], CxtI: BO, IsSigned: OutputSigned))
1679 return nullptr;
1680
1681 Value *IntBinOp = Builder.CreateBinOp(Opc: IntOpc, LHS: IntOps[0], RHS: IntOps[1]);
1682 if (auto *IntBO = dyn_cast<BinaryOperator>(Val: IntBinOp)) {
1683 IntBO->setHasNoSignedWrap(OutputSigned);
1684 IntBO->setHasNoUnsignedWrap(!OutputSigned);
1685 }
1686 if (OutputSigned)
1687 return new SIToFPInst(IntBinOp, FPTy);
1688 return new UIToFPInst(IntBinOp, FPTy);
1689}
1690
1691// Try to fold:
1692// 1) (fp_binop ({s|u}itofp x), ({s|u}itofp y))
1693// -> ({s|u}itofp (int_binop x, y))
1694// 2) (fp_binop ({s|u}itofp x), FpC)
1695// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
1696Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
1697 // Don't perform the fold on vectors, as the integer operation may be much
1698 // more expensive than the float operation in that case.
1699 if (BO.getType()->isVectorTy())
1700 return nullptr;
1701
1702 std::array<Value *, 2> IntOps = {nullptr, nullptr};
1703 Constant *Op1FpC = nullptr;
1704 // Check for:
1705 // 1) (binop ({s|u}itofp x), ({s|u}itofp y))
1706 // 2) (binop ({s|u}itofp x), FpC)
1707 if (!match(V: BO.getOperand(i_nocapture: 0), P: m_SIToFP(Op: m_Value(V&: IntOps[0]))) &&
1708 !match(V: BO.getOperand(i_nocapture: 0), P: m_UIToFP(Op: m_Value(V&: IntOps[0]))))
1709 return nullptr;
1710
1711 if (!match(V: BO.getOperand(i_nocapture: 1), P: m_Constant(C&: Op1FpC)) &&
1712 !match(V: BO.getOperand(i_nocapture: 1), P: m_SIToFP(Op: m_Value(V&: IntOps[1]))) &&
1713 !match(V: BO.getOperand(i_nocapture: 1), P: m_UIToFP(Op: m_Value(V&: IntOps[1]))))
1714 return nullptr;
1715
1716 // Cache KnownBits a bit to potentially save some analysis.
1717 SmallVector<WithCache<const Value *>, 2> OpsKnown = {IntOps[0], IntOps[1]};
1718
1719 // Try treating x/y as coming from both `uitofp` and `sitofp`. There are
1720 // different constraints depending on the sign of the cast.
1721 // NB: `(uitofp nneg X)` == `(sitofp nneg X)`.
1722 if (Instruction *R = foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/false,
1723 IntOps, Op1FpC, OpsKnown))
1724 return R;
1725 return foldFBinOpOfIntCastsFromSign(BO, /*OpsFromSigned=*/true, IntOps,
1726 Op1FpC, OpsKnown);
1727}
1728
1729/// A binop with a constant operand and a sign-extended boolean operand may be
1730/// converted into a select of constants by applying the binary operation to
1731/// the constant with the two possible values of the extended boolean (0 or -1).
1732Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
1733 // TODO: Handle non-commutative binop (constant is operand 0).
1734 // TODO: Handle zext.
1735 // TODO: Peek through 'not' of cast.
1736 Value *BO0 = BO.getOperand(i_nocapture: 0);
1737 Value *BO1 = BO.getOperand(i_nocapture: 1);
1738 Value *X;
1739 Constant *C;
1740 if (!match(V: BO0, P: m_SExt(Op: m_Value(V&: X))) || !match(V: BO1, P: m_ImmConstant(C)) ||
1741 !X->getType()->isIntOrIntVectorTy(BitWidth: 1))
1742 return nullptr;
1743
1744 // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
1745 Constant *Ones = ConstantInt::getAllOnesValue(Ty: BO.getType());
1746 Constant *Zero = ConstantInt::getNullValue(Ty: BO.getType());
1747 Value *TVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Ones, RHS: C);
1748 Value *FVal = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Zero, RHS: C);
1749 return createSelectInstWithUnknownProfile(C: X, S1: TVal, S2: FVal);
1750}
1751
1752static Value *simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
1753 bool IsTrueArm) {
1754 SmallVector<Value *> Ops;
1755 for (Value *Op : I.operands()) {
1756 Value *V = nullptr;
1757 if (Op == SI) {
1758 V = IsTrueArm ? SI->getTrueValue() : SI->getFalseValue();
1759 } else if (match(V: SI->getCondition(),
1760 P: m_SpecificICmp(MatchPred: IsTrueArm ? ICmpInst::ICMP_EQ
1761 : ICmpInst::ICMP_NE,
1762 L: m_Specific(V: Op), R: m_Value(V))) &&
1763 isGuaranteedNotToBeUndefOrPoison(V)) {
1764 // Pass
1765 } else if (match(V: Op, P: m_ZExt(Op: m_Specific(V: SI->getCondition())))) {
1766 V = IsTrueArm ? ConstantInt::get(Ty: Op->getType(), V: 1)
1767 : ConstantInt::getNullValue(Ty: Op->getType());
1768 } else {
1769 V = Op;
1770 }
1771 Ops.push_back(Elt: V);
1772 }
1773
1774 return simplifyInstructionWithOperands(I: &I, NewOps: Ops, Q: I.getDataLayout());
1775}
1776
1777static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
1778 Value *NewOp, InstCombiner &IC) {
1779 Instruction *Clone = I.clone();
1780 Clone->replaceUsesOfWith(From: SI, To: NewOp);
1781 Clone->dropUBImplyingAttrsAndMetadata();
1782 IC.InsertNewInstBefore(New: Clone, Old: I.getIterator());
1783 return Clone;
1784}
1785
1786Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
1787 bool FoldWithMultiUse,
1788 bool SimplifyBothArms) {
1789 // Don't modify shared select instructions unless set FoldWithMultiUse
1790 if (!SI->hasOneUser() && !FoldWithMultiUse)
1791 return nullptr;
1792
1793 Value *TV = SI->getTrueValue();
1794 Value *FV = SI->getFalseValue();
1795
1796 // Bool selects with constant operands can be folded to logical ops.
1797 if (SI->getType()->isIntOrIntVectorTy(BitWidth: 1))
1798 return nullptr;
1799
1800 // Avoid breaking min/max reduction pattern,
1801 // which is necessary for vectorization later.
1802 if (isa<MinMaxIntrinsic>(Val: &Op))
1803 for (Value *IntrinOp : Op.operands())
1804 if (auto *PN = dyn_cast<PHINode>(Val: IntrinOp))
1805 for (Value *PhiOp : PN->operands())
1806 if (PhiOp == &Op)
1807 return nullptr;
1808
1809 // Test if a FCmpInst instruction is used exclusively by a select as
1810 // part of a minimum or maximum operation. If so, refrain from doing
1811 // any other folding. This helps out other analyses which understand
1812 // non-obfuscated minimum and maximum idioms. And in this case, at
1813 // least one of the comparison operands has at least one user besides
1814 // the compare (the select), which would often largely negate the
1815 // benefit of folding anyway.
1816 if (auto *CI = dyn_cast<FCmpInst>(Val: SI->getCondition())) {
1817 if (CI->hasOneUse()) {
1818 Value *Op0 = CI->getOperand(i_nocapture: 0), *Op1 = CI->getOperand(i_nocapture: 1);
1819 if (((TV == Op0 && FV == Op1) || (FV == Op0 && TV == Op1)) &&
1820 !CI->isCommutative())
1821 return nullptr;
1822 }
1823 }
1824
1825 // Make sure that one of the select arms folds successfully.
1826 Value *NewTV = simplifyOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm=*/true);
1827 Value *NewFV =
1828 simplifyOperationIntoSelectOperand(I&: Op, SI, /*IsTrueArm=*/false);
1829 if (!NewTV && !NewFV)
1830 return nullptr;
1831
1832 if (SimplifyBothArms && !(NewTV && NewFV))
1833 return nullptr;
1834
1835 // Create an instruction for the arm that did not fold.
1836 if (!NewTV)
1837 NewTV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: TV, IC&: *this);
1838 if (!NewFV)
1839 NewFV = foldOperationIntoSelectOperand(I&: Op, SI, NewOp: FV, IC&: *this);
1840 return SelectInst::Create(C: SI->getCondition(), S1: NewTV, S2: NewFV, NameStr: "", InsertBefore: nullptr, MDFrom: SI);
1841}
1842
1843static Value *simplifyInstructionWithPHI(Instruction &I, PHINode *PN,
1844 Value *InValue, BasicBlock *InBB,
1845 const DataLayout &DL,
1846 const SimplifyQuery SQ) {
1847 // NB: It is a precondition of this transform that the operands be
1848 // phi translatable!
1849 SmallVector<Value *> Ops;
1850 for (Value *Op : I.operands()) {
1851 if (Op == PN)
1852 Ops.push_back(Elt: InValue);
1853 else
1854 Ops.push_back(Elt: Op->DoPHITranslation(CurBB: PN->getParent(), PredBB: InBB));
1855 }
1856
1857 // Don't consider the simplification successful if we get back a constant
1858 // expression. That's just an instruction in hiding.
1859 // Also reject the case where we simplify back to the phi node. We wouldn't
1860 // be able to remove it in that case.
1861 Value *NewVal = simplifyInstructionWithOperands(
1862 I: &I, NewOps: Ops, Q: SQ.getWithInstruction(I: InBB->getTerminator()));
1863 if (NewVal && NewVal != PN && !match(V: NewVal, P: m_ConstantExpr()))
1864 return NewVal;
1865
1866 // Check if incoming PHI value can be replaced with constant
1867 // based on implied condition.
1868 BranchInst *TerminatorBI = dyn_cast<BranchInst>(Val: InBB->getTerminator());
1869 const ICmpInst *ICmp = dyn_cast<ICmpInst>(Val: &I);
1870 if (TerminatorBI && TerminatorBI->isConditional() &&
1871 TerminatorBI->getSuccessor(i: 0) != TerminatorBI->getSuccessor(i: 1) && ICmp) {
1872 bool LHSIsTrue = TerminatorBI->getSuccessor(i: 0) == PN->getParent();
1873 std::optional<bool> ImpliedCond = isImpliedCondition(
1874 LHS: TerminatorBI->getCondition(), RHSPred: ICmp->getCmpPredicate(), RHSOp0: Ops[0], RHSOp1: Ops[1],
1875 DL, LHSIsTrue);
1876 if (ImpliedCond)
1877 return ConstantInt::getBool(Ty: I.getType(), V: ImpliedCond.value());
1878 }
1879
1880 return nullptr;
1881}
1882
1883/// In some cases it is beneficial to fold a select into a binary operator.
1884/// For example:
1885/// %1 = or %in, 4
1886/// %2 = select %cond, %1, %in
1887/// %3 = or %2, 1
1888/// =>
1889/// %1 = select i1 %cond, 5, 1
1890/// %2 = or %1, %in
1891Instruction *InstCombinerImpl::foldBinOpSelectBinOp(BinaryOperator &Op) {
1892 assert(Op.isAssociative() && "The operation must be associative!");
1893
1894 SelectInst *SI = dyn_cast<SelectInst>(Val: Op.getOperand(i_nocapture: 0));
1895
1896 Constant *Const;
1897 if (!SI || !match(V: Op.getOperand(i_nocapture: 1), P: m_ImmConstant(C&: Const)) ||
1898 !Op.hasOneUse() || !SI->hasOneUse())
1899 return nullptr;
1900
1901 Value *TV = SI->getTrueValue();
1902 Value *FV = SI->getFalseValue();
1903 Value *Input, *NewTV, *NewFV;
1904 Constant *Const2;
1905
1906 if (TV->hasOneUse() && match(V: TV, P: m_BinOp(Opcode: Op.getOpcode(), L: m_Specific(V: FV),
1907 R: m_ImmConstant(C&: Const2)))) {
1908 NewTV = ConstantFoldBinaryInstruction(Opcode: Op.getOpcode(), V1: Const, V2: Const2);
1909 NewFV = Const;
1910 Input = FV;
1911 } else if (FV->hasOneUse() &&
1912 match(V: FV, P: m_BinOp(Opcode: Op.getOpcode(), L: m_Specific(V: TV),
1913 R: m_ImmConstant(C&: Const2)))) {
1914 NewTV = Const;
1915 NewFV = ConstantFoldBinaryInstruction(Opcode: Op.getOpcode(), V1: Const, V2: Const2);
1916 Input = TV;
1917 } else
1918 return nullptr;
1919
1920 if (!NewTV || !NewFV)
1921 return nullptr;
1922
1923 Value *NewSI =
1924 Builder.CreateSelect(C: SI->getCondition(), True: NewTV, False: NewFV, Name: "",
1925 MDFrom: ProfcheckDisableMetadataFixes ? nullptr : SI);
1926 return BinaryOperator::Create(Op: Op.getOpcode(), S1: NewSI, S2: Input);
1927}
1928
1929Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN,
1930 bool AllowMultipleUses) {
1931 unsigned NumPHIValues = PN->getNumIncomingValues();
1932 if (NumPHIValues == 0)
1933 return nullptr;
1934
1935 // We normally only transform phis with a single use. However, if a PHI has
1936 // multiple uses and they are all the same operation, we can fold *all* of the
1937 // uses into the PHI.
1938 bool OneUse = PN->hasOneUse();
1939 bool IdenticalUsers = false;
1940 if (!AllowMultipleUses && !OneUse) {
1941 // Walk the use list for the instruction, comparing them to I.
1942 for (User *U : PN->users()) {
1943 Instruction *UI = cast<Instruction>(Val: U);
1944 if (UI != &I && !I.isIdenticalTo(I: UI))
1945 return nullptr;
1946 }
1947 // Otherwise, we can replace *all* users with the new PHI we form.
1948 IdenticalUsers = true;
1949 }
1950
1951 // Check that all operands are phi-translatable.
1952 for (Value *Op : I.operands()) {
1953 if (Op == PN)
1954 continue;
1955
1956 // Non-instructions never require phi-translation.
1957 auto *I = dyn_cast<Instruction>(Val: Op);
1958 if (!I)
1959 continue;
1960
1961 // Phi-translate can handle phi nodes in the same block.
1962 if (isa<PHINode>(Val: I))
1963 if (I->getParent() == PN->getParent())
1964 continue;
1965
1966 // Operand dominates the block, no phi-translation necessary.
1967 if (DT.dominates(Def: I, BB: PN->getParent()))
1968 continue;
1969
1970 // Not phi-translatable, bail out.
1971 return nullptr;
1972 }
1973
1974 // Check to see whether the instruction can be folded into each phi operand.
1975 // If there is one operand that does not fold, remember the BB it is in.
1976 SmallVector<Value *> NewPhiValues;
1977 SmallVector<unsigned int> OpsToMoveUseToIncomingBB;
1978 bool SeenNonSimplifiedInVal = false;
1979 for (unsigned i = 0; i != NumPHIValues; ++i) {
1980 Value *InVal = PN->getIncomingValue(i);
1981 BasicBlock *InBB = PN->getIncomingBlock(i);
1982
1983 if (auto *NewVal = simplifyInstructionWithPHI(I, PN, InValue: InVal, InBB, DL, SQ)) {
1984 NewPhiValues.push_back(Elt: NewVal);
1985 continue;
1986 }
1987
1988 // Handle some cases that can't be fully simplified, but where we know that
1989 // the two instructions will fold into one.
1990 auto WillFold = [&]() {
1991 if (!InVal->hasUseList() || !InVal->hasOneUser())
1992 return false;
1993
1994 // icmp of ucmp/scmp with constant will fold to icmp.
1995 const APInt *Ignored;
1996 if (isa<CmpIntrinsic>(Val: InVal) &&
1997 match(V: &I, P: m_ICmp(L: m_Specific(V: PN), R: m_APInt(Res&: Ignored))))
1998 return true;
1999
2000 // icmp eq zext(bool), 0 will fold to !bool.
2001 if (isa<ZExtInst>(Val: InVal) &&
2002 cast<ZExtInst>(Val: InVal)->getSrcTy()->isIntOrIntVectorTy(BitWidth: 1) &&
2003 match(V: &I,
2004 P: m_SpecificICmp(MatchPred: ICmpInst::ICMP_EQ, L: m_Specific(V: PN), R: m_Zero())))
2005 return true;
2006
2007 return false;
2008 };
2009
2010 if (WillFold()) {
2011 OpsToMoveUseToIncomingBB.push_back(Elt: i);
2012 NewPhiValues.push_back(Elt: nullptr);
2013 continue;
2014 }
2015
2016 if (!OneUse && !IdenticalUsers)
2017 return nullptr;
2018
2019 if (SeenNonSimplifiedInVal)
2020 return nullptr; // More than one non-simplified value.
2021 SeenNonSimplifiedInVal = true;
2022
2023 // If there is exactly one non-simplified value, we can insert a copy of the
2024 // operation in that block. However, if this is a critical edge, we would
2025 // be inserting the computation on some other paths (e.g. inside a loop).
2026 // Only do this if the pred block is unconditionally branching into the phi
2027 // block. Also, make sure that the pred block is not dead code.
2028 BranchInst *BI = dyn_cast<BranchInst>(Val: InBB->getTerminator());
2029 if (!BI || !BI->isUnconditional() || !DT.isReachableFromEntry(A: InBB))
2030 return nullptr;
2031
2032 NewPhiValues.push_back(Elt: nullptr);
2033 OpsToMoveUseToIncomingBB.push_back(Elt: i);
2034
2035 // Do not push the operation across a loop backedge. This could result in
2036 // an infinite combine loop, and is generally non-profitable (especially
2037 // if the operation was originally outside the loop).
2038 if (isBackEdge(From: InBB, To: PN->getParent()))
2039 return nullptr;
2040 }
2041
2042 // Clone the instruction that uses the phi node and move it into the incoming
2043 // BB because we know that the next iteration of InstCombine will simplify it.
2044 SmallDenseMap<BasicBlock *, Instruction *> Clones;
2045 for (auto OpIndex : OpsToMoveUseToIncomingBB) {
2046 Value *Op = PN->getIncomingValue(i: OpIndex);
2047 BasicBlock *OpBB = PN->getIncomingBlock(i: OpIndex);
2048
2049 Instruction *Clone = Clones.lookup(Val: OpBB);
2050 if (!Clone) {
2051 Clone = I.clone();
2052 for (Use &U : Clone->operands()) {
2053 if (U == PN)
2054 U = Op;
2055 else
2056 U = U->DoPHITranslation(CurBB: PN->getParent(), PredBB: OpBB);
2057 }
2058 Clone = InsertNewInstBefore(New: Clone, Old: OpBB->getTerminator()->getIterator());
2059 Clones.insert(KV: {OpBB, Clone});
2060 // We may have speculated the instruction.
2061 Clone->dropUBImplyingAttrsAndMetadata();
2062 }
2063
2064 NewPhiValues[OpIndex] = Clone;
2065 }
2066
2067 // Okay, we can do the transformation: create the new PHI node.
2068 PHINode *NewPN = PHINode::Create(Ty: I.getType(), NumReservedValues: PN->getNumIncomingValues());
2069 InsertNewInstBefore(New: NewPN, Old: PN->getIterator());
2070 NewPN->takeName(V: PN);
2071 NewPN->setDebugLoc(PN->getDebugLoc());
2072
2073 for (unsigned i = 0; i != NumPHIValues; ++i)
2074 NewPN->addIncoming(V: NewPhiValues[i], BB: PN->getIncomingBlock(i));
2075
2076 if (IdenticalUsers) {
2077 // Collect and deduplicate users up-front to avoid iterator invalidation.
2078 SmallSetVector<Instruction *, 4> ToReplace;
2079 for (User *U : PN->users()) {
2080 Instruction *User = cast<Instruction>(Val: U);
2081 if (User == &I)
2082 continue;
2083 ToReplace.insert(X: User);
2084 }
2085 for (Instruction *I : ToReplace) {
2086 replaceInstUsesWith(I&: *I, V: NewPN);
2087 eraseInstFromFunction(I&: *I);
2088 }
2089 OneUse = true;
2090 }
2091
2092 if (OneUse) {
2093 replaceAllDbgUsesWith(From&: *PN, To&: *NewPN, DomPoint&: *PN, DT);
2094 }
2095 return replaceInstUsesWith(I, V: NewPN);
2096}
2097
2098Instruction *InstCombinerImpl::foldBinopWithRecurrence(BinaryOperator &BO) {
2099 if (!BO.isAssociative())
2100 return nullptr;
2101
2102 // Find the interleaved binary ops.
2103 auto Opc = BO.getOpcode();
2104 auto *BO0 = dyn_cast<BinaryOperator>(Val: BO.getOperand(i_nocapture: 0));
2105 auto *BO1 = dyn_cast<BinaryOperator>(Val: BO.getOperand(i_nocapture: 1));
2106 if (!BO0 || !BO1 || !BO0->hasNUses(N: 2) || !BO1->hasNUses(N: 2) ||
2107 BO0->getOpcode() != Opc || BO1->getOpcode() != Opc ||
2108 !BO0->isAssociative() || !BO1->isAssociative() ||
2109 BO0->getParent() != BO1->getParent())
2110 return nullptr;
2111
2112 assert(BO.isCommutative() && BO0->isCommutative() && BO1->isCommutative() &&
2113 "Expected commutative instructions!");
2114
2115 // Find the matching phis, forming the recurrences.
2116 PHINode *PN0, *PN1;
2117 Value *Start0, *Step0, *Start1, *Step1;
2118 if (!matchSimpleRecurrence(I: BO0, P&: PN0, Start&: Start0, Step&: Step0) || !PN0->hasOneUse() ||
2119 !matchSimpleRecurrence(I: BO1, P&: PN1, Start&: Start1, Step&: Step1) || !PN1->hasOneUse() ||
2120 PN0->getParent() != PN1->getParent())
2121 return nullptr;
2122
2123 assert(PN0->getNumIncomingValues() == 2 && PN1->getNumIncomingValues() == 2 &&
2124 "Expected PHIs with two incoming values!");
2125
2126 // Convert the start and step values to constants.
2127 auto *Init0 = dyn_cast<Constant>(Val: Start0);
2128 auto *Init1 = dyn_cast<Constant>(Val: Start1);
2129 auto *C0 = dyn_cast<Constant>(Val: Step0);
2130 auto *C1 = dyn_cast<Constant>(Val: Step1);
2131 if (!Init0 || !Init1 || !C0 || !C1)
2132 return nullptr;
2133
2134 // Fold the recurrence constants.
2135 auto *Init = ConstantFoldBinaryInstruction(Opcode: Opc, V1: Init0, V2: Init1);
2136 auto *C = ConstantFoldBinaryInstruction(Opcode: Opc, V1: C0, V2: C1);
2137 if (!Init || !C)
2138 return nullptr;
2139
2140 // Create the reduced PHI.
2141 auto *NewPN = PHINode::Create(Ty: PN0->getType(), NumReservedValues: PN0->getNumIncomingValues(),
2142 NameStr: "reduced.phi");
2143
2144 // Create the new binary op.
2145 auto *NewBO = BinaryOperator::Create(Op: Opc, S1: NewPN, S2: C);
2146 if (Opc == Instruction::FAdd || Opc == Instruction::FMul) {
2147 // Intersect FMF flags for FADD and FMUL.
2148 FastMathFlags Intersect = BO0->getFastMathFlags() &
2149 BO1->getFastMathFlags() & BO.getFastMathFlags();
2150 NewBO->setFastMathFlags(Intersect);
2151 } else {
2152 OverflowTracking Flags;
2153 Flags.AllKnownNonNegative = false;
2154 Flags.AllKnownNonZero = false;
2155 Flags.mergeFlags(I&: *BO0);
2156 Flags.mergeFlags(I&: *BO1);
2157 Flags.mergeFlags(I&: BO);
2158 Flags.applyFlags(I&: *NewBO);
2159 }
2160 NewBO->takeName(V: &BO);
2161
2162 for (unsigned I = 0, E = PN0->getNumIncomingValues(); I != E; ++I) {
2163 auto *V = PN0->getIncomingValue(i: I);
2164 auto *BB = PN0->getIncomingBlock(i: I);
2165 if (V == Init0) {
2166 assert(((PN1->getIncomingValue(0) == Init1 &&
2167 PN1->getIncomingBlock(0) == BB) ||
2168 (PN1->getIncomingValue(1) == Init1 &&
2169 PN1->getIncomingBlock(1) == BB)) &&
2170 "Invalid incoming block!");
2171 NewPN->addIncoming(V: Init, BB);
2172 } else if (V == BO0) {
2173 assert(((PN1->getIncomingValue(0) == BO1 &&
2174 PN1->getIncomingBlock(0) == BB) ||
2175 (PN1->getIncomingValue(1) == BO1 &&
2176 PN1->getIncomingBlock(1) == BB)) &&
2177 "Invalid incoming block!");
2178 NewPN->addIncoming(V: NewBO, BB);
2179 } else
2180 llvm_unreachable("Unexpected incoming value!");
2181 }
2182
2183 LLVM_DEBUG(dbgs() << " Combined " << *PN0 << "\n " << *BO0
2184 << "\n with " << *PN1 << "\n " << *BO1
2185 << '\n');
2186
2187 // Insert the new recurrence and remove the old (dead) ones.
2188 InsertNewInstWith(New: NewPN, Old: PN0->getIterator());
2189 InsertNewInstWith(New: NewBO, Old: BO0->getIterator());
2190
2191 eraseInstFromFunction(
2192 I&: *replaceInstUsesWith(I&: *BO0, V: PoisonValue::get(T: BO0->getType())));
2193 eraseInstFromFunction(
2194 I&: *replaceInstUsesWith(I&: *BO1, V: PoisonValue::get(T: BO1->getType())));
2195 eraseInstFromFunction(I&: *PN0);
2196 eraseInstFromFunction(I&: *PN1);
2197
2198 return replaceInstUsesWith(I&: BO, V: NewBO);
2199}
2200
2201Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
2202 // Attempt to fold binary operators whose operands are simple recurrences.
2203 if (auto *NewBO = foldBinopWithRecurrence(BO))
2204 return NewBO;
2205
2206 // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
2207 // we are guarding against replicating the binop in >1 predecessor.
2208 // This could miss matching a phi with 2 constant incoming values.
2209 auto *Phi0 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 0));
2210 auto *Phi1 = dyn_cast<PHINode>(Val: BO.getOperand(i_nocapture: 1));
2211 if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
2212 Phi0->getNumOperands() != Phi1->getNumOperands())
2213 return nullptr;
2214
2215 // TODO: Remove the restriction for binop being in the same block as the phis.
2216 if (BO.getParent() != Phi0->getParent() ||
2217 BO.getParent() != Phi1->getParent())
2218 return nullptr;
2219
2220 // Fold if there is at least one specific constant value in phi0 or phi1's
2221 // incoming values that comes from the same block and this specific constant
2222 // value can be used to do optimization for specific binary operator.
2223 // For example:
2224 // %phi0 = phi i32 [0, %bb0], [%i, %bb1]
2225 // %phi1 = phi i32 [%j, %bb0], [0, %bb1]
2226 // %add = add i32 %phi0, %phi1
2227 // ==>
2228 // %add = phi i32 [%j, %bb0], [%i, %bb1]
2229 Constant *C = ConstantExpr::getBinOpIdentity(Opcode: BO.getOpcode(), Ty: BO.getType(),
2230 /*AllowRHSConstant*/ false);
2231 if (C) {
2232 SmallVector<Value *, 4> NewIncomingValues;
2233 auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) {
2234 auto &Phi0Use = std::get<0>(t&: T);
2235 auto &Phi1Use = std::get<1>(t&: T);
2236 if (Phi0->getIncomingBlock(U: Phi0Use) != Phi1->getIncomingBlock(U: Phi1Use))
2237 return false;
2238 Value *Phi0UseV = Phi0Use.get();
2239 Value *Phi1UseV = Phi1Use.get();
2240 if (Phi0UseV == C)
2241 NewIncomingValues.push_back(Elt: Phi1UseV);
2242 else if (Phi1UseV == C)
2243 NewIncomingValues.push_back(Elt: Phi0UseV);
2244 else
2245 return false;
2246 return true;
2247 };
2248
2249 if (all_of(Range: zip(t: Phi0->operands(), u: Phi1->operands()),
2250 P: CanFoldIncomingValuePair)) {
2251 PHINode *NewPhi =
2252 PHINode::Create(Ty: Phi0->getType(), NumReservedValues: Phi0->getNumOperands());
2253 assert(NewIncomingValues.size() == Phi0->getNumOperands() &&
2254 "The number of collected incoming values should equal the number "
2255 "of the original PHINode operands!");
2256 for (unsigned I = 0; I < Phi0->getNumOperands(); I++)
2257 NewPhi->addIncoming(V: NewIncomingValues[I], BB: Phi0->getIncomingBlock(i: I));
2258 return NewPhi;
2259 }
2260 }
2261
2262 if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
2263 return nullptr;
2264
2265 // Match a pair of incoming constants for one of the predecessor blocks.
2266 BasicBlock *ConstBB, *OtherBB;
2267 Constant *C0, *C1;
2268 if (match(V: Phi0->getIncomingValue(i: 0), P: m_ImmConstant(C&: C0))) {
2269 ConstBB = Phi0->getIncomingBlock(i: 0);
2270 OtherBB = Phi0->getIncomingBlock(i: 1);
2271 } else if (match(V: Phi0->getIncomingValue(i: 1), P: m_ImmConstant(C&: C0))) {
2272 ConstBB = Phi0->getIncomingBlock(i: 1);
2273 OtherBB = Phi0->getIncomingBlock(i: 0);
2274 } else {
2275 return nullptr;
2276 }
2277 if (!match(V: Phi1->getIncomingValueForBlock(BB: ConstBB), P: m_ImmConstant(C&: C1)))
2278 return nullptr;
2279
2280 // The block that we are hoisting to must reach here unconditionally.
2281 // Otherwise, we could be speculatively executing an expensive or
2282 // non-speculative op.
2283 auto *PredBlockBranch = dyn_cast<BranchInst>(Val: OtherBB->getTerminator());
2284 if (!PredBlockBranch || PredBlockBranch->isConditional() ||
2285 !DT.isReachableFromEntry(A: OtherBB))
2286 return nullptr;
2287
2288 // TODO: This check could be tightened to only apply to binops (div/rem) that
2289 // are not safe to speculatively execute. But that could allow hoisting
2290 // potentially expensive instructions (fdiv for example).
2291 for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
2292 if (!isGuaranteedToTransferExecutionToSuccessor(I: &*BBIter))
2293 return nullptr;
2294
2295 // Fold constants for the predecessor block with constant incoming values.
2296 Constant *NewC = ConstantFoldBinaryOpOperands(Opcode: BO.getOpcode(), LHS: C0, RHS: C1, DL);
2297 if (!NewC)
2298 return nullptr;
2299
2300 // Make a new binop in the predecessor block with the non-constant incoming
2301 // values.
2302 Builder.SetInsertPoint(PredBlockBranch);
2303 Value *NewBO = Builder.CreateBinOp(Opc: BO.getOpcode(),
2304 LHS: Phi0->getIncomingValueForBlock(BB: OtherBB),
2305 RHS: Phi1->getIncomingValueForBlock(BB: OtherBB));
2306 if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(Val: NewBO))
2307 NotFoldedNewBO->copyIRFlags(V: &BO);
2308
2309 // Replace the binop with a phi of the new values. The old phis are dead.
2310 PHINode *NewPhi = PHINode::Create(Ty: BO.getType(), NumReservedValues: 2);
2311 NewPhi->addIncoming(V: NewBO, BB: OtherBB);
2312 NewPhi->addIncoming(V: NewC, BB: ConstBB);
2313 return NewPhi;
2314}
2315
2316Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
2317 bool IsOtherParamConst = isa<Constant>(Val: I.getOperand(i_nocapture: 1));
2318
2319 if (auto *Sel = dyn_cast<SelectInst>(Val: I.getOperand(i_nocapture: 0))) {
2320 if (Instruction *NewSel =
2321 FoldOpIntoSelect(Op&: I, SI: Sel, FoldWithMultiUse: false, SimplifyBothArms: !IsOtherParamConst))
2322 return NewSel;
2323 } else if (auto *PN = dyn_cast<PHINode>(Val: I.getOperand(i_nocapture: 0))) {
2324 if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
2325 return NewPhi;
2326 }
2327 return nullptr;
2328}
2329
2330static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
2331 // If this GEP has only 0 indices, it is the same pointer as
2332 // Src. If Src is not a trivial GEP too, don't combine
2333 // the indices.
2334 if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() &&
2335 !Src.hasOneUse())
2336 return false;
2337 return true;
2338}
2339
2340/// Find a constant NewC that has property:
2341/// shuffle(NewC, ShMask) = C
2342/// Returns nullptr if such a constant does not exist e.g. ShMask=<0,0> C=<1,2>
2343///
2344/// A 1-to-1 mapping is not required. Example:
2345/// ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <poison,5,6,poison>
2346Constant *InstCombinerImpl::unshuffleConstant(ArrayRef<int> ShMask, Constant *C,
2347 VectorType *NewCTy) {
2348 if (isa<ScalableVectorType>(Val: NewCTy)) {
2349 Constant *Splat = C->getSplatValue();
2350 if (!Splat)
2351 return nullptr;
2352 return ConstantVector::getSplat(EC: NewCTy->getElementCount(), Elt: Splat);
2353 }
2354
2355 if (cast<FixedVectorType>(Val: NewCTy)->getNumElements() >
2356 cast<FixedVectorType>(Val: C->getType())->getNumElements())
2357 return nullptr;
2358
2359 unsigned NewCNumElts = cast<FixedVectorType>(Val: NewCTy)->getNumElements();
2360 PoisonValue *PoisonScalar = PoisonValue::get(T: C->getType()->getScalarType());
2361 SmallVector<Constant *, 16> NewVecC(NewCNumElts, PoisonScalar);
2362 unsigned NumElts = cast<FixedVectorType>(Val: C->getType())->getNumElements();
2363 for (unsigned I = 0; I < NumElts; ++I) {
2364 Constant *CElt = C->getAggregateElement(Elt: I);
2365 if (ShMask[I] >= 0) {
2366 assert(ShMask[I] < (int)NumElts && "Not expecting narrowing shuffle");
2367 Constant *NewCElt = NewVecC[ShMask[I]];
2368 // Bail out if:
2369 // 1. The constant vector contains a constant expression.
2370 // 2. The shuffle needs an element of the constant vector that can't
2371 // be mapped to a new constant vector.
2372 // 3. This is a widening shuffle that copies elements of V1 into the
2373 // extended elements (extending with poison is allowed).
2374 if (!CElt || (!isa<PoisonValue>(Val: NewCElt) && NewCElt != CElt) ||
2375 I >= NewCNumElts)
2376 return nullptr;
2377 NewVecC[ShMask[I]] = CElt;
2378 }
2379 }
2380 return ConstantVector::get(V: NewVecC);
2381}
2382
2383// Get the result of `Vector Op Splat` (or Splat Op Vector if \p SplatLHS).
2384static Constant *constantFoldBinOpWithSplat(unsigned Opcode, Constant *Vector,
2385 Constant *Splat, bool SplatLHS,
2386 const DataLayout &DL) {
2387 ElementCount EC = cast<VectorType>(Val: Vector->getType())->getElementCount();
2388 Constant *LHS = ConstantVector::getSplat(EC, Elt: Splat);
2389 Constant *RHS = Vector;
2390 if (!SplatLHS)
2391 std::swap(a&: LHS, b&: RHS);
2392 return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
2393}
2394
/// Try to fold a binary operator that produces a vector.
///
/// Returns nullptr when \p Inst is not vector-typed or when none of the folds
/// below applies. Otherwise returns either a new, not-yet-inserted instruction
/// or the result of replaceInstUsesWith. The folds are tried in this order:
///   1. constant splat combined with a `vector.insert` of two constants;
///   2. both operands are concatenating shuffles with the same mask;
///   3. operands are `vector.reverse` / `vp.reverse` calls (or a splat);
///   4. (only if Inst is safe to speculate) shuffles with a shared mask,
///      commuted select-shuffles, shuffle combined with a constant, and
///      reassociation that sinks a splat shuffle.
2395 Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
2396 if (!isa<VectorType>(Val: Inst.getType()))
2397 return nullptr;
2398
2399 BinaryOperator::BinaryOps Opcode = Inst.getOpcode();
2400 Value *LHS = Inst.getOperand(i_nocapture: 0), *RHS = Inst.getOperand(i_nocapture: 1);
2401 assert(cast<VectorType>(LHS->getType())->getElementCount() ==
2402 cast<VectorType>(Inst.getType())->getElementCount());
2403 assert(cast<VectorType>(RHS->getType())->getElementCount() ==
2404 cast<VectorType>(Inst.getType())->getElementCount());
2405
// Helper for fold 1. Requires MaybeSplat to be a constant splat and
// MaybeSubVector to be a vector.insert whose destination and sub-vector are
// both constants. Folds the binop separately into destination and sub-vector
// and re-creates the insert at the same index. SplatLHS selects the operand
// order passed to the constant folder.
2406 auto foldConstantsThroughSubVectorInsertSplat =
2407 [&](Value *MaybeSubVector, Value *MaybeSplat,
2408 bool SplatLHS) -> Instruction * {
2409 Value *Idx;
2410 Constant *Splat, *SubVector, *Dest;
2411 if (!match(V: MaybeSplat, P: m_ConstantSplat(SubPattern: m_Constant(C&: Splat))) ||
2412 !match(V: MaybeSubVector,
2413 P: m_VectorInsert(Op0: m_Constant(C&: Dest), Op1: m_Constant(C&: SubVector),
2414 Op2: m_Value(V&: Idx))))
2415 return nullptr;
2416 SubVector =
2417 constantFoldBinOpWithSplat(Opcode, Vector: SubVector, Splat, SplatLHS, DL);
2418 Dest = constantFoldBinOpWithSplat(Opcode, Vector: Dest, Splat, SplatLHS, DL);
// Either constant fold may have failed; give up in that case.
2419 if (!SubVector || !Dest)
2420 return nullptr;
2421 auto *InsertVector =
2422 Builder.CreateInsertVector(DstType: Dest->getType(), SrcVec: Dest, SubVec: SubVector, Idx);
2423 return replaceInstUsesWith(I&: Inst, V: InsertVector);
2424 };
2425
2426 // If one operand is a constant splat and the other operand is a
2427 // `vector.insert` where both the destination and subvector are constant,
2428 // apply the operation to both the destination and subvector, returning a new
2429 // constant `vector.insert`. This helps constant folding for scalable vectors.
2430 if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
2431 /*MaybeSubVector=*/LHS, /*MaybeSplat=*/RHS, /*SplatLHS=*/false))
2432 return Folded;
2433 if (Instruction *Folded = foldConstantsThroughSubVectorInsertSplat(
2434 /*MaybeSubVector=*/RHS, /*MaybeSplat=*/LHS, /*SplatLHS=*/true))
2435 return Folded;
2436
2437 // If both operands of the binop are vector concatenations, then perform the
2438 // narrow binop on each pair of the source operands followed by concatenation
2439 // of the results.
2440 Value *L0, *L1, *R0, *R1;
2441 ArrayRef<int> Mask;
2442 if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: L0), v2: m_Value(V&: L1), mask: m_Mask(Mask))) &&
2443 match(V: RHS, P: m_Shuffle(v1: m_Value(V&: R0), v2: m_Value(V&: R1), mask: m_SpecificMask(Mask))) &&
2444 LHS->hasOneUse() && RHS->hasOneUse() &&
2445 cast<ShuffleVectorInst>(Val: LHS)->isConcat() &&
2446 cast<ShuffleVectorInst>(Val: RHS)->isConcat()) {
2447 // This transform does not have the speculative execution constraint as
2448 // below because the shuffle is a concatenation. The new binops are
2449 // operating on exactly the same elements as the existing binop.
2450 // TODO: We could ease the mask requirement to allow different undef lanes,
2451 // but that requires an analysis of the binop-with-undef output value.
2452 Value *NewBO0 = Builder.CreateBinOp(Opc: Opcode, LHS: L0, RHS: R0);
// CreateBinOp may return something other than a BinaryOperator, hence the
// dyn_cast before copying flags.
2453 if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO0))
2454 BO->copyIRFlags(V: &Inst);
2455 Value *NewBO1 = Builder.CreateBinOp(Opc: Opcode, LHS: L1, RHS: R1);
2456 if (auto *BO = dyn_cast<BinaryOperator>(Val: NewBO1))
2457 BO->copyIRFlags(V: &Inst);
2458 return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
2459 }
2460
// Helper: build Op(X, Y) carrying Inst's name and IR flags, and return a
// not-yet-inserted call to llvm.vector.reverse on the result.
2461 auto createBinOpReverse = [&](Value *X, Value *Y) {
2462 Value *V = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y, Name: Inst.getName());
2463 if (auto *BO = dyn_cast<BinaryOperator>(Val: V))
2464 BO->copyIRFlags(V: &Inst);
2465 Module *M = Inst.getModule();
2466 Function *F = Intrinsic::getOrInsertDeclaration(
2467 M, id: Intrinsic::vector_reverse, Tys: V->getType());
2468 return CallInst::Create(Func: F, Args: V);
2469 };
2470
2471 // NOTE: Reverse shuffles don't require the speculative execution protection
2472 // below because they don't affect which lanes take part in the computation.
2473
2474 Value *V1, *V2;
2475 if (match(V: LHS, P: m_VecReverse(Op0: m_Value(V&: V1)))) {
2476 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
// Accepted when at least one reverse has a single use, or when both operands
// are the same reverse and Inst holds its only two uses.
2477 if (match(V: RHS, P: m_VecReverse(Op0: m_Value(V&: V2))) &&
2478 (LHS->hasOneUse() || RHS->hasOneUse() ||
2479 (LHS == RHS && LHS->hasNUses(N: 2))))
2480 return createBinOpReverse(V1, V2);
2481
2482 // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
2483 if (LHS->hasOneUse() && isSplatValue(V: RHS))
2484 return createBinOpReverse(V1, RHS);
2485 }
2486 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
2487 else if (isSplatValue(V: LHS) && match(V: RHS, P: m_OneUse(SubPattern: m_VecReverse(Op0: m_Value(V&: V2)))))
2488 return createBinOpReverse(LHS, V2);
2489
// Helper: same shape as createBinOpReverse, but the result is wrapped in
// llvm.experimental.vp.reverse using an all-true mask and the supplied EVL.
2490 auto createBinOpVPReverse = [&](Value *X, Value *Y, Value *EVL) {
2491 Value *V = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y, Name: Inst.getName());
2492 if (auto *BO = dyn_cast<BinaryOperator>(Val: V))
2493 BO->copyIRFlags(V: &Inst);
2494
2495 ElementCount EC = cast<VectorType>(Val: V->getType())->getElementCount();
2496 Value *AllTrueMask = Builder.CreateVectorSplat(EC, V: Builder.getTrue());
2497 Module *M = Inst.getModule();
2498 Function *F = Intrinsic::getOrInsertDeclaration(
2499 M, id: Intrinsic::experimental_vp_reverse, Tys: V->getType());
2500 return CallInst::Create(Func: F, Args: {V, AllTrueMask, EVL});
2501 };
2502
// Only vp.reverse calls whose mask operand matches m_AllOnes() are handled,
// and when both operands are reversed they must share the same EVL value.
2503 Value *EVL;
2504 if (match(V: LHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>(
2505 Op0: m_Value(V&: V1), Op1: m_AllOnes(), Op2: m_Value(V&: EVL)))) {
2506 // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
2507 if (match(V: RHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>(
2508 Op0: m_Value(V&: V2), Op1: m_AllOnes(), Op2: m_Specific(V: EVL))) &&
2509 (LHS->hasOneUse() || RHS->hasOneUse() ||
2510 (LHS == RHS && LHS->hasNUses(N: 2))))
2511 return createBinOpVPReverse(V1, V2, EVL);
2512
2513 // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
2514 if (LHS->hasOneUse() && isSplatValue(V: RHS))
2515 return createBinOpVPReverse(V1, RHS, EVL);
2516 }
2517 // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
// NOTE(review): unlike the non-VP form above (which uses m_OneUse) and the
// RHS-splat case just above (LHS->hasOneUse()), this match places no use-count
// restriction on the vp.reverse call -- confirm that this is intentional.
2518 else if (isSplatValue(V: LHS) &&
2519 match(V: RHS, P: m_Intrinsic<Intrinsic::experimental_vp_reverse>(
2520 Op0: m_Value(V&: V2), Op1: m_AllOnes(), Op2: m_Value(V&: EVL))))
2521 return createBinOpVPReverse(LHS, V2, EVL);
2522
2523 // It may not be safe to reorder shuffles and things like div, urem, etc.
2524 // because we may trap when executing those ops on unknown vector elements.
2525 // See PR20059.
2526 if (!isSafeToSpeculativelyExecuteWithVariableReplaced(I: &Inst))
2527 return nullptr;
2528
// Helper: build Op(X, Y) with Inst's IR flags and return a not-yet-inserted
// single-source shuffle of the result using mask M.
2529 auto createBinOpShuffle = [&](Value *X, Value *Y, ArrayRef<int> M) {
2530 Value *XY = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y);
2531 if (auto *BO = dyn_cast<BinaryOperator>(Val: XY))
2532 BO->copyIRFlags(V: &Inst);
2533 return new ShuffleVectorInst(XY, M);
2534 };
2535
2536 // If both arguments of the binary operation are shuffles that use the same
2537 // mask and shuffle within a single vector, move the shuffle after the binop.
2538 if (match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(), mask: m_Mask(Mask))) &&
2539 match(V: RHS, P: m_Shuffle(v1: m_Value(V&: V2), v2: m_Poison(), mask: m_SpecificMask(Mask))) &&
2540 V1->getType() == V2->getType() &&
2541 (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
2542 // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
2543 return createBinOpShuffle(V1, V2, Mask);
2544 }
2545
2546 // If both arguments of a commutative binop are select-shuffles that use the
2547 // same mask with commuted operands, the shuffles are unnecessary.
2548 if (Inst.isCommutative() &&
2549 match(V: LHS, P: m_Shuffle(v1: m_Value(V&: V1), v2: m_Value(V&: V2), mask: m_Mask(Mask))) &&
2550 match(V: RHS,
2551 P: m_Shuffle(v1: m_Specific(V: V2), v2: m_Specific(V: V1), mask: m_SpecificMask(Mask)))) {
2552 auto *LShuf = cast<ShuffleVectorInst>(Val: LHS);
2553 auto *RShuf = cast<ShuffleVectorInst>(Val: RHS);
2554 // TODO: Allow shuffles that contain undefs in the mask?
2555 // That is legal, but it reduces undef knowledge.
2556 // TODO: Allow arbitrary shuffles by shuffling after binop?
2557 // That might be legal, but we have to deal with poison.
2558 if (LShuf->isSelect() &&
2559 !is_contained(Range: LShuf->getShuffleMask(), Element: PoisonMaskElem) &&
2560 RShuf->isSelect() &&
2561 !is_contained(Range: RShuf->getShuffleMask(), Element: PoisonMaskElem)) {
2562 // Example:
2563 // LHS = shuffle V1, V2, <0, 5, 6, 3>
2564 // RHS = shuffle V2, V1, <0, 5, 6, 3>
2565 // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
2566 Instruction *NewBO = BinaryOperator::Create(Op: Opcode, S1: V1, S2: V2);
2567 NewBO->copyIRFlags(V: &Inst);
2568 return NewBO;
2569 }
2570 }
2571
2572 // If one argument is a shuffle within one vector and the other is a constant,
2573 // try moving the shuffle after the binary operation. This canonicalization
2574 // intends to move shuffles closer to other shuffles and binops closer to
2575 // other binops, so they can be folded. It may also enable demanded elements
2576 // transforms.
2577 Constant *C;
2578 if (match(V: &Inst, P: m_c_BinOp(L: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: V1), v2: m_Poison(),
2579 mask: m_Mask(Mask))),
2580 R: m_ImmConstant(C)))) {
2581 assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() &&
2582 "Shuffle should not change scalar type");
2583
// The match above is commutative, so record which side the constant is on;
// this decides the operand order of the rebuilt binop below.
2584 bool ConstOp1 = isa<Constant>(Val: RHS);
2585 if (Constant *NewC =
2586 unshuffleConstant(ShMask: Mask, C, NewCTy: cast<VectorType>(Val: V1->getType()))) {
2587 // For fixed vectors, lanes of NewC not used by the shuffle will be poison
2588 // which will cause UB for div/rem. Mask them with a safe constant.
2589 if (isa<FixedVectorType>(Val: V1->getType()) && Inst.isIntDivRem())
2590 NewC = getSafeVectorConstantForBinop(Opcode, In: NewC, IsRHSConstant: ConstOp1);
2591
2592 // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
2593 // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
2594 Value *NewLHS = ConstOp1 ? V1 : NewC;
2595 Value *NewRHS = ConstOp1 ? NewC : V1;
2596 return createBinOpShuffle(NewLHS, NewRHS, Mask);
2597 }
2598 }
2599
2600 // Try to reassociate to sink a splat shuffle after a binary operation.
2601 if (Inst.isAssociative() && Inst.isCommutative()) {
2602 // Canonicalize shuffle operand as LHS.
2603 if (isa<ShuffleVectorInst>(Val: RHS))
2604 std::swap(a&: LHS, b&: RHS);
2605
2606 Value *X;
2607 ArrayRef<int> MaskC;
2608 int SplatIndex;
2609 Value *Y, *OtherOp;
// Required shape: LHS is a one-use splat (or poison) shuffle of X with X of
// Inst's type, and RHS is a one-use binop with the same opcode as Inst.
2610 if (!match(V: LHS,
2611 P: m_OneUse(SubPattern: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_Mask(MaskC)))) ||
2612 !match(Mask: MaskC, P: m_SplatOrPoisonMask(SplatIndex)) ||
2613 X->getType() != Inst.getType() ||
2614 !match(V: RHS, P: m_OneUse(SubPattern: m_BinOp(Opcode, L: m_Value(V&: Y), R: m_Value(V&: OtherOp)))))
2615 return nullptr;
2616
2617 // FIXME: This may not be safe if the analysis allows undef elements. By
2618 // moving 'Y' before the splat shuffle, we are implicitly assuming
2619 // that it is not undef/poison at the splat index.
// After this block, Y names the operand of RHS that is a splat at SplatIndex
// and OtherOp names the remaining operand.
2620 if (isSplatValue(V: OtherOp, Index: SplatIndex)) {
2621 std::swap(a&: Y, b&: OtherOp);
2622 } else if (!isSplatValue(V: Y, Index: SplatIndex)) {
2623 return nullptr;
2624 }
2625
2626 // X and Y are splatted values, so perform the binary operation on those
2627 // values followed by a splat followed by the 2nd binary operation:
2628 // bo (splat X), (bo Y, OtherOp) --> bo (splat (bo X, Y)), OtherOp
2629 Value *NewBO = Builder.CreateBinOp(Opc: Opcode, LHS: X, RHS: Y);
2630 SmallVector<int, 8> NewMask(MaskC.size(), SplatIndex);
2631 Value *NewSplat = Builder.CreateShuffleVector(V: NewBO, Mask: NewMask);
2632 Instruction *R = BinaryOperator::Create(Op: Opcode, S1: NewSplat, S2: OtherOp);
2633
2634 // Intersect FMF on both new binops. Other (poison-generating) flags are
2635 // dropped to be safe.
2636 if (isa<FPMathOperator>(Val: R)) {
2637 R->copyFastMathFlags(I: &Inst);
2638 R->andIRFlags(V: RHS);
2639 }
2640 if (auto *NewInstBO = dyn_cast<BinaryOperator>(Val: NewBO))
2641 NewInstBO->copyIRFlags(V: R);
2642 return R;
2643 }
2644
2645 return nullptr;
2646 }
2647
2648/// Try to narrow the width of a binop if at least 1 operand is an extend of
2649/// of a value. This requires a potentially expensive known bits check to make
2650/// sure the narrow op does not overflow.
2651Instruction *InstCombinerImpl::narrowMathIfNoOverflow(BinaryOperator &BO) {
2652 // We need at least one extended operand.
2653 Value *Op0 = BO.getOperand(i_nocapture: 0), *Op1 = BO.getOperand(i_nocapture: 1);
2654
2655 // If this is a sub, we swap the operands since we always want an extension
2656 // on the RHS. The LHS can be an extension or a constant.
2657 if (BO.getOpcode() == Instruction::Sub)
2658 std::swap(a&: Op0, b&: Op1);
2659
2660 Value *X;
2661 bool IsSext = match(V: Op0, P: m_SExt(Op: m_Value(V&: X)));
2662 if (!IsSext && !match(V: Op0, P: m_ZExt(Op: m_Value(V&: X))))
2663 return nullptr;
2664
2665 // If both operands are the same extension from the same source type and we
2666 // can eliminate at least one (hasOneUse), this might work.
2667 CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
2668 Value *Y;
2669 if (!(match(V: Op1, P: m_ZExtOrSExt(Op: m_Value(V&: Y))) && X->getType() == Y->getType() &&
2670 cast<Operator>(Val: Op1)->getOpcode() == CastOpc &&
2671 (Op0->hasOneUse() || Op1->hasOneUse()))) {
2672 // If that did not match, see if we have a suitable constant operand.
2673 // Truncating and extending must produce the same constant.
2674 Constant *WideC;
2675 if (!Op0->hasOneUse() || !match(V: Op1, P: m_Constant(C&: WideC)))
2676 return nullptr;
2677 Constant *NarrowC = getLosslessInvCast(C: WideC, InvCastTo: X->getType(), CastOp: CastOpc, DL);
2678 if (!NarrowC)
2679 return nullptr;
2680 Y = NarrowC;
2681 }
2682
2683 // Swap back now that we found our operands.
2684 if (BO.getOpcode() == Instruction::Sub)
2685 std::swap(a&: X, b&: Y);
2686
2687 // Both operands have narrow versions. Last step: the math must not overflow
2688 // in the narrow width.
2689 if (!willNotOverflow(Opcode: BO.getOpcode(), LHS: X, RHS: Y, CxtI: BO, IsSigned: IsSext))
2690 return nullptr;
2691
2692 // bo (ext X), (ext Y) --> ext (bo X, Y)
2693 // bo (ext X), C --> ext (bo X, C')
2694 Value *NarrowBO = Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: X, RHS: Y, Name: "narrow");
2695 if (auto *NewBinOp = dyn_cast<BinaryOperator>(Val: NarrowBO)) {
2696 if (IsSext)
2697 NewBinOp->setHasNoSignedWrap();
2698 else
2699 NewBinOp->setHasNoUnsignedWrap();
2700 }
2701 return CastInst::Create(CastOpc, S: NarrowBO, Ty: BO.getType());
2702}
2703
2704/// Determine nowrap flags for (gep (gep p, x), y) to (gep p, (x + y))
2705/// transform.
2706static GEPNoWrapFlags getMergedGEPNoWrapFlags(GEPOperator &GEP1,
2707 GEPOperator &GEP2) {
2708 return GEP1.getNoWrapFlags().intersectForOffsetAdd(Other: GEP2.getNoWrapFlags());
2709}
2710
2711/// Thread a GEP operation with constant indices through the constant true/false
2712/// arms of a select.
2713static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
2714 InstCombiner::BuilderTy &Builder) {
2715 if (!GEP.hasAllConstantIndices())
2716 return nullptr;
2717
2718 Instruction *Sel;
2719 Value *Cond;
2720 Constant *TrueC, *FalseC;
2721 if (!match(V: GEP.getPointerOperand(), P: m_Instruction(I&: Sel)) ||
2722 !match(V: Sel,
2723 P: m_Select(C: m_Value(V&: Cond), L: m_Constant(C&: TrueC), R: m_Constant(C&: FalseC))))
2724 return nullptr;
2725
2726 // gep (select Cond, TrueC, FalseC), IndexC --> select Cond, TrueC', FalseC'
2727 // Propagate 'inbounds' and metadata from existing instructions.
2728 // Note: using IRBuilder to create the constants for efficiency.
2729 SmallVector<Value *, 4> IndexC(GEP.indices());
2730 GEPNoWrapFlags NW = GEP.getNoWrapFlags();
2731 Type *Ty = GEP.getSourceElementType();
2732 Value *NewTrueC = Builder.CreateGEP(Ty, Ptr: TrueC, IdxList: IndexC, Name: "", NW);
2733 Value *NewFalseC = Builder.CreateGEP(Ty, Ptr: FalseC, IdxList: IndexC, Name: "", NW);
2734 return SelectInst::Create(C: Cond, S1: NewTrueC, S2: NewFalseC, NameStr: "", InsertBefore: nullptr, MDFrom: Sel);
2735}
2736
2737// Canonicalization:
2738// gep T, (gep i8, base, C1), (Index + C2) into
2739// gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index
2740static Instruction *canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP,
2741 GEPOperator *Src,
2742 InstCombinerImpl &IC) {
2743 if (GEP.getNumIndices() != 1)
2744 return nullptr;
2745 auto &DL = IC.getDataLayout();
2746 Value *Base;
2747 const APInt *C1;
2748 if (!match(V: Src, P: m_PtrAdd(PointerOp: m_Value(V&: Base), OffsetOp: m_APInt(Res&: C1))))
2749 return nullptr;
2750 Value *VarIndex;
2751 const APInt *C2;
2752 Type *PtrTy = Src->getType()->getScalarType();
2753 unsigned IndexSizeInBits = DL.getIndexTypeSizeInBits(Ty: PtrTy);
2754 if (!match(V: GEP.getOperand(i_nocapture: 1), P: m_AddLike(L: m_Value(V&: VarIndex), R: m_APInt(Res&: C2))))
2755 return nullptr;
2756 if (C1->getBitWidth() != IndexSizeInBits ||
2757 C2->getBitWidth() != IndexSizeInBits)
2758 return nullptr;
2759 Type *BaseType = GEP.getSourceElementType();
2760 if (isa<ScalableVectorType>(Val: BaseType))
2761 return nullptr;
2762 APInt TypeSize(IndexSizeInBits, DL.getTypeAllocSize(Ty: BaseType));
2763 APInt NewOffset = TypeSize * *C2 + *C1;
2764 if (NewOffset.isZero() ||
2765 (Src->hasOneUse() && GEP.getOperand(i_nocapture: 1)->hasOneUse())) {
2766 GEPNoWrapFlags Flags = GEPNoWrapFlags::none();
2767 if (GEP.hasNoUnsignedWrap() &&
2768 cast<GEPOperator>(Val: Src)->hasNoUnsignedWrap() &&
2769 match(V: GEP.getOperand(i_nocapture: 1), P: m_NUWAddLike(L: m_Value(), R: m_Value()))) {
2770 Flags |= GEPNoWrapFlags::noUnsignedWrap();
2771 if (GEP.isInBounds() && cast<GEPOperator>(Val: Src)->isInBounds())
2772 Flags |= GEPNoWrapFlags::inBounds();
2773 }
2774
2775 Value *GEPConst =
2776 IC.Builder.CreatePtrAdd(Ptr: Base, Offset: IC.Builder.getInt(AI: NewOffset), Name: "", NW: Flags);
2777 return GetElementPtrInst::Create(PointeeType: BaseType, Ptr: GEPConst, IdxList: VarIndex, NW: Flags);
2778 }
2779
2780 return nullptr;
2781}
2782
2783/// Combine constant offsets separated by variable offsets.
2784/// ptradd (ptradd (ptradd p, C1), x), C2 -> ptradd (ptradd p, x), C1+C2
2785static Instruction *combineConstantOffsets(GetElementPtrInst &GEP,
2786 InstCombinerImpl &IC) {
2787 if (!GEP.hasAllConstantIndices())
2788 return nullptr;
2789
2790 GEPNoWrapFlags NW = GEPNoWrapFlags::all();
2791 SmallVector<GetElementPtrInst *> Skipped;
2792 auto *InnerGEP = dyn_cast<GetElementPtrInst>(Val: GEP.getPointerOperand());
2793 while (true) {
2794 if (!InnerGEP)
2795 return nullptr;
2796
2797 NW = NW.intersectForReassociate(Other: InnerGEP->getNoWrapFlags());
2798 if (InnerGEP->hasAllConstantIndices())
2799 break;
2800
2801 if (!InnerGEP->hasOneUse())
2802 return nullptr;
2803
2804 Skipped.push_back(Elt: InnerGEP);
2805 InnerGEP = dyn_cast<GetElementPtrInst>(Val: InnerGEP->getPointerOperand());
2806 }
2807
2808 // The two constant offset GEPs are directly adjacent: Let normal offset
2809 // merging handle it.
2810 if (Skipped.empty())
2811 return nullptr;
2812
2813 // FIXME: This one-use check is not strictly necessary. Consider relaxing it
2814 // if profitable.
2815 if (!InnerGEP->hasOneUse())
2816 return nullptr;
2817
2818 // Don't bother with vector splats.
2819 Type *Ty = GEP.getType();
2820 if (InnerGEP->getType() != Ty)
2821 return nullptr;
2822
2823 const DataLayout &DL = IC.getDataLayout();
2824 APInt Offset(DL.getIndexTypeSizeInBits(Ty), 0);
2825 if (!GEP.accumulateConstantOffset(DL, Offset) ||
2826 !InnerGEP->accumulateConstantOffset(DL, Offset))
2827 return nullptr;
2828
2829 IC.replaceOperand(I&: *Skipped.back(), OpNum: 0, V: InnerGEP->getPointerOperand());
2830 for (GetElementPtrInst *SkippedGEP : Skipped)
2831 SkippedGEP->setNoWrapFlags(NW);
2832
2833 return IC.replaceInstUsesWith(
2834 I&: GEP,
2835 V: IC.Builder.CreatePtrAdd(Ptr: Skipped.front(), Offset: IC.Builder.getInt(AI: Offset), Name: "",
2836 NW: NW.intersectForOffsetAdd(Other: GEP.getNoWrapFlags())));
2837}
2838
/// Try to merge \p GEP with the GEP operator \p Src into a simpler form.
/// NOTE(review): \p Src is presumably the pointer operand of \p GEP; the call
/// site is outside this view -- confirm.
///
/// After the shouldMergeGEPs gate, the following are attempted in order:
///   1. canonicalizeGEPOfConstGEPI8 and combineConstantOffsets;
///   2. folding a constant index and a select-of-constants index (one on each
///      GEP, both GEPs single-index) into a single select index;
///   3. adding Src's last index to GEP's first index when Src ends in a
///      sequential index and the sum simplifies.
/// Returns nullptr when nothing applies.
2839 Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
2840 GEPOperator *Src) {
2841 // Combine Indices - If the source pointer to this getelementptr instruction
2842 // is a getelementptr instruction with matching element type, combine the
2843 // indices of the two getelementptr instructions into a single instruction.
2844 if (!shouldMergeGEPs(GEP&: *cast<GEPOperator>(Val: &GEP), Src&: *Src))
2845 return nullptr;
2846
2847 if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, IC&: *this))
2848 return I;
2849
2850 if (auto *I = combineConstantOffsets(GEP, IC&: *this))
2851 return I;
2852
// Everything below requires the element type produced by Src to be the
// element type GEP indexes from.
2853 if (Src->getResultElementType() != GEP.getSourceElementType())
2854 return nullptr;
2855
2856 // Fold chained GEP with constant base into single GEP:
2857 // gep i8, (gep i8, %base, C1), (select Cond, C2, C3)
2858 // -> gep i8, %base, (select Cond, C1+C2, C1+C3)
2859 if (Src->hasOneUse() && GEP.getNumIndices() == 1 &&
2860 Src->getNumIndices() == 1) {
2861 Value *SrcIdx = *Src->idx_begin();
2862 Value *GEPIdx = *GEP.idx_begin();
2863 const APInt *ConstOffset, *TrueVal, *FalseVal;
2864 Value *Cond;
2865
// Either arrangement is accepted: constant on Src with the select on GEP, or
// constant on GEP with the select on Src.
2866 if ((match(V: SrcIdx, P: m_APInt(Res&: ConstOffset)) &&
2867 match(V: GEPIdx,
2868 P: m_Select(C: m_Value(V&: Cond), L: m_APInt(Res&: TrueVal), R: m_APInt(Res&: FalseVal)))) ||
2869 (match(V: GEPIdx, P: m_APInt(Res&: ConstOffset)) &&
2870 match(V: SrcIdx,
2871 P: m_Select(C: m_Value(V&: Cond), L: m_APInt(Res&: TrueVal), R: m_APInt(Res&: FalseVal))))) {
// Recover which of the two indices was the select that matched.
2872 auto *Select = isa<SelectInst>(Val: GEPIdx) ? cast<SelectInst>(Val: GEPIdx)
2873 : cast<SelectInst>(Val: SrcIdx);
2874
2875 // Make sure the select has only one use.
2876 if (!Select->hasOneUse())
2877 return nullptr;
2878
// APInt addition below needs equal bit widths on both operands.
2879 if (TrueVal->getBitWidth() != ConstOffset->getBitWidth() ||
2880 FalseVal->getBitWidth() != ConstOffset->getBitWidth())
2881 return nullptr;
2882
2883 APInt NewTrueVal = *ConstOffset + *TrueVal;
2884 APInt NewFalseVal = *ConstOffset + *FalseVal;
2885 Constant *NewTrue = ConstantInt::get(Ty: Select->getType(), V: NewTrueVal);
2886 Constant *NewFalse = ConstantInt::get(Ty: Select->getType(), V: NewFalseVal);
// Metadata is copied from the old select unless
// ProfcheckDisableMetadataFixes is set.
2887 Value *NewSelect = Builder.CreateSelect(
2888 C: Cond, True: NewTrue, False: NewFalse, /*Name=*/"",
2889 /*MDFrom=*/(ProfcheckDisableMetadataFixes ? nullptr : Select));
2890 GEPNoWrapFlags Flags =
2891 getMergedGEPNoWrapFlags(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP));
2892 return replaceInstUsesWith(I&: GEP,
2893 V: Builder.CreateGEP(Ty: GEP.getResultElementType(),
2894 Ptr: Src->getPointerOperand(),
2895 IdxList: NewSelect, Name: "", NW: Flags));
2896 }
2897 }
2898
2899 // Find out whether the last index in the source GEP is a sequential idx.
// The loop overwrites the flag on every step, so it ends up describing the
// final index only.
2900 bool EndsWithSequential = false;
2901 for (gep_type_iterator I = gep_type_begin(GEP: *Src), E = gep_type_end(GEP: *Src);
2902 I != E; ++I)
2903 EndsWithSequential = I.isSequential();
2904 if (!EndsWithSequential)
2905 return nullptr;
2906
2907 // Replace: gep (gep %P, long B), long A, ...
2908 // With: T = long A+B; gep %P, T, ...
2909 Value *SO1 = Src->getOperand(i_nocapture: Src->getNumOperands() - 1);
2910 Value *GO1 = GEP.getOperand(i_nocapture: 1);
2911
2912 // If they aren't the same type, then the input hasn't been processed
2913 // by the loop above yet (which canonicalizes sequential index types to
2914 // intptr_t). Just avoid transforming this until the input has been
2915 // normalized.
// NOTE(review): no such canonicalizing loop is visible in this function; the
// comment above appears to refer to code elsewhere -- confirm or update.
2916 if (SO1->getType() != GO1->getType())
2917 return nullptr;
2918
// simplifyAddInst only yields a value when the sum folds to something that
// already exists, so a non-null Sum means no new add is needed.
2919 Value *Sum =
2920 simplifyAddInst(LHS: GO1, RHS: SO1, IsNSW: false, IsNUW: false, Q: SQ.getWithInstruction(I: &GEP));
2921 // Only do the combine when we are sure the cost after the
2922 // merge is never more than that before the merge.
2923 if (Sum == nullptr)
2924 return nullptr;
2925
// New index list: all Src indices but the last, then the sum, then all GEP
// indices but the first.
2926 SmallVector<Value *, 8> Indices;
2927 Indices.append(in_start: Src->op_begin() + 1, in_end: Src->op_end() - 1);
2928 Indices.push_back(Elt: Sum);
2929 Indices.append(in_start: GEP.op_begin() + 2, in_end: GEP.op_end());
2930
2931 // Don't create GEPs with more than one non-zero index.
// Any non-constant index counts as non-zero here.
2932 unsigned NumNonZeroIndices = count_if(Range&: Indices, P: [](Value *Idx) {
2933 auto *C = dyn_cast<Constant>(Val: Idx);
2934 return !C || !C->isNullValue();
2935 });
2936 if (NumNonZeroIndices > 1)
2937 return nullptr;
2938
2939 return replaceInstUsesWith(
2940 I&: GEP, V: Builder.CreateGEP(
2941 Ty: Src->getSourceElementType(), Ptr: Src->getOperand(i_nocapture: 0), IdxList: Indices, Name: "",
2942 NW: getMergedGEPNoWrapFlags(GEP1&: *Src, GEP2&: *cast<GEPOperator>(Val: &GEP))));
2943 }
2944
/// Try to obtain a value equal to ~V by rewriting the expression that defines
/// \p V instead of emitting an explicit `not`.
///
/// \param V                 The value to invert.
/// \param WillInvertAllUses Every case except "V is already a not" and "V is
///                          an immediate constant" is attempted only when
///                          this is true.
/// \param Builder           When non-null, the inverted expression is built
///                          with it and returned. When null, this is an
///                          analysis-only query: cases that would need new
///                          instructions return the non-null sentinel
///                          `NonNull` (not a real Value), so such results may
///                          only be compared against null.
/// \param DoesConsume       Set to true when an existing `not` is looked
///                          through; never reset to false here.
/// \param Depth             Recursion depth; incremented on entry and bounded
///                          by MaxAnalysisRecursionDepth.
/// \returns The inverted value (or the sentinel), or nullptr if \p V cannot
///          be inverted by any of the cases below.
2945 Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
2946 BuilderTy *Builder,
2947 bool &DoesConsume, unsigned Depth) {
// Sentinel meaning "invertible" for analysis-only queries; it must never be
// dereferenced.
2948 static Value *const NonNull = reinterpret_cast<Value *>(uintptr_t(1));
2949 // ~(~(X)) -> X.
2950 Value *A, *B;
2951 if (match(V, P: m_Not(V: m_Value(V&: A)))) {
2952 DoesConsume = true;
2953 return A;
2954 }
2955
2956 Constant *C;
2957 // Constants can be considered to be not'ed values.
2958 if (match(V, P: m_ImmConstant(C)))
2959 return ConstantExpr::getNot(C);
2960
2961 if (Depth++ >= MaxAnalysisRecursionDepth)
2962 return nullptr;
2963
2964 // The rest of the cases require that we invert all uses so don't bother
2965 // doing the analysis if we know we can't use the result.
2966 if (!WillInvertAllUses)
2967 return nullptr;
2968
2969 // Compares can be inverted if all of their uses are being modified to use
2970 // the ~V.
2971 if (auto *I = dyn_cast<CmpInst>(Val: V)) {
2972 if (Builder != nullptr)
2973 return Builder->CreateCmp(Pred: I->getInversePredicate(), LHS: I->getOperand(i_nocapture: 0),
2974 RHS: I->getOperand(i_nocapture: 1));
2975 return NonNull;
2976 }
2977
2978 // If `V` is of the form `A + B` then `-1 - V` can be folded into
2979 // `(-1 - B) - A` if we are willing to invert all of the uses.
// B is tried first; A is tried only if B cannot be inverted.
2980 if (match(V, P: m_Add(L: m_Value(V&: A), R: m_Value(V&: B)))) {
2981 if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
2982 DoesConsume, Depth))
2983 return Builder ? Builder->CreateSub(LHS: BV, RHS: A) : NonNull;
2984 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
2985 DoesConsume, Depth))
2986 return Builder ? Builder->CreateSub(LHS: AV, RHS: B) : NonNull;
2987 return nullptr;
2988 }
2989
2990 // If `V` is of the form `A ^ ~B` then `~(A ^ ~B)` can be folded
2991 // into `A ^ B` if we are willing to invert all of the uses.
2992 if (match(V, P: m_Xor(L: m_Value(V&: A), R: m_Value(V&: B)))) {
2993 if (auto *BV = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
2994 DoesConsume, Depth))
2995 return Builder ? Builder->CreateXor(LHS: A, RHS: BV) : NonNull;
2996 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
2997 DoesConsume, Depth))
2998 return Builder ? Builder->CreateXor(LHS: AV, RHS: B) : NonNull;
2999 return nullptr;
3000 }
3001
3002 // If `V` is of the form `A - B` then `-1 - V` can be folded into
3003 // `(-1 - A) + B` if we are willing to invert all of the uses.
// Only the minuend A is inverted here; B is reused unchanged.
3004 if (match(V, P: m_Sub(L: m_Value(V&: A), R: m_Value(V&: B)))) {
3005 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3006 DoesConsume, Depth))
3007 return Builder ? Builder->CreateAdd(LHS: AV, RHS: B) : NonNull;
3008 return nullptr;
3009 }
3010
3011 // If `V` is of the form `(~A) s>> B` then `~((~A) s>> B)` can be folded
3012 // into `A s>> B` if we are willing to invert all of the uses.
3013 if (match(V, P: m_AShr(L: m_Value(V&: A), R: m_Value(V&: B)))) {
3014 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3015 DoesConsume, Depth))
3016 return Builder ? Builder->CreateAShr(LHS: AV, RHS: B) : NonNull;
3017 return nullptr;
3018 }
3019
3020 Value *Cond;
3021 // LogicOps are special in that we canonicalize them at the cost of an
3022 // instruction.
3023 bool IsSelect = match(V, P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: A), R: m_Value(V&: B))) &&
3024 !shouldAvoidAbsorbingNotIntoSelect(SI: *cast<SelectInst>(Val: V));
3025 // Selects/min/max with invertible operands are freely invertible
3026 if (IsSelect || match(V, P: m_MaxOrMin(L: m_Value(V&: A), R: m_Value(V&: B)))) {
// DoesConsume is tracked in a local copy and committed only once both
// operands are known to be invertible.
3027 bool LocalDoesConsume = DoesConsume;
// B is checked analysis-only first so that nothing is built for A when B
// turns out not to be invertible.
3028 if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder*/ nullptr,
3029 DoesConsume&: LocalDoesConsume, Depth))
3030 return nullptr;
3031 if (Value *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3032 DoesConsume&: LocalDoesConsume, Depth)) {
3033 DoesConsume = LocalDoesConsume;
3034 if (Builder != nullptr) {
3035 Value *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
3036 DoesConsume, Depth);
3037 assert(NotB != nullptr &&
3038 "Unable to build inverted value for known freely invertable op");
// A min/max intrinsic becomes the inverse min/max intrinsic on the inverted
// operands; anything else reaching this point is the select form.
3039 if (auto *II = dyn_cast<IntrinsicInst>(Val: V))
3040 return Builder->CreateBinaryIntrinsic(
3041 ID: getInverseMinMaxIntrinsic(MinMaxID: II->getIntrinsicID()), LHS: NotA, RHS: NotB);
3042 return Builder->CreateSelect(
3043 C: Cond, True: NotA, False: NotB, Name: "",
3044 MDFrom: ProfcheckDisableMetadataFixes ? nullptr : cast<Instruction>(Val: V));
3045 }
3046 return NonNull;
3047 }
3048 }
3049
// A PHI is invertible when every incoming value is.
3050 if (PHINode *PN = dyn_cast<PHINode>(Val: V)) {
3051 bool LocalDoesConsume = DoesConsume;
3052 SmallVector<std::pair<Value *, BasicBlock *>, 8> IncomingValues;
3053 for (Use &U : PN->operands()) {
3054 BasicBlock *IncomingBlock = PN->getIncomingBlock(U);
// With WillInvertAllUses=false the recursive call can only succeed through
// the two cases at the top of this function (existing `not`, immediate
// constant). Both return a real Value even with a null Builder, so a non-null
// NewIncomingVal is usable as a PHI operand.
3055 Value *NewIncomingVal = getFreelyInvertedImpl(
3056 V: U.get(), /*WillInvertAllUses=*/false,
3057 /*Builder=*/nullptr, DoesConsume&: LocalDoesConsume, Depth: MaxAnalysisRecursionDepth - 1);
3058 if (NewIncomingVal == nullptr)
3059 return nullptr;
3060 // Make sure that we can safely erase the original PHI node.
3061 if (NewIncomingVal == V)
3062 return nullptr;
3063 if (Builder != nullptr)
3064 IncomingValues.emplace_back(Args&: NewIncomingVal, Args&: IncomingBlock);
3065 }
3066
3067 DoesConsume = LocalDoesConsume;
3068 if (Builder != nullptr) {
// The new PHI is created at the position of the old one; the guard restores
// the builder's insertion point when this scope ends.
3069 IRBuilderBase::InsertPointGuard Guard(*Builder);
3070 Builder->SetInsertPoint(PN);
3071 PHINode *NewPN =
3072 Builder->CreatePHI(Ty: PN->getType(), NumReservedValues: PN->getNumIncomingValues());
3073 for (auto [Val, Pred] : IncomingValues)
3074 NewPN->addIncoming(V: Val, BB: Pred);
3075 return NewPN;
3076 }
3077 return NonNull;
3078 }
3079
// The inverted operand of a sext-like cast is sign-extended to V's type.
3080 if (match(V, P: m_SExtLike(Op: m_Value(V&: A)))) {
3081 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3082 DoesConsume, Depth))
3083 return Builder ? Builder->CreateSExt(V: AV, DestTy: V->getType()) : NonNull;
3084 return nullptr;
3085 }
3086
// The inverted operand of a trunc is truncated to V's type.
3087 if (match(V, P: m_Trunc(Op: m_Value(V&: A)))) {
3088 if (auto *AV = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3089 DoesConsume, Depth))
3090 return Builder ? Builder->CreateTrunc(V: AV, DestTy: V->getType()) : NonNull;
3091 return nullptr;
3092 }
3093
3094 // De Morgan's Laws:
3095 // (~(A | B)) -> (~A & ~B)
3096 // (~(A & B)) -> (~A | ~B)
// Opcode is the opcode of the *result* (the dual of V's opcode). As in the
// select case, B is checked analysis-only before anything is built for A.
3097 auto TryInvertAndOrUsingDeMorgan = [&](Instruction::BinaryOps Opcode,
3098 bool IsLogical, Value *A,
3099 Value *B) -> Value * {
3100 bool LocalDoesConsume = DoesConsume;
3101 if (!getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), /*Builder=*/nullptr,
3102 DoesConsume&: LocalDoesConsume, Depth))
3103 return nullptr;
3104 if (auto *NotA = getFreelyInvertedImpl(V: A, WillInvertAllUses: A->hasOneUse(), Builder,
3105 DoesConsume&: LocalDoesConsume, Depth)) {
3106 auto *NotB = getFreelyInvertedImpl(V: B, WillInvertAllUses: B->hasOneUse(), Builder,
3107 DoesConsume&: LocalDoesConsume, Depth);
3108 DoesConsume = LocalDoesConsume;
3109 if (IsLogical)
3110 return Builder ? Builder->CreateLogicalOp(Opc: Opcode, Cond1: NotA, Cond2: NotB) : NonNull;
3111 return Builder ? Builder->CreateBinOp(Opc: Opcode, LHS: NotA, RHS: NotB) : NonNull;
3112 }
3113
3114 return nullptr;
3115 };
3116
3117 if (match(V, P: m_Or(L: m_Value(V&: A), R: m_Value(V&: B))))
3118 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/false, A,
3119 B);
3120
3121 if (match(V, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B))))
3122 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/false, A,
3123 B);
3124
3125 if (match(V, P: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B))))
3126 return TryInvertAndOrUsingDeMorgan(Instruction::And, /*IsLogical=*/true, A,
3127 B);
3128
3129 if (match(V, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B))))
3130 return TryInvertAndOrUsingDeMorgan(Instruction::Or, /*IsLogical=*/true, A,
3131 B);
3132
3133 return nullptr;
3134 }
3135
3136/// Return true if we should canonicalize the gep to an i8 ptradd.
3137static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
3138 Value *PtrOp = GEP.getOperand(i_nocapture: 0);
3139 Type *GEPEltType = GEP.getSourceElementType();
3140 if (GEPEltType->isIntegerTy(Bitwidth: 8))
3141 return false;
3142
3143 // Canonicalize scalable GEPs to an explicit offset using the llvm.vscale
3144 // intrinsic. This has better support in BasicAA.
3145 if (GEPEltType->isScalableTy())
3146 return true;
3147
3148 // gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two multiplies
3149 // together.
3150 if (GEP.getNumIndices() == 1 &&
3151 match(V: GEP.getOperand(i_nocapture: 1),
3152 P: m_OneUse(SubPattern: m_CombineOr(L: m_Mul(L: m_Value(), R: m_ConstantInt()),
3153 R: m_Shl(L: m_Value(), R: m_ConstantInt())))))
3154 return true;
3155
3156 // gep (gep %p, C1), %x, C2 is expanded so the two constants can
3157 // possibly be merged together.
3158 auto PtrOpGep = dyn_cast<GEPOperator>(Val: PtrOp);
3159 return PtrOpGep && PtrOpGep->hasAllConstantIndices() &&
3160 any_of(Range: GEP.indices(), P: [](Value *V) {
3161 const APInt *C;
3162 return match(V, P: m_APInt(Res&: C)) && !C->isZero();
3163 });
3164}
3165
3166static Instruction *foldGEPOfPhi(GetElementPtrInst &GEP, PHINode *PN,
3167 IRBuilderBase &Builder) {
3168 auto *Op1 = dyn_cast<GetElementPtrInst>(Val: PN->getOperand(i_nocapture: 0));
3169 if (!Op1)
3170 return nullptr;
3171
3172 // Don't fold a GEP into itself through a PHI node. This can only happen
3173 // through the back-edge of a loop. Folding a GEP into itself means that
3174 // the value of the previous iteration needs to be stored in the meantime,
3175 // thus requiring an additional register variable to be live, but not
3176 // actually achieving anything (the GEP still needs to be executed once per
3177 // loop iteration).
3178 if (Op1 == &GEP)
3179 return nullptr;
3180 GEPNoWrapFlags NW = Op1->getNoWrapFlags();
3181
3182 int DI = -1;
3183
3184 for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
3185 auto *Op2 = dyn_cast<GetElementPtrInst>(Val&: *I);
3186 if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands() ||
3187 Op1->getSourceElementType() != Op2->getSourceElementType())
3188 return nullptr;
3189
3190 // As for Op1 above, don't try to fold a GEP into itself.
3191 if (Op2 == &GEP)
3192 return nullptr;
3193
3194 // Keep track of the type as we walk the GEP.
3195 Type *CurTy = nullptr;
3196
3197 for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
3198 if (Op1->getOperand(i_nocapture: J)->getType() != Op2->getOperand(i_nocapture: J)->getType())
3199 return nullptr;
3200
3201 if (Op1->getOperand(i_nocapture: J) != Op2->getOperand(i_nocapture: J)) {
3202 if (DI == -1) {
3203 // We have not seen any differences yet in the GEPs feeding the
3204 // PHI yet, so we record this one if it is allowed to be a
3205 // variable.
3206
3207 // The first two arguments can vary for any GEP, the rest have to be
3208 // static for struct slots
3209 if (J > 1) {
3210 assert(CurTy && "No current type?");
3211 if (CurTy->isStructTy())
3212 return nullptr;
3213 }
3214
3215 DI = J;
3216 } else {
3217 // The GEP is different by more than one input. While this could be
3218 // extended to support GEPs that vary by more than one variable it
3219 // doesn't make sense since it greatly increases the complexity and
3220 // would result in an R+R+R addressing mode which no backend
3221 // directly supports and would need to be broken into several
3222 // simpler instructions anyway.
3223 return nullptr;
3224 }
3225 }
3226
3227 // Sink down a layer of the type for the next iteration.
3228 if (J > 0) {
3229 if (J == 1) {
3230 CurTy = Op1->getSourceElementType();
3231 } else {
3232 CurTy =
3233 GetElementPtrInst::getTypeAtIndex(Ty: CurTy, Idx: Op1->getOperand(i_nocapture: J));
3234 }
3235 }
3236 }
3237
3238 NW &= Op2->getNoWrapFlags();
3239 }
3240
3241 // If not all GEPs are identical we'll have to create a new PHI node.
3242 // Check that the old PHI node has only one use so that it will get
3243 // removed.
3244 if (DI != -1 && !PN->hasOneUse())
3245 return nullptr;
3246
3247 auto *NewGEP = cast<GetElementPtrInst>(Val: Op1->clone());
3248 NewGEP->setNoWrapFlags(NW);
3249
3250 if (DI == -1) {
3251 // All the GEPs feeding the PHI are identical. Clone one down into our
3252 // BB so that it can be merged with the current GEP.
3253 } else {
3254 // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
3255 // into the current block so it can be merged, and create a new PHI to
3256 // set that index.
3257 PHINode *NewPN;
3258 {
3259 IRBuilderBase::InsertPointGuard Guard(Builder);
3260 Builder.SetInsertPoint(PN);
3261 NewPN = Builder.CreatePHI(Ty: Op1->getOperand(i_nocapture: DI)->getType(),
3262 NumReservedValues: PN->getNumOperands());
3263 }
3264
3265 for (auto &I : PN->operands())
3266 NewPN->addIncoming(V: cast<GEPOperator>(Val&: I)->getOperand(i_nocapture: DI),
3267 BB: PN->getIncomingBlock(U: I));
3268
3269 NewGEP->setOperand(i_nocapture: DI, Val_nocapture: NewPN);
3270 }
3271
3272 NewGEP->insertBefore(BB&: *GEP.getParent(), InsertPos: GEP.getParent()->getFirstInsertionPt());
3273 return NewGEP;
3274}
3275
3276Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
3277 Value *PtrOp = GEP.getOperand(i_nocapture: 0);
3278 SmallVector<Value *, 8> Indices(GEP.indices());
3279 Type *GEPType = GEP.getType();
3280 Type *GEPEltType = GEP.getSourceElementType();
3281 if (Value *V =
3282 simplifyGEPInst(SrcTy: GEPEltType, Ptr: PtrOp, Indices, NW: GEP.getNoWrapFlags(),
3283 Q: SQ.getWithInstruction(I: &GEP)))
3284 return replaceInstUsesWith(I&: GEP, V);
3285
3286 // For vector geps, use the generic demanded vector support.
3287 // Skip if GEP return type is scalable. The number of elements is unknown at
3288 // compile-time.
3289 if (auto *GEPFVTy = dyn_cast<FixedVectorType>(Val: GEPType)) {
3290 auto VWidth = GEPFVTy->getNumElements();
3291 APInt PoisonElts(VWidth, 0);
3292 APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth));
3293 if (Value *V = SimplifyDemandedVectorElts(V: &GEP, DemandedElts: AllOnesEltMask,
3294 PoisonElts)) {
3295 if (V != &GEP)
3296 return replaceInstUsesWith(I&: GEP, V);
3297 return &GEP;
3298 }
3299 }
3300
3301 // Eliminate unneeded casts for indices, and replace indices which displace
3302 // by multiples of a zero size type with zero.
3303 bool MadeChange = false;
3304
3305 // Index width may not be the same width as pointer width.
3306 // Data layout chooses the right type based on supported integer types.
3307 Type *NewScalarIndexTy =
3308 DL.getIndexType(PtrTy: GEP.getPointerOperandType()->getScalarType());
3309
3310 gep_type_iterator GTI = gep_type_begin(GEP);
3311 for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
3312 ++I, ++GTI) {
3313 // Skip indices into struct types.
3314 if (GTI.isStruct())
3315 continue;
3316
3317 Type *IndexTy = (*I)->getType();
3318 Type *NewIndexType =
3319 IndexTy->isVectorTy()
3320 ? VectorType::get(ElementType: NewScalarIndexTy,
3321 EC: cast<VectorType>(Val: IndexTy)->getElementCount())
3322 : NewScalarIndexTy;
3323
3324 // If the element type has zero size then any index over it is equivalent
3325 // to an index of zero, so replace it with zero if it is not zero already.
3326 Type *EltTy = GTI.getIndexedType();
3327 if (EltTy->isSized() && DL.getTypeAllocSize(Ty: EltTy).isZero())
3328 if (!isa<Constant>(Val: *I) || !match(V: I->get(), P: m_Zero())) {
3329 *I = Constant::getNullValue(Ty: NewIndexType);
3330 MadeChange = true;
3331 }
3332
3333 if (IndexTy != NewIndexType) {
3334 // If we are using a wider index than needed for this platform, shrink
3335 // it to what we need. If narrower, sign-extend it to what we need.
3336 // This explicit cast can make subsequent optimizations more obvious.
3337 if (IndexTy->getScalarSizeInBits() <
3338 NewIndexType->getScalarSizeInBits()) {
3339 if (GEP.hasNoUnsignedWrap() && GEP.hasNoUnsignedSignedWrap())
3340 *I = Builder.CreateZExt(V: *I, DestTy: NewIndexType, Name: "", /*IsNonNeg=*/true);
3341 else
3342 *I = Builder.CreateSExt(V: *I, DestTy: NewIndexType);
3343 } else {
3344 *I = Builder.CreateTrunc(V: *I, DestTy: NewIndexType, Name: "", IsNUW: GEP.hasNoUnsignedWrap(),
3345 IsNSW: GEP.hasNoUnsignedSignedWrap());
3346 }
3347 MadeChange = true;
3348 }
3349 }
3350 if (MadeChange)
3351 return &GEP;
3352
3353 // Canonicalize constant GEPs to i8 type.
3354 if (!GEPEltType->isIntegerTy(Bitwidth: 8) && GEP.hasAllConstantIndices()) {
3355 APInt Offset(DL.getIndexTypeSizeInBits(Ty: GEPType), 0);
3356 if (GEP.accumulateConstantOffset(DL, Offset))
3357 return replaceInstUsesWith(
3358 I&: GEP, V: Builder.CreatePtrAdd(Ptr: PtrOp, Offset: Builder.getInt(AI: Offset), Name: "",
3359 NW: GEP.getNoWrapFlags()));
3360 }
3361
3362 if (shouldCanonicalizeGEPToPtrAdd(GEP)) {
3363 Value *Offset = EmitGEPOffset(GEP: cast<GEPOperator>(Val: &GEP));
3364 Value *NewGEP =
3365 Builder.CreatePtrAdd(Ptr: PtrOp, Offset, Name: "", NW: GEP.getNoWrapFlags());
3366 return replaceInstUsesWith(I&: GEP, V: NewGEP);
3367 }
3368
3369 // Strip trailing zero indices.
3370 auto *LastIdx = dyn_cast<Constant>(Val: Indices.back());
3371 if (LastIdx && LastIdx->isNullValue() && !LastIdx->getType()->isVectorTy()) {
3372 return replaceInstUsesWith(
3373 I&: GEP, V: Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: PtrOp,
3374 IdxList: drop_end(RangeOrContainer&: Indices), Name: "", NW: GEP.getNoWrapFlags()));
3375 }
3376
3377 // Strip leading zero indices.
3378 auto *FirstIdx = dyn_cast<Constant>(Val: Indices.front());
3379 if (FirstIdx && FirstIdx->isNullValue() &&
3380 !FirstIdx->getType()->isVectorTy()) {
3381 gep_type_iterator GTI = gep_type_begin(GEP);
3382 ++GTI;
3383 if (!GTI.isStruct())
3384 return replaceInstUsesWith(I&: GEP, V: Builder.CreateGEP(Ty: GTI.getIndexedType(),
3385 Ptr: GEP.getPointerOperand(),
3386 IdxList: drop_begin(RangeOrContainer&: Indices), Name: "",
3387 NW: GEP.getNoWrapFlags()));
3388 }
3389
3390 // Scalarize vector operands; prefer splat-of-gep.as canonical form.
3391 // Note that this looses information about undef lanes; we run it after
3392 // demanded bits to partially mitigate that loss.
3393 if (GEPType->isVectorTy() && llvm::any_of(Range: GEP.operands(), P: [](Value *Op) {
3394 return Op->getType()->isVectorTy() && getSplatValue(V: Op);
3395 })) {
3396 SmallVector<Value *> NewOps;
3397 for (auto &Op : GEP.operands()) {
3398 if (Op->getType()->isVectorTy())
3399 if (Value *Scalar = getSplatValue(V: Op)) {
3400 NewOps.push_back(Elt: Scalar);
3401 continue;
3402 }
3403 NewOps.push_back(Elt: Op);
3404 }
3405
3406 Value *Res = Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: NewOps[0],
3407 IdxList: ArrayRef(NewOps).drop_front(), Name: GEP.getName(),
3408 NW: GEP.getNoWrapFlags());
3409 if (!Res->getType()->isVectorTy()) {
3410 ElementCount EC = cast<VectorType>(Val: GEPType)->getElementCount();
3411 Res = Builder.CreateVectorSplat(EC, V: Res);
3412 }
3413 return replaceInstUsesWith(I&: GEP, V: Res);
3414 }
3415
3416 bool SeenNonZeroIndex = false;
3417 for (auto [IdxNum, Idx] : enumerate(First&: Indices)) {
3418 auto *C = dyn_cast<Constant>(Val: Idx);
3419 if (C && C->isNullValue())
3420 continue;
3421
3422 if (!SeenNonZeroIndex) {
3423 SeenNonZeroIndex = true;
3424 continue;
3425 }
3426
3427 // GEP has multiple non-zero indices: Split it.
3428 ArrayRef<Value *> FrontIndices = ArrayRef(Indices).take_front(N: IdxNum);
3429 Value *FrontGEP =
3430 Builder.CreateGEP(Ty: GEPEltType, Ptr: PtrOp, IdxList: FrontIndices,
3431 Name: GEP.getName() + ".split", NW: GEP.getNoWrapFlags());
3432
3433 SmallVector<Value *> BackIndices;
3434 BackIndices.push_back(Elt: Constant::getNullValue(Ty: NewScalarIndexTy));
3435 append_range(C&: BackIndices, R: drop_begin(RangeOrContainer&: Indices, N: IdxNum));
3436 return GetElementPtrInst::Create(
3437 PointeeType: GetElementPtrInst::getIndexedType(Ty: GEPEltType, IdxList: FrontIndices), Ptr: FrontGEP,
3438 IdxList: BackIndices, NW: GEP.getNoWrapFlags());
3439 }
3440
3441 // Check to see if the inputs to the PHI node are getelementptr instructions.
3442 if (auto *PN = dyn_cast<PHINode>(Val: PtrOp)) {
3443 if (Value *NewPtrOp = foldGEPOfPhi(GEP, PN, Builder))
3444 return replaceOperand(I&: GEP, OpNum: 0, V: NewPtrOp);
3445 }
3446
3447 if (auto *Src = dyn_cast<GEPOperator>(Val: PtrOp))
3448 if (Instruction *I = visitGEPOfGEP(GEP, Src))
3449 return I;
3450
3451 if (GEP.getNumIndices() == 1) {
3452 unsigned AS = GEP.getPointerAddressSpace();
3453 if (GEP.getOperand(i_nocapture: 1)->getType()->getScalarSizeInBits() ==
3454 DL.getIndexSizeInBits(AS)) {
3455 uint64_t TyAllocSize = DL.getTypeAllocSize(Ty: GEPEltType).getFixedValue();
3456
3457 if (TyAllocSize == 1) {
3458 // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) to (bitcast Y),
3459 // but only if the result pointer is only used as if it were an integer.
3460 // (The case where the underlying object is the same is handled by
3461 // InstSimplify.)
3462 Value *X = GEP.getPointerOperand();
3463 Value *Y;
3464 if (match(V: GEP.getOperand(i_nocapture: 1), P: m_Sub(L: m_PtrToIntOrAddr(Op: m_Value(V&: Y)),
3465 R: m_PtrToIntOrAddr(Op: m_Specific(V: X)))) &&
3466 GEPType == Y->getType()) {
3467 bool HasNonAddressBits =
3468 DL.getAddressSizeInBits(AS) != DL.getPointerSizeInBits(AS);
3469 bool Changed = false;
3470 GEP.replaceUsesWithIf(New: Y, ShouldReplace: [&](Use &U) {
3471 bool ShouldReplace =
3472 isa<PtrToAddrInst, ICmpInst>(Val: U.getUser()) ||
3473 (!HasNonAddressBits && isa<PtrToIntInst>(Val: U.getUser()));
3474 Changed |= ShouldReplace;
3475 return ShouldReplace;
3476 });
3477 return Changed ? &GEP : nullptr;
3478 }
3479 } else if (auto *ExactIns =
3480 dyn_cast<PossiblyExactOperator>(Val: GEP.getOperand(i_nocapture: 1))) {
3481 // Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
3482 Value *V;
3483 if (ExactIns->isExact()) {
3484 if ((has_single_bit(Value: TyAllocSize) &&
3485 match(V: GEP.getOperand(i_nocapture: 1),
3486 P: m_Shr(L: m_Value(V),
3487 R: m_SpecificInt(V: countr_zero(Val: TyAllocSize))))) ||
3488 match(V: GEP.getOperand(i_nocapture: 1),
3489 P: m_IDiv(L: m_Value(V), R: m_SpecificInt(V: TyAllocSize)))) {
3490 return GetElementPtrInst::Create(PointeeType: Builder.getInt8Ty(),
3491 Ptr: GEP.getPointerOperand(), IdxList: V,
3492 NW: GEP.getNoWrapFlags());
3493 }
3494 }
3495 if (ExactIns->isExact() && ExactIns->hasOneUse()) {
3496 // Try to canonicalize non-i8 element type to i8 if the index is an
3497 // exact instruction. If the index is an exact instruction (div/shr)
3498 // with a constant RHS, we can fold the non-i8 element scale into the
3499 // div/shr (similiar to the mul case, just inverted).
3500 const APInt *C;
3501 std::optional<APInt> NewC;
3502 if (has_single_bit(Value: TyAllocSize) &&
3503 match(V: ExactIns, P: m_Shr(L: m_Value(V), R: m_APInt(Res&: C))) &&
3504 C->uge(RHS: countr_zero(Val: TyAllocSize)))
3505 NewC = *C - countr_zero(Val: TyAllocSize);
3506 else if (match(V: ExactIns, P: m_UDiv(L: m_Value(V), R: m_APInt(Res&: C)))) {
3507 APInt Quot;
3508 uint64_t Rem;
3509 APInt::udivrem(LHS: *C, RHS: TyAllocSize, Quotient&: Quot, Remainder&: Rem);
3510 if (Rem == 0)
3511 NewC = Quot;
3512 } else if (match(V: ExactIns, P: m_SDiv(L: m_Value(V), R: m_APInt(Res&: C)))) {
3513 APInt Quot;
3514 int64_t Rem;
3515 APInt::sdivrem(LHS: *C, RHS: TyAllocSize, Quotient&: Quot, Remainder&: Rem);
3516 // For sdiv we need to make sure we arent creating INT_MIN / -1.
3517 if (!Quot.isAllOnes() && Rem == 0)
3518 NewC = Quot;
3519 }
3520
3521 if (NewC.has_value()) {
3522 Value *NewOp = Builder.CreateBinOp(
3523 Opc: static_cast<Instruction::BinaryOps>(ExactIns->getOpcode()), LHS: V,
3524 RHS: ConstantInt::get(Ty: V->getType(), V: *NewC));
3525 cast<BinaryOperator>(Val: NewOp)->setIsExact();
3526 return GetElementPtrInst::Create(PointeeType: Builder.getInt8Ty(),
3527 Ptr: GEP.getPointerOperand(), IdxList: NewOp,
3528 NW: GEP.getNoWrapFlags());
3529 }
3530 }
3531 }
3532 }
3533 }
3534 // We do not handle pointer-vector geps here.
3535 if (GEPType->isVectorTy())
3536 return nullptr;
3537
3538 if (!GEP.isInBounds()) {
3539 unsigned IdxWidth =
3540 DL.getIndexSizeInBits(AS: PtrOp->getType()->getPointerAddressSpace());
3541 APInt BasePtrOffset(IdxWidth, 0);
3542 Value *UnderlyingPtrOp =
3543 PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, Offset&: BasePtrOffset);
3544 bool CanBeNull, CanBeFreed;
3545 uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes(
3546 DL, CanBeNull, CanBeFreed);
3547 if (!CanBeNull && !CanBeFreed && DerefBytes != 0) {
3548 if (GEP.accumulateConstantOffset(DL, Offset&: BasePtrOffset) &&
3549 BasePtrOffset.isNonNegative()) {
3550 APInt AllocSize(IdxWidth, DerefBytes);
3551 if (BasePtrOffset.ule(RHS: AllocSize)) {
3552 return GetElementPtrInst::CreateInBounds(
3553 PointeeType: GEP.getSourceElementType(), Ptr: PtrOp, IdxList: Indices, NameStr: GEP.getName());
3554 }
3555 }
3556 }
3557 }
3558
3559 // nusw + nneg -> nuw
3560 if (GEP.hasNoUnsignedSignedWrap() && !GEP.hasNoUnsignedWrap() &&
3561 all_of(Range: GEP.indices(), P: [&](Value *Idx) {
3562 return isKnownNonNegative(V: Idx, SQ: SQ.getWithInstruction(I: &GEP));
3563 })) {
3564 GEP.setNoWrapFlags(GEP.getNoWrapFlags() | GEPNoWrapFlags::noUnsignedWrap());
3565 return &GEP;
3566 }
3567
3568 // These rewrites are trying to preserve inbounds/nuw attributes. So we want
3569 // to do this after having tried to derive "nuw" above.
3570 if (GEP.getNumIndices() == 1) {
3571 // Given (gep p, x+y) we want to determine the common nowrap flags for both
3572 // geps if transforming into (gep (gep p, x), y).
3573 auto GetPreservedNoWrapFlags = [&](bool AddIsNUW) {
3574 // We can preserve both "inbounds nuw", "nusw nuw" and "nuw" if we know
3575 // that x + y does not have unsigned wrap.
3576 if (GEP.hasNoUnsignedWrap() && AddIsNUW)
3577 return GEP.getNoWrapFlags();
3578 return GEPNoWrapFlags::none();
3579 };
3580
3581 // Try to replace ADD + GEP with GEP + GEP.
3582 Value *Idx1, *Idx2;
3583 if (match(V: GEP.getOperand(i_nocapture: 1),
3584 P: m_OneUse(SubPattern: m_AddLike(L: m_Value(V&: Idx1), R: m_Value(V&: Idx2))))) {
3585 // %idx = add i64 %idx1, %idx2
3586 // %gep = getelementptr i32, ptr %ptr, i64 %idx
3587 // as:
3588 // %newptr = getelementptr i32, ptr %ptr, i64 %idx1
3589 // %newgep = getelementptr i32, ptr %newptr, i64 %idx2
3590 bool NUW = match(V: GEP.getOperand(i_nocapture: 1), P: m_NUWAddLike(L: m_Value(), R: m_Value()));
3591 GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW);
3592 auto *NewPtr =
3593 Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: GEP.getPointerOperand(),
3594 IdxList: Idx1, Name: "", NW: NWFlags);
3595 return replaceInstUsesWith(I&: GEP,
3596 V: Builder.CreateGEP(Ty: GEP.getSourceElementType(),
3597 Ptr: NewPtr, IdxList: Idx2, Name: "", NW: NWFlags));
3598 }
3599 ConstantInt *C;
3600 if (match(V: GEP.getOperand(i_nocapture: 1), P: m_OneUse(SubPattern: m_SExtLike(Op: m_OneUse(SubPattern: m_NSWAddLike(
3601 L: m_Value(V&: Idx1), R: m_ConstantInt(CI&: C))))))) {
3602 // %add = add nsw i32 %idx1, idx2
3603 // %sidx = sext i32 %add to i64
3604 // %gep = getelementptr i32, ptr %ptr, i64 %sidx
3605 // as:
3606 // %newptr = getelementptr i32, ptr %ptr, i32 %idx1
3607 // %newgep = getelementptr i32, ptr %newptr, i32 idx2
3608 bool NUW = match(V: GEP.getOperand(i_nocapture: 1),
3609 P: m_NNegZExt(Op: m_NUWAddLike(L: m_Value(), R: m_Value())));
3610 GEPNoWrapFlags NWFlags = GetPreservedNoWrapFlags(NUW);
3611 auto *NewPtr = Builder.CreateGEP(
3612 Ty: GEP.getSourceElementType(), Ptr: GEP.getPointerOperand(),
3613 IdxList: Builder.CreateSExt(V: Idx1, DestTy: GEP.getOperand(i_nocapture: 1)->getType()), Name: "", NW: NWFlags);
3614 return replaceInstUsesWith(
3615 I&: GEP,
3616 V: Builder.CreateGEP(Ty: GEP.getSourceElementType(), Ptr: NewPtr,
3617 IdxList: Builder.CreateSExt(V: C, DestTy: GEP.getOperand(i_nocapture: 1)->getType()),
3618 Name: "", NW: NWFlags));
3619 }
3620 }
3621
3622 if (Instruction *R = foldSelectGEP(GEP, Builder))
3623 return R;
3624
3625 return nullptr;
3626}
3627
3628static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
3629 Instruction *AI) {
3630 if (isa<ConstantPointerNull>(Val: V))
3631 return true;
3632 if (auto *LI = dyn_cast<LoadInst>(Val: V))
3633 return isa<GlobalVariable>(Val: LI->getPointerOperand());
3634 // Two distinct allocations will never be equal.
3635 return isAllocLikeFn(V, TLI: &TLI) && V != AI;
3636}
3637
3638/// Given a call CB which uses an address UsedV, return true if we can prove the
3639/// call's only possible effect is storing to V.
3640static bool isRemovableWrite(CallBase &CB, Value *UsedV,
3641 const TargetLibraryInfo &TLI) {
3642 if (!CB.use_empty())
3643 // TODO: add recursion if returned attribute is present
3644 return false;
3645
3646 if (CB.isTerminator())
3647 // TODO: remove implementation restriction
3648 return false;
3649
3650 if (!CB.willReturn() || !CB.doesNotThrow())
3651 return false;
3652
3653 // If the only possible side effect of the call is writing to the alloca,
3654 // and the result isn't used, we can safely remove any reads implied by the
3655 // call including those which might read the alloca itself.
3656 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: &CB, TLI);
3657 return Dest && Dest->Ptr == UsedV;
3658}
3659
3660static std::optional<ModRefInfo>
3661isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakTrackingVH> &Users,
3662 const TargetLibraryInfo &TLI, bool KnowInit) {
3663 SmallVector<Instruction*, 4> Worklist;
3664 const std::optional<StringRef> Family = getAllocationFamily(I: AI, TLI: &TLI);
3665 Worklist.push_back(Elt: AI);
3666 ModRefInfo Access = KnowInit ? ModRefInfo::NoModRef : ModRefInfo::Mod;
3667
3668 do {
3669 Instruction *PI = Worklist.pop_back_val();
3670 for (User *U : PI->users()) {
3671 Instruction *I = cast<Instruction>(Val: U);
3672 switch (I->getOpcode()) {
3673 default:
3674 // Give up the moment we see something we can't handle.
3675 return std::nullopt;
3676
3677 case Instruction::AddrSpaceCast:
3678 case Instruction::BitCast:
3679 case Instruction::GetElementPtr:
3680 Users.emplace_back(Args&: I);
3681 Worklist.push_back(Elt: I);
3682 continue;
3683
3684 case Instruction::ICmp: {
3685 ICmpInst *ICI = cast<ICmpInst>(Val: I);
3686 // We can fold eq/ne comparisons with null to false/true, respectively.
3687 // We also fold comparisons in some conditions provided the alloc has
3688 // not escaped (see isNeverEqualToUnescapedAlloc).
3689 if (!ICI->isEquality())
3690 return std::nullopt;
3691 unsigned OtherIndex = (ICI->getOperand(i_nocapture: 0) == PI) ? 1 : 0;
3692 if (!isNeverEqualToUnescapedAlloc(V: ICI->getOperand(i_nocapture: OtherIndex), TLI, AI))
3693 return std::nullopt;
3694
3695 // Do not fold compares to aligned_alloc calls, as they may have to
3696 // return null in case the required alignment cannot be satisfied,
3697 // unless we can prove that both alignment and size are valid.
3698 auto AlignmentAndSizeKnownValid = [](CallBase *CB) {
3699 // Check if alignment and size of a call to aligned_alloc is valid,
3700 // that is alignment is a power-of-2 and the size is a multiple of the
3701 // alignment.
3702 const APInt *Alignment;
3703 const APInt *Size;
3704 return match(V: CB->getArgOperand(i: 0), P: m_APInt(Res&: Alignment)) &&
3705 match(V: CB->getArgOperand(i: 1), P: m_APInt(Res&: Size)) &&
3706 Alignment->isPowerOf2() && Size->urem(RHS: *Alignment).isZero();
3707 };
3708 auto *CB = dyn_cast<CallBase>(Val: AI);
3709 LibFunc TheLibFunc;
3710 if (CB && TLI.getLibFunc(FDecl: *CB->getCalledFunction(), F&: TheLibFunc) &&
3711 TLI.has(F: TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc &&
3712 !AlignmentAndSizeKnownValid(CB))
3713 return std::nullopt;
3714 Users.emplace_back(Args&: I);
3715 continue;
3716 }
3717
3718 case Instruction::Call:
3719 // Ignore no-op and store intrinsics.
3720 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
3721 switch (II->getIntrinsicID()) {
3722 default:
3723 return std::nullopt;
3724
3725 case Intrinsic::memmove:
3726 case Intrinsic::memcpy:
3727 case Intrinsic::memset: {
3728 MemIntrinsic *MI = cast<MemIntrinsic>(Val: II);
3729 if (MI->isVolatile())
3730 return std::nullopt;
3731 // Note: this could also be ModRef, but we can still interpret that
3732 // as just Mod in that case.
3733 ModRefInfo NewAccess =
3734 MI->getRawDest() == PI ? ModRefInfo::Mod : ModRefInfo::Ref;
3735 if ((Access & ~NewAccess) != ModRefInfo::NoModRef)
3736 return std::nullopt;
3737 Access |= NewAccess;
3738 [[fallthrough]];
3739 }
3740 case Intrinsic::assume:
3741 case Intrinsic::invariant_start:
3742 case Intrinsic::invariant_end:
3743 case Intrinsic::lifetime_start:
3744 case Intrinsic::lifetime_end:
3745 case Intrinsic::objectsize:
3746 Users.emplace_back(Args&: I);
3747 continue;
3748 case Intrinsic::launder_invariant_group:
3749 case Intrinsic::strip_invariant_group:
3750 Users.emplace_back(Args&: I);
3751 Worklist.push_back(Elt: I);
3752 continue;
3753 }
3754 }
3755
3756 if (Family && getFreedOperand(CB: cast<CallBase>(Val: I), TLI: &TLI) == PI &&
3757 getAllocationFamily(I, TLI: &TLI) == Family) {
3758 Users.emplace_back(Args&: I);
3759 continue;
3760 }
3761
3762 if (Family && getReallocatedOperand(CB: cast<CallBase>(Val: I)) == PI &&
3763 getAllocationFamily(I, TLI: &TLI) == Family) {
3764 Users.emplace_back(Args&: I);
3765 Worklist.push_back(Elt: I);
3766 continue;
3767 }
3768
3769 if (!isRefSet(MRI: Access) &&
3770 isRemovableWrite(CB&: *cast<CallBase>(Val: I), UsedV: PI, TLI)) {
3771 Access |= ModRefInfo::Mod;
3772 Users.emplace_back(Args&: I);
3773 continue;
3774 }
3775
3776 return std::nullopt;
3777
3778 case Instruction::Store: {
3779 StoreInst *SI = cast<StoreInst>(Val: I);
3780 if (SI->isVolatile() || SI->getPointerOperand() != PI)
3781 return std::nullopt;
3782 if (isRefSet(MRI: Access))
3783 return std::nullopt;
3784 Access |= ModRefInfo::Mod;
3785 Users.emplace_back(Args&: I);
3786 continue;
3787 }
3788
3789 case Instruction::Load: {
3790 LoadInst *LI = cast<LoadInst>(Val: I);
3791 if (LI->isVolatile() || LI->getPointerOperand() != PI)
3792 return std::nullopt;
3793 if (isModSet(MRI: Access))
3794 return std::nullopt;
3795 Access |= ModRefInfo::Ref;
3796 Users.emplace_back(Args&: I);
3797 continue;
3798 }
3799 }
3800 llvm_unreachable("missing a return?");
3801 }
3802 } while (!Worklist.empty());
3803
3804 assert(Access != ModRefInfo::ModRef);
3805 return Access;
3806}
3807
3808Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
3809 assert(isa<AllocaInst>(MI) || isRemovableAlloc(&cast<CallBase>(MI), &TLI));
3810
3811 // If we have a malloc call which is only used in any amount of comparisons to
3812 // null and free calls, delete the calls and replace the comparisons with true
3813 // or false as appropriate.
3814
3815 // This is based on the principle that we can substitute our own allocation
3816 // function (which will never return null) rather than knowledge of the
3817 // specific function being called. In some sense this can change the permitted
3818 // outputs of a program (when we convert a malloc to an alloca, the fact that
3819 // the allocation is now on the stack is potentially visible, for example),
3820 // but we believe in a permissible manner.
3821 SmallVector<WeakTrackingVH, 64> Users;
3822
3823 // If we are removing an alloca with a dbg.declare, insert dbg.value calls
3824 // before each store.
3825 SmallVector<DbgVariableRecord *, 8> DVRs;
3826 std::unique_ptr<DIBuilder> DIB;
3827 if (isa<AllocaInst>(Val: MI)) {
3828 findDbgUsers(V: &MI, DbgVariableRecords&: DVRs);
3829 DIB.reset(p: new DIBuilder(*MI.getModule(), /*AllowUnresolved=*/false));
3830 }
3831
3832 // Determine what getInitialValueOfAllocation would return without actually
3833 // allocating the result.
3834 bool KnowInitUndef = false;
3835 bool KnowInitZero = false;
3836 Constant *Init =
3837 getInitialValueOfAllocation(V: &MI, TLI: &TLI, Ty: Type::getInt8Ty(C&: MI.getContext()));
3838 if (Init) {
3839 if (isa<UndefValue>(Val: Init))
3840 KnowInitUndef = true;
3841 else if (Init->isNullValue())
3842 KnowInitZero = true;
3843 }
3844 // The various sanitizers don't actually return undef memory, but rather
3845 // memory initialized with special forms of runtime poison
3846 auto &F = *MI.getFunction();
3847 if (F.hasFnAttribute(Kind: Attribute::SanitizeMemory) ||
3848 F.hasFnAttribute(Kind: Attribute::SanitizeAddress))
3849 KnowInitUndef = false;
3850
3851 auto Removable =
3852 isAllocSiteRemovable(AI: &MI, Users, TLI, KnowInit: KnowInitZero | KnowInitUndef);
3853 if (Removable) {
3854 for (WeakTrackingVH &User : Users) {
3855 // Lowering all @llvm.objectsize and MTI calls first because they may use
3856 // a bitcast/GEP of the alloca we are removing.
3857 if (!User)
3858 continue;
3859
3860 Instruction *I = cast<Instruction>(Val: &*User);
3861
3862 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
3863 if (II->getIntrinsicID() == Intrinsic::objectsize) {
3864 SmallVector<Instruction *> InsertedInstructions;
3865 Value *Result = lowerObjectSizeCall(
3866 ObjectSize: II, DL, TLI: &TLI, AA, /*MustSucceed=*/true, InsertedInstructions: &InsertedInstructions);
3867 for (Instruction *Inserted : InsertedInstructions)
3868 Worklist.add(I: Inserted);
3869 replaceInstUsesWith(I&: *I, V: Result);
3870 eraseInstFromFunction(I&: *I);
3871 User = nullptr; // Skip examining in the next loop.
3872 continue;
3873 }
3874 if (auto *MTI = dyn_cast<MemTransferInst>(Val: I)) {
3875 if (KnowInitZero && isRefSet(MRI: *Removable)) {
3876 IRBuilderBase::InsertPointGuard Guard(Builder);
3877 Builder.SetInsertPoint(MTI);
3878 auto *M = Builder.CreateMemSet(
3879 Ptr: MTI->getRawDest(),
3880 Val: ConstantInt::get(Ty: Type::getInt8Ty(C&: MI.getContext()), V: 0),
3881 Size: MTI->getLength(), Align: MTI->getDestAlign());
3882 M->copyMetadata(SrcInst: *MTI);
3883 }
3884 }
3885 }
3886 }
3887 for (WeakTrackingVH &User : Users) {
3888 if (!User)
3889 continue;
3890
3891 Instruction *I = cast<Instruction>(Val: &*User);
3892
3893 if (ICmpInst *C = dyn_cast<ICmpInst>(Val: I)) {
3894 replaceInstUsesWith(I&: *C,
3895 V: ConstantInt::get(Ty: Type::getInt1Ty(C&: C->getContext()),
3896 V: C->isFalseWhenEqual()));
3897 } else if (auto *SI = dyn_cast<StoreInst>(Val: I)) {
3898 for (auto *DVR : DVRs)
3899 if (DVR->isAddressOfVariable())
3900 ConvertDebugDeclareToDebugValue(DVR, SI, Builder&: *DIB);
3901 } else {
3902 // Casts, GEP, or anything else: we're about to delete this instruction,
3903 // so it can not have any valid uses.
3904 Constant *Replace;
3905 if (isa<LoadInst>(Val: I)) {
3906 assert(KnowInitZero || KnowInitUndef);
3907 Replace = KnowInitUndef ? UndefValue::get(T: I->getType())
3908 : Constant::getNullValue(Ty: I->getType());
3909 } else
3910 Replace = PoisonValue::get(T: I->getType());
3911 replaceInstUsesWith(I&: *I, V: Replace);
3912 }
3913 eraseInstFromFunction(I&: *I);
3914 }
3915
3916 if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &MI)) {
3917 // Replace invoke with a NOP intrinsic to maintain the original CFG
3918 Module *M = II->getModule();
3919 Function *F = Intrinsic::getOrInsertDeclaration(M, id: Intrinsic::donothing);
3920 auto *NewII = InvokeInst::Create(
3921 Func: F, IfNormal: II->getNormalDest(), IfException: II->getUnwindDest(), Args: {}, NameStr: "", InsertBefore: II->getParent());
3922 NewII->setDebugLoc(II->getDebugLoc());
3923 }
3924
3925 // Remove debug intrinsics which describe the value contained within the
3926 // alloca. In addition to removing dbg.{declare,addr} which simply point to
3927 // the alloca, remove dbg.value(<alloca>, ..., DW_OP_deref)'s as well, e.g.:
3928 //
3929 // ```
3930 // define void @foo(i32 %0) {
3931 // %a = alloca i32 ; Deleted.
3932 // store i32 %0, i32* %a
3933 // dbg.value(i32 %0, "arg0") ; Not deleted.
3934 // dbg.value(i32* %a, "arg0", DW_OP_deref) ; Deleted.
3935 // call void @trivially_inlinable_no_op(i32* %a)
3936 // ret void
3937 // }
3938 // ```
3939 //
3940 // This may not be required if we stop describing the contents of allocas
3941 // using dbg.value(<alloca>, ..., DW_OP_deref), but we currently do this in
3942 // the LowerDbgDeclare utility.
3943 //
3944 // If there is a dead store to `%a` in @trivially_inlinable_no_op, the
3945 // "arg0" dbg.value may be stale after the call. However, failing to remove
3946 // the DW_OP_deref dbg.value causes large gaps in location coverage.
3947 //
3948 // FIXME: the Assignment Tracking project has now likely made this
3949 // redundant (and it's sometimes harmful).
3950 for (auto *DVR : DVRs)
3951 if (DVR->isAddressOfVariable() || DVR->getExpression()->startsWithDeref())
3952 DVR->eraseFromParent();
3953
3954 return eraseInstFromFunction(I&: MI);
3955 }
3956 return nullptr;
3957}
3958
3959/// Move the call to free before a NULL test.
3960///
3961/// Check if this free is accessed after its argument has been test
3962/// against NULL (property 0).
3963/// If yes, it is legal to move this call in its predecessor block.
3964///
3965/// The move is performed only if the block containing the call to free
3966/// will be removed, i.e.:
3967/// 1. it has only one predecessor P, and P has two successors
3968/// 2. it contains the call, noops, and an unconditional branch
3969/// 3. its successor is the same as its predecessor's successor
3970///
3971/// The profitability is out-of concern here and this function should
3972/// be called only if the caller knows this transformation would be
3973/// profitable (e.g., for code size).
3974static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
3975 const DataLayout &DL) {
3976 Value *Op = FI.getArgOperand(i: 0);
3977 BasicBlock *FreeInstrBB = FI.getParent();
3978 BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor();
3979
3980 // Validate part of constraint #1: Only one predecessor
3981 // FIXME: We can extend the number of predecessor, but in that case, we
3982 // would duplicate the call to free in each predecessor and it may
3983 // not be profitable even for code size.
3984 if (!PredBB)
3985 return nullptr;
3986
3987 // Validate constraint #2: Does this block contains only the call to
3988 // free, noops, and an unconditional branch?
3989 BasicBlock *SuccBB;
3990 Instruction *FreeInstrBBTerminator = FreeInstrBB->getTerminator();
3991 if (!match(V: FreeInstrBBTerminator, P: m_UnconditionalBr(Succ&: SuccBB)))
3992 return nullptr;
3993
3994 // If there are only 2 instructions in the block, at this point,
3995 // this is the call to free and unconditional.
3996 // If there are more than 2 instructions, check that they are noops
3997 // i.e., they won't hurt the performance of the generated code.
3998 if (FreeInstrBB->size() != 2) {
3999 for (const Instruction &Inst : FreeInstrBB->instructionsWithoutDebug()) {
4000 if (&Inst == &FI || &Inst == FreeInstrBBTerminator)
4001 continue;
4002 auto *Cast = dyn_cast<CastInst>(Val: &Inst);
4003 if (!Cast || !Cast->isNoopCast(DL))
4004 return nullptr;
4005 }
4006 }
4007 // Validate the rest of constraint #1 by matching on the pred branch.
4008 Instruction *TI = PredBB->getTerminator();
4009 BasicBlock *TrueBB, *FalseBB;
4010 CmpPredicate Pred;
4011 if (!match(V: TI, P: m_Br(C: m_ICmp(Pred,
4012 L: m_CombineOr(L: m_Specific(V: Op),
4013 R: m_Specific(V: Op->stripPointerCasts())),
4014 R: m_Zero()),
4015 T&: TrueBB, F&: FalseBB)))
4016 return nullptr;
4017 if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
4018 return nullptr;
4019
4020 // Validate constraint #3: Ensure the null case just falls through.
4021 if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
4022 return nullptr;
4023 assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
4024 "Broken CFG: missing edge from predecessor to successor");
4025
4026 // At this point, we know that everything in FreeInstrBB can be moved
4027 // before TI.
4028 for (Instruction &Instr : llvm::make_early_inc_range(Range&: *FreeInstrBB)) {
4029 if (&Instr == FreeInstrBBTerminator)
4030 break;
4031 Instr.moveBeforePreserving(MovePos: TI->getIterator());
4032 }
4033 assert(FreeInstrBB->size() == 1 &&
4034 "Only the branch instruction should remain");
4035
4036 // Now that we've moved the call to free before the NULL check, we have to
4037 // remove any attributes on its parameter that imply it's non-null, because
4038 // those attributes might have only been valid because of the NULL check, and
4039 // we can get miscompiles if we keep them. This is conservative if non-null is
4040 // also implied by something other than the NULL check, but it's guaranteed to
4041 // be correct, and the conservativeness won't matter in practice, since the
4042 // attributes are irrelevant for the call to free itself and the pointer
4043 // shouldn't be used after the call.
4044 AttributeList Attrs = FI.getAttributes();
4045 Attrs = Attrs.removeParamAttribute(C&: FI.getContext(), ArgNo: 0, Kind: Attribute::NonNull);
4046 Attribute Dereferenceable = Attrs.getParamAttr(ArgNo: 0, Kind: Attribute::Dereferenceable);
4047 if (Dereferenceable.isValid()) {
4048 uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
4049 Attrs = Attrs.removeParamAttribute(C&: FI.getContext(), ArgNo: 0,
4050 Kind: Attribute::Dereferenceable);
4051 Attrs = Attrs.addDereferenceableOrNullParamAttr(C&: FI.getContext(), ArgNo: 0, Bytes);
4052 }
4053 FI.setAttributes(Attrs);
4054
4055 return &FI;
4056}
4057
4058Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
4059 // free undef -> unreachable.
4060 if (isa<UndefValue>(Val: Op)) {
4061 // Leave a marker since we can't modify the CFG here.
4062 CreateNonTerminatorUnreachable(InsertAt: &FI);
4063 return eraseInstFromFunction(I&: FI);
4064 }
4065
4066 // If we have 'free null' delete the instruction. This can happen in stl code
4067 // when lots of inlining happens.
4068 if (isa<ConstantPointerNull>(Val: Op))
4069 return eraseInstFromFunction(I&: FI);
4070
4071 // If we had free(realloc(...)) with no intervening uses, then eliminate the
4072 // realloc() entirely.
4073 CallInst *CI = dyn_cast<CallInst>(Val: Op);
4074 if (CI && CI->hasOneUse())
4075 if (Value *ReallocatedOp = getReallocatedOperand(CB: CI))
4076 return eraseInstFromFunction(I&: *replaceInstUsesWith(I&: *CI, V: ReallocatedOp));
4077
4078 // If we optimize for code size, try to move the call to free before the null
4079 // test so that simplify cfg can remove the empty block and dead code
4080 // elimination the branch. I.e., helps to turn something like:
4081 // if (foo) free(foo);
4082 // into
4083 // free(foo);
4084 //
4085 // Note that we can only do this for 'free' and not for any flavor of
4086 // 'operator delete'; there is no 'operator delete' symbol for which we are
4087 // permitted to invent a call, even if we're passing in a null pointer.
4088 if (MinimizeSize) {
4089 LibFunc Func;
4090 if (TLI.getLibFunc(CB: FI, F&: Func) && TLI.has(F: Func) && Func == LibFunc_free)
4091 if (Instruction *I = tryToMoveFreeBeforeNullTest(FI, DL))
4092 return I;
4093 }
4094
4095 return nullptr;
4096}
4097
4098Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
4099 Value *RetVal = RI.getReturnValue();
4100 if (!RetVal)
4101 return nullptr;
4102
4103 Function *F = RI.getFunction();
4104 Type *RetTy = RetVal->getType();
4105 if (RetTy->isPointerTy()) {
4106 bool HasDereferenceable =
4107 F->getAttributes().getRetDereferenceableBytes() > 0;
4108 if (F->hasRetAttribute(Kind: Attribute::NonNull) ||
4109 (HasDereferenceable &&
4110 !NullPointerIsDefined(F, AS: RetTy->getPointerAddressSpace()))) {
4111 if (Value *V = simplifyNonNullOperand(V: RetVal, HasDereferenceable))
4112 return replaceOperand(I&: RI, OpNum: 0, V);
4113 }
4114 }
4115
4116 if (!AttributeFuncs::isNoFPClassCompatibleType(Ty: RetTy))
4117 return nullptr;
4118
4119 FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass();
4120 if (ReturnClass == fcNone)
4121 return nullptr;
4122
4123 KnownFPClass KnownClass;
4124 if (SimplifyDemandedFPClass(I: &RI, Op: 0, DemandedMask: ~ReturnClass, Known&: KnownClass))
4125 return &RI;
4126
4127 return nullptr;
4128}
4129
4130// WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
4131bool InstCombinerImpl::removeInstructionsBeforeUnreachable(Instruction &I) {
4132 // Try to remove the previous instruction if it must lead to unreachable.
4133 // This includes instructions like stores and "llvm.assume" that may not get
4134 // removed by simple dead code elimination.
4135 bool Changed = false;
4136 while (Instruction *Prev = I.getPrevNode()) {
4137 // While we theoretically can erase EH, that would result in a block that
4138 // used to start with an EH no longer starting with EH, which is invalid.
4139 // To make it valid, we'd need to fixup predecessors to no longer refer to
4140 // this block, but that changes CFG, which is not allowed in InstCombine.
4141 if (Prev->isEHPad())
4142 break; // Can not drop any more instructions. We're done here.
4143
4144 if (!isGuaranteedToTransferExecutionToSuccessor(I: Prev))
4145 break; // Can not drop any more instructions. We're done here.
4146 // Otherwise, this instruction can be freely erased,
4147 // even if it is not side-effect free.
4148
4149 // A value may still have uses before we process it here (for example, in
4150 // another unreachable block), so convert those to poison.
4151 replaceInstUsesWith(I&: *Prev, V: PoisonValue::get(T: Prev->getType()));
4152 eraseInstFromFunction(I&: *Prev);
4153 Changed = true;
4154 }
4155 return Changed;
4156}
4157
4158Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
4159 removeInstructionsBeforeUnreachable(I);
4160 return nullptr;
4161}
4162
4163Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
4164 assert(BI.isUnconditional() && "Only for unconditional branches.");
4165
4166 // If this store is the second-to-last instruction in the basic block
4167 // (excluding debug info) and if the block ends with
4168 // an unconditional branch, try to move the store to the successor block.
4169
4170 auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
4171 BasicBlock::iterator FirstInstr = BBI->getParent()->begin();
4172 do {
4173 if (BBI != FirstInstr)
4174 --BBI;
4175 } while (BBI != FirstInstr && BBI->isDebugOrPseudoInst());
4176
4177 return dyn_cast<StoreInst>(Val&: BBI);
4178 };
4179
4180 if (StoreInst *SI = GetLastSinkableStore(BasicBlock::iterator(BI)))
4181 if (mergeStoreIntoSuccessor(SI&: *SI))
4182 return &BI;
4183
4184 return nullptr;
4185}
4186
4187void InstCombinerImpl::addDeadEdge(BasicBlock *From, BasicBlock *To,
4188 SmallVectorImpl<BasicBlock *> &Worklist) {
4189 if (!DeadEdges.insert(V: {From, To}).second)
4190 return;
4191
4192 // Replace phi node operands in successor with poison.
4193 for (PHINode &PN : To->phis())
4194 for (Use &U : PN.incoming_values())
4195 if (PN.getIncomingBlock(U) == From && !isa<PoisonValue>(Val: U)) {
4196 replaceUse(U, NewValue: PoisonValue::get(T: PN.getType()));
4197 addToWorklist(I: &PN);
4198 MadeIRChange = true;
4199 }
4200
4201 Worklist.push_back(Elt: To);
4202}
4203
4204// Under the assumption that I is unreachable, remove it and following
4205// instructions. Changes are reported directly to MadeIRChange.
4206void InstCombinerImpl::handleUnreachableFrom(
4207 Instruction *I, SmallVectorImpl<BasicBlock *> &Worklist) {
4208 BasicBlock *BB = I->getParent();
4209 for (Instruction &Inst : make_early_inc_range(
4210 Range: make_range(x: std::next(x: BB->getTerminator()->getReverseIterator()),
4211 y: std::next(x: I->getReverseIterator())))) {
4212 if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
4213 replaceInstUsesWith(I&: Inst, V: PoisonValue::get(T: Inst.getType()));
4214 MadeIRChange = true;
4215 }
4216 if (Inst.isEHPad() || Inst.getType()->isTokenTy())
4217 continue;
4218 // RemoveDIs: erase debug-info on this instruction manually.
4219 Inst.dropDbgRecords();
4220 eraseInstFromFunction(I&: Inst);
4221 MadeIRChange = true;
4222 }
4223
4224 SmallVector<Value *> Changed;
4225 if (handleUnreachableTerminator(I: BB->getTerminator(), PoisonedValues&: Changed)) {
4226 MadeIRChange = true;
4227 for (Value *V : Changed)
4228 addToWorklist(I: cast<Instruction>(Val: V));
4229 }
4230
4231 // Handle potentially dead successors.
4232 for (BasicBlock *Succ : successors(BB))
4233 addDeadEdge(From: BB, To: Succ, Worklist);
4234}
4235
4236void InstCombinerImpl::handlePotentiallyDeadBlocks(
4237 SmallVectorImpl<BasicBlock *> &Worklist) {
4238 while (!Worklist.empty()) {
4239 BasicBlock *BB = Worklist.pop_back_val();
4240 if (!all_of(Range: predecessors(BB), P: [&](BasicBlock *Pred) {
4241 return DeadEdges.contains(V: {Pred, BB}) || DT.dominates(A: BB, B: Pred);
4242 }))
4243 continue;
4244
4245 handleUnreachableFrom(I: &BB->front(), Worklist);
4246 }
4247}
4248
4249void InstCombinerImpl::handlePotentiallyDeadSuccessors(BasicBlock *BB,
4250 BasicBlock *LiveSucc) {
4251 SmallVector<BasicBlock *> Worklist;
4252 for (BasicBlock *Succ : successors(BB)) {
4253 // The live successor isn't dead.
4254 if (Succ == LiveSucc)
4255 continue;
4256
4257 addDeadEdge(From: BB, To: Succ, Worklist);
4258 }
4259
4260 handlePotentiallyDeadBlocks(Worklist);
4261}
4262
4263Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
4264 if (BI.isUnconditional())
4265 return visitUnconditionalBranchInst(BI);
4266
4267 // Change br (not X), label True, label False to: br X, label False, True
4268 Value *Cond = BI.getCondition();
4269 Value *X;
4270 if (match(V: Cond, P: m_Not(V: m_Value(V&: X))) && !isa<Constant>(Val: X)) {
4271 // Swap Destinations and condition...
4272 BI.swapSuccessors();
4273 if (BPI)
4274 BPI->swapSuccEdgesProbabilities(Src: BI.getParent());
4275 return replaceOperand(I&: BI, OpNum: 0, V: X);
4276 }
4277
4278 // Canonicalize logical-and-with-invert as logical-or-with-invert.
4279 // This is done by inverting the condition and swapping successors:
4280 // br (X && !Y), T, F --> br !(X && !Y), F, T --> br (!X || Y), F, T
4281 Value *Y;
4282 if (isa<SelectInst>(Val: Cond) &&
4283 match(V: Cond,
4284 P: m_OneUse(SubPattern: m_LogicalAnd(L: m_Value(V&: X), R: m_OneUse(SubPattern: m_Not(V: m_Value(V&: Y))))))) {
4285 Value *NotX = Builder.CreateNot(V: X, Name: "not." + X->getName());
4286 Value *Or = Builder.CreateLogicalOr(Cond1: NotX, Cond2: Y);
4287
4288 // Set weights for the new OR select instruction too.
4289 if (!ProfcheckDisableMetadataFixes) {
4290 if (auto *OrInst = dyn_cast<Instruction>(Val: Or)) {
4291 if (auto *CondInst = dyn_cast<Instruction>(Val: Cond)) {
4292 SmallVector<uint32_t> Weights;
4293 if (extractBranchWeights(I: *CondInst, Weights)) {
4294 assert(Weights.size() == 2 &&
4295 "Unexpected number of branch weights!");
4296 std::swap(a&: Weights[0], b&: Weights[1]);
4297 setBranchWeights(I&: *OrInst, Weights, /*IsExpected=*/false);
4298 }
4299 }
4300 }
4301 }
4302 BI.swapSuccessors();
4303 if (BPI)
4304 BPI->swapSuccEdgesProbabilities(Src: BI.getParent());
4305 return replaceOperand(I&: BI, OpNum: 0, V: Or);
4306 }
4307
4308 // If the condition is irrelevant, remove the use so that other
4309 // transforms on the condition become more effective.
4310 if (!isa<ConstantInt>(Val: Cond) && BI.getSuccessor(i: 0) == BI.getSuccessor(i: 1))
4311 return replaceOperand(I&: BI, OpNum: 0, V: ConstantInt::getFalse(Ty: Cond->getType()));
4312
4313 // Canonicalize, for example, fcmp_one -> fcmp_oeq.
4314 CmpPredicate Pred;
4315 if (match(V: Cond, P: m_OneUse(SubPattern: m_FCmp(Pred, L: m_Value(), R: m_Value()))) &&
4316 !isCanonicalPredicate(Pred)) {
4317 // Swap destinations and condition.
4318 auto *Cmp = cast<CmpInst>(Val: Cond);
4319 Cmp->setPredicate(CmpInst::getInversePredicate(pred: Pred));
4320 BI.swapSuccessors();
4321 if (BPI)
4322 BPI->swapSuccEdgesProbabilities(Src: BI.getParent());
4323 Worklist.push(I: Cmp);
4324 return &BI;
4325 }
4326
4327 if (isa<UndefValue>(Val: Cond)) {
4328 handlePotentiallyDeadSuccessors(BB: BI.getParent(), /*LiveSucc*/ nullptr);
4329 return nullptr;
4330 }
4331 if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) {
4332 handlePotentiallyDeadSuccessors(BB: BI.getParent(),
4333 LiveSucc: BI.getSuccessor(i: !CI->getZExtValue()));
4334 return nullptr;
4335 }
4336
4337 // Replace all dominated uses of the condition with true/false
4338 // Ignore constant expressions to avoid iterating over uses on other
4339 // functions.
4340 if (!isa<Constant>(Val: Cond) && BI.getSuccessor(i: 0) != BI.getSuccessor(i: 1)) {
4341 for (auto &U : make_early_inc_range(Range: Cond->uses())) {
4342 BasicBlockEdge Edge0(BI.getParent(), BI.getSuccessor(i: 0));
4343 if (DT.dominates(BBE: Edge0, U)) {
4344 replaceUse(U, NewValue: ConstantInt::getTrue(Ty: Cond->getType()));
4345 addToWorklist(I: cast<Instruction>(Val: U.getUser()));
4346 continue;
4347 }
4348 BasicBlockEdge Edge1(BI.getParent(), BI.getSuccessor(i: 1));
4349 if (DT.dominates(BBE: Edge1, U)) {
4350 replaceUse(U, NewValue: ConstantInt::getFalse(Ty: Cond->getType()));
4351 addToWorklist(I: cast<Instruction>(Val: U.getUser()));
4352 }
4353 }
4354 }
4355
4356 DC.registerBranch(BI: &BI);
4357 return nullptr;
4358}
4359
4360// Replaces (switch (select cond, X, C)/(select cond, C, X)) with (switch X) if
4361// we can prove that both (switch C) and (switch X) go to the default when cond
4362// is false/true.
4363static Value *simplifySwitchOnSelectUsingRanges(SwitchInst &SI,
4364 SelectInst *Select,
4365 bool IsTrueArm) {
4366 unsigned CstOpIdx = IsTrueArm ? 1 : 2;
4367 auto *C = dyn_cast<ConstantInt>(Val: Select->getOperand(i_nocapture: CstOpIdx));
4368 if (!C)
4369 return nullptr;
4370
4371 BasicBlock *CstBB = SI.findCaseValue(C)->getCaseSuccessor();
4372 if (CstBB != SI.getDefaultDest())
4373 return nullptr;
4374 Value *X = Select->getOperand(i_nocapture: 3 - CstOpIdx);
4375 CmpPredicate Pred;
4376 const APInt *RHSC;
4377 if (!match(V: Select->getCondition(),
4378 P: m_ICmp(Pred, L: m_Specific(V: X), R: m_APInt(Res&: RHSC))))
4379 return nullptr;
4380 if (IsTrueArm)
4381 Pred = ICmpInst::getInversePredicate(pred: Pred);
4382
4383 // See whether we can replace the select with X
4384 ConstantRange CR = ConstantRange::makeExactICmpRegion(Pred, Other: *RHSC);
4385 for (auto Case : SI.cases())
4386 if (!CR.contains(Val: Case.getCaseValue()->getValue()))
4387 return nullptr;
4388
4389 return X;
4390}
4391
4392Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
4393 Value *Cond = SI.getCondition();
4394 Value *Op0;
4395 const APInt *CondOpC;
4396 using InvertFn = std::function<APInt(const APInt &Case, const APInt &C)>;
4397
4398 auto MaybeInvertible = [&](Value *Cond) -> InvertFn {
4399 if (match(V: Cond, P: m_Add(L: m_Value(V&: Op0), R: m_APInt(Res&: CondOpC))))
4400 // Change 'switch (X+C) case Case:' into 'switch (X) case Case-C'.
4401 return [](const APInt &Case, const APInt &C) { return Case - C; };
4402
4403 if (match(V: Cond, P: m_Sub(L: m_APInt(Res&: CondOpC), R: m_Value(V&: Op0))))
4404 // Change 'switch (C-X) case Case:' into 'switch (X) case C-Case'.
4405 return [](const APInt &Case, const APInt &C) { return C - Case; };
4406
4407 if (match(V: Cond, P: m_Xor(L: m_Value(V&: Op0), R: m_APInt(Res&: CondOpC))) &&
4408 !CondOpC->isMinSignedValue() && !CondOpC->isMaxSignedValue())
4409 // Change 'switch (X^C) case Case:' into 'switch (X) case Case^C'.
4410 // Prevent creation of large case values by excluding extremes.
4411 return [](const APInt &Case, const APInt &C) { return Case ^ C; };
4412
4413 return nullptr;
4414 };
4415
4416 // Attempt to invert and simplify the switch condition, as long as the
4417 // condition is not used further, as it may not be profitable otherwise.
4418 if (auto InvertFn = MaybeInvertible(Cond); InvertFn && Cond->hasOneUse()) {
4419 for (auto &Case : SI.cases()) {
4420 const APInt &New = InvertFn(Case.getCaseValue()->getValue(), *CondOpC);
4421 Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: New));
4422 }
4423 return replaceOperand(I&: SI, OpNum: 0, V: Op0);
4424 }
4425
4426 uint64_t ShiftAmt;
4427 if (match(V: Cond, P: m_Shl(L: m_Value(V&: Op0), R: m_ConstantInt(V&: ShiftAmt))) &&
4428 ShiftAmt < Op0->getType()->getScalarSizeInBits() &&
4429 all_of(Range: SI.cases(), P: [&](const auto &Case) {
4430 return Case.getCaseValue()->getValue().countr_zero() >= ShiftAmt;
4431 })) {
4432 // Change 'switch (X << 2) case 4:' into 'switch (X) case 1:'.
4433 OverflowingBinaryOperator *Shl = cast<OverflowingBinaryOperator>(Val: Cond);
4434 if (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap() ||
4435 Shl->hasOneUse()) {
4436 Value *NewCond = Op0;
4437 if (!Shl->hasNoUnsignedWrap() && !Shl->hasNoSignedWrap()) {
4438 // If the shift may wrap, we need to mask off the shifted bits.
4439 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
4440 NewCond = Builder.CreateAnd(
4441 LHS: Op0, RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - ShiftAmt));
4442 }
4443 for (auto Case : SI.cases()) {
4444 const APInt &CaseVal = Case.getCaseValue()->getValue();
4445 APInt ShiftedCase = Shl->hasNoSignedWrap() ? CaseVal.ashr(ShiftAmt)
4446 : CaseVal.lshr(shiftAmt: ShiftAmt);
4447 Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: ShiftedCase));
4448 }
4449 return replaceOperand(I&: SI, OpNum: 0, V: NewCond);
4450 }
4451 }
4452
4453 // Fold switch(zext/sext(X)) into switch(X) if possible.
4454 if (match(V: Cond, P: m_ZExtOrSExt(Op: m_Value(V&: Op0)))) {
4455 bool IsZExt = isa<ZExtInst>(Val: Cond);
4456 Type *SrcTy = Op0->getType();
4457 unsigned NewWidth = SrcTy->getScalarSizeInBits();
4458
4459 if (all_of(Range: SI.cases(), P: [&](const auto &Case) {
4460 const APInt &CaseVal = Case.getCaseValue()->getValue();
4461 return IsZExt ? CaseVal.isIntN(N: NewWidth)
4462 : CaseVal.isSignedIntN(N: NewWidth);
4463 })) {
4464 for (auto &Case : SI.cases()) {
4465 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth);
4466 Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase));
4467 }
4468 return replaceOperand(I&: SI, OpNum: 0, V: Op0);
4469 }
4470 }
4471
4472 // Fold switch(select cond, X, Y) into switch(X/Y) if possible
4473 if (auto *Select = dyn_cast<SelectInst>(Val: Cond)) {
4474 if (Value *V =
4475 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/true))
4476 return replaceOperand(I&: SI, OpNum: 0, V);
4477 if (Value *V =
4478 simplifySwitchOnSelectUsingRanges(SI, Select, /*IsTrueArm=*/false))
4479 return replaceOperand(I&: SI, OpNum: 0, V);
4480 }
4481
4482 KnownBits Known = computeKnownBits(V: Cond, CxtI: &SI);
4483 unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
4484 unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
4485
4486 // Compute the number of leading bits we can ignore.
4487 // TODO: A better way to determine this would use ComputeNumSignBits().
4488 for (const auto &C : SI.cases()) {
4489 LeadingKnownZeros =
4490 std::min(a: LeadingKnownZeros, b: C.getCaseValue()->getValue().countl_zero());
4491 LeadingKnownOnes =
4492 std::min(a: LeadingKnownOnes, b: C.getCaseValue()->getValue().countl_one());
4493 }
4494
4495 unsigned NewWidth = Known.getBitWidth() - std::max(a: LeadingKnownZeros, b: LeadingKnownOnes);
4496
4497 // Shrink the condition operand if the new type is smaller than the old type.
4498 // But do not shrink to a non-standard type, because backend can't generate
4499 // good code for that yet.
4500 // TODO: We can make it aggressive again after fixing PR39569.
4501 if (NewWidth > 0 && NewWidth < Known.getBitWidth() &&
4502 shouldChangeType(FromWidth: Known.getBitWidth(), ToWidth: NewWidth)) {
4503 IntegerType *Ty = IntegerType::get(C&: SI.getContext(), NumBits: NewWidth);
4504 Builder.SetInsertPoint(&SI);
4505 Value *NewCond = Builder.CreateTrunc(V: Cond, DestTy: Ty, Name: "trunc");
4506
4507 for (auto Case : SI.cases()) {
4508 APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(width: NewWidth);
4509 Case.setValue(ConstantInt::get(Context&: SI.getContext(), V: TruncatedCase));
4510 }
4511 return replaceOperand(I&: SI, OpNum: 0, V: NewCond);
4512 }
4513
4514 if (isa<UndefValue>(Val: Cond)) {
4515 handlePotentiallyDeadSuccessors(BB: SI.getParent(), /*LiveSucc*/ nullptr);
4516 return nullptr;
4517 }
4518 if (auto *CI = dyn_cast<ConstantInt>(Val: Cond)) {
4519 handlePotentiallyDeadSuccessors(BB: SI.getParent(),
4520 LiveSucc: SI.findCaseValue(C: CI)->getCaseSuccessor());
4521 return nullptr;
4522 }
4523
4524 return nullptr;
4525}
4526
4527Instruction *
4528InstCombinerImpl::foldExtractOfOverflowIntrinsic(ExtractValueInst &EV) {
4529 auto *WO = dyn_cast<WithOverflowInst>(Val: EV.getAggregateOperand());
4530 if (!WO)
4531 return nullptr;
4532
4533 Intrinsic::ID OvID = WO->getIntrinsicID();
4534 const APInt *C = nullptr;
4535 if (match(V: WO->getRHS(), P: m_APIntAllowPoison(Res&: C))) {
4536 if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
4537 OvID == Intrinsic::umul_with_overflow)) {
4538 // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
4539 if (C->isAllOnes())
4540 return BinaryOperator::CreateNeg(Op: WO->getLHS());
4541 // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
4542 if (C->isPowerOf2()) {
4543 return BinaryOperator::CreateShl(
4544 V1: WO->getLHS(),
4545 V2: ConstantInt::get(Ty: WO->getLHS()->getType(), V: C->logBase2()));
4546 }
4547 }
4548 }
4549
4550 // We're extracting from an overflow intrinsic. See if we're the only user.
4551 // That allows us to simplify multiple result intrinsics to simpler things
4552 // that just get one value.
4553 if (!WO->hasOneUse())
4554 return nullptr;
4555
4556 // Check if we're grabbing only the result of a 'with overflow' intrinsic
4557 // and replace it with a traditional binary instruction.
4558 if (*EV.idx_begin() == 0) {
4559 Instruction::BinaryOps BinOp = WO->getBinaryOp();
4560 Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
4561 // Replace the old instruction's uses with poison.
4562 replaceInstUsesWith(I&: *WO, V: PoisonValue::get(T: WO->getType()));
4563 eraseInstFromFunction(I&: *WO);
4564 return BinaryOperator::Create(Op: BinOp, S1: LHS, S2: RHS);
4565 }
4566
4567 assert(*EV.idx_begin() == 1 && "Unexpected extract index for overflow inst");
4568
4569 // (usub LHS, RHS) overflows when LHS is unsigned-less-than RHS.
4570 if (OvID == Intrinsic::usub_with_overflow)
4571 return new ICmpInst(ICmpInst::ICMP_ULT, WO->getLHS(), WO->getRHS());
4572
4573 // smul with i1 types overflows when both sides are set: -1 * -1 == +1, but
4574 // +1 is not possible because we assume signed values.
4575 if (OvID == Intrinsic::smul_with_overflow &&
4576 WO->getLHS()->getType()->isIntOrIntVectorTy(BitWidth: 1))
4577 return BinaryOperator::CreateAnd(V1: WO->getLHS(), V2: WO->getRHS());
4578
4579 // extractvalue (umul_with_overflow X, X), 1 -> X u> 2^(N/2)-1
4580 if (OvID == Intrinsic::umul_with_overflow && WO->getLHS() == WO->getRHS()) {
4581 unsigned BitWidth = WO->getLHS()->getType()->getScalarSizeInBits();
4582 // Only handle even bitwidths for performance reasons.
4583 if (BitWidth % 2 == 0)
4584 return new ICmpInst(
4585 ICmpInst::ICMP_UGT, WO->getLHS(),
4586 ConstantInt::get(Ty: WO->getLHS()->getType(),
4587 V: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth / 2)));
4588 }
4589
4590 // If only the overflow result is used, and the right hand side is a
4591 // constant (or constant splat), we can remove the intrinsic by directly
4592 // checking for overflow.
4593 if (C) {
4594 // Compute the no-wrap range for LHS given RHS=C, then construct an
4595 // equivalent icmp, potentially using an offset.
4596 ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
4597 BinOp: WO->getBinaryOp(), Other: *C, NoWrapKind: WO->getNoWrapKind());
4598
4599 CmpInst::Predicate Pred;
4600 APInt NewRHSC, Offset;
4601 NWR.getEquivalentICmp(Pred, RHS&: NewRHSC, Offset);
4602 auto *OpTy = WO->getRHS()->getType();
4603 auto *NewLHS = WO->getLHS();
4604 if (Offset != 0)
4605 NewLHS = Builder.CreateAdd(LHS: NewLHS, RHS: ConstantInt::get(Ty: OpTy, V: Offset));
4606 return new ICmpInst(ICmpInst::getInversePredicate(pred: Pred), NewLHS,
4607 ConstantInt::get(Ty: OpTy, V: NewRHSC));
4608 }
4609
4610 return nullptr;
4611}
4612
4613static Value *foldFrexpOfSelect(ExtractValueInst &EV, IntrinsicInst *FrexpCall,
4614 SelectInst *SelectInst,
4615 InstCombiner::BuilderTy &Builder) {
4616 // Helper to fold frexp of select to select of frexp.
4617
4618 if (!SelectInst->hasOneUse() || !FrexpCall->hasOneUse())
4619 return nullptr;
4620 Value *Cond = SelectInst->getCondition();
4621 Value *TrueVal = SelectInst->getTrueValue();
4622 Value *FalseVal = SelectInst->getFalseValue();
4623
4624 const APFloat *ConstVal = nullptr;
4625 Value *VarOp = nullptr;
4626 bool ConstIsTrue = false;
4627
4628 if (match(V: TrueVal, P: m_APFloat(Res&: ConstVal))) {
4629 VarOp = FalseVal;
4630 ConstIsTrue = true;
4631 } else if (match(V: FalseVal, P: m_APFloat(Res&: ConstVal))) {
4632 VarOp = TrueVal;
4633 ConstIsTrue = false;
4634 } else {
4635 return nullptr;
4636 }
4637
4638 Builder.SetInsertPoint(&EV);
4639
4640 CallInst *NewFrexp =
4641 Builder.CreateCall(Callee: FrexpCall->getCalledFunction(), Args: {VarOp}, Name: "frexp");
4642 NewFrexp->copyIRFlags(V: FrexpCall);
4643
4644 Value *NewEV = Builder.CreateExtractValue(Agg: NewFrexp, Idxs: 0, Name: "mantissa");
4645
4646 int Exp;
4647 APFloat Mantissa = frexp(X: *ConstVal, Exp, RM: APFloat::rmNearestTiesToEven);
4648
4649 Constant *ConstantMantissa = ConstantFP::get(Ty: TrueVal->getType(), V: Mantissa);
4650
4651 Value *NewSel = Builder.CreateSelectFMF(
4652 C: Cond, True: ConstIsTrue ? ConstantMantissa : NewEV,
4653 False: ConstIsTrue ? NewEV : ConstantMantissa, FMFSource: SelectInst, Name: "select.frexp");
4654 return NewSel;
4655}
4656Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
4657 Value *Agg = EV.getAggregateOperand();
4658
4659 if (!EV.hasIndices())
4660 return replaceInstUsesWith(I&: EV, V: Agg);
4661
4662 if (Value *V = simplifyExtractValueInst(Agg, Idxs: EV.getIndices(),
4663 Q: SQ.getWithInstruction(I: &EV)))
4664 return replaceInstUsesWith(I&: EV, V);
4665
4666 Value *Cond, *TrueVal, *FalseVal;
4667 if (match(V: &EV, P: m_ExtractValue<0>(V: m_Intrinsic<Intrinsic::frexp>(Op0: m_Select(
4668 C: m_Value(V&: Cond), L: m_Value(V&: TrueVal), R: m_Value(V&: FalseVal)))))) {
4669 auto *SelInst =
4670 cast<SelectInst>(Val: cast<IntrinsicInst>(Val: Agg)->getArgOperand(i: 0));
4671 if (Value *Result =
4672 foldFrexpOfSelect(EV, FrexpCall: cast<IntrinsicInst>(Val: Agg), SelectInst: SelInst, Builder))
4673 return replaceInstUsesWith(I&: EV, V: Result);
4674 }
4675 if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Val: Agg)) {
4676 // We're extracting from an insertvalue instruction, compare the indices
4677 const unsigned *exti, *exte, *insi, *inse;
4678 for (exti = EV.idx_begin(), insi = IV->idx_begin(),
4679 exte = EV.idx_end(), inse = IV->idx_end();
4680 exti != exte && insi != inse;
4681 ++exti, ++insi) {
4682 if (*insi != *exti)
4683 // The insert and extract both reference distinctly different elements.
4684 // This means the extract is not influenced by the insert, and we can
4685 // replace the aggregate operand of the extract with the aggregate
4686 // operand of the insert. i.e., replace
4687 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
4688 // %E = extractvalue { i32, { i32 } } %I, 0
4689 // with
4690 // %E = extractvalue { i32, { i32 } } %A, 0
4691 return ExtractValueInst::Create(Agg: IV->getAggregateOperand(),
4692 Idxs: EV.getIndices());
4693 }
4694 if (exti == exte && insi == inse)
4695 // Both iterators are at the end: Index lists are identical. Replace
4696 // %B = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
4697 // %C = extractvalue { i32, { i32 } } %B, 1, 0
4698 // with "i32 42"
4699 return replaceInstUsesWith(I&: EV, V: IV->getInsertedValueOperand());
4700 if (exti == exte) {
4701 // The extract list is a prefix of the insert list. i.e. replace
4702 // %I = insertvalue { i32, { i32 } } %A, i32 42, 1, 0
4703 // %E = extractvalue { i32, { i32 } } %I, 1
4704 // with
4705 // %X = extractvalue { i32, { i32 } } %A, 1
4706 // %E = insertvalue { i32 } %X, i32 42, 0
4707 // by switching the order of the insert and extract (though the
4708 // insertvalue should be left in, since it may have other uses).
4709 Value *NewEV = Builder.CreateExtractValue(Agg: IV->getAggregateOperand(),
4710 Idxs: EV.getIndices());
4711 return InsertValueInst::Create(Agg: NewEV, Val: IV->getInsertedValueOperand(),
4712 Idxs: ArrayRef(insi, inse));
4713 }
4714 if (insi == inse)
4715 // The insert list is a prefix of the extract list
4716 // We can simply remove the common indices from the extract and make it
4717 // operate on the inserted value instead of the insertvalue result.
4718 // i.e., replace
4719 // %I = insertvalue { i32, { i32 } } %A, { i32 } { i32 42 }, 1
4720 // %E = extractvalue { i32, { i32 } } %I, 1, 0
4721 // with
4722 // %E extractvalue { i32 } { i32 42 }, 0
4723 return ExtractValueInst::Create(Agg: IV->getInsertedValueOperand(),
4724 Idxs: ArrayRef(exti, exte));
4725 }
4726
4727 if (Instruction *R = foldExtractOfOverflowIntrinsic(EV))
4728 return R;
4729
4730 if (LoadInst *L = dyn_cast<LoadInst>(Val: Agg)) {
4731 // Bail out if the aggregate contains scalable vector type
4732 if (auto *STy = dyn_cast<StructType>(Val: Agg->getType());
4733 STy && STy->isScalableTy())
4734 return nullptr;
4735
4736 // If the (non-volatile) load only has one use, we can rewrite this to a
4737 // load from a GEP. This reduces the size of the load. If a load is used
4738 // only by extractvalue instructions then this either must have been
4739 // optimized before, or it is a struct with padding, in which case we
4740 // don't want to do the transformation as it loses padding knowledge.
4741 if (L->isSimple() && L->hasOneUse()) {
4742 // extractvalue has integer indices, getelementptr has Value*s. Convert.
4743 SmallVector<Value*, 4> Indices;
4744 // Prefix an i32 0 since we need the first element.
4745 Indices.push_back(Elt: Builder.getInt32(C: 0));
4746 for (unsigned Idx : EV.indices())
4747 Indices.push_back(Elt: Builder.getInt32(C: Idx));
4748
4749 // We need to insert these at the location of the old load, not at that of
4750 // the extractvalue.
4751 Builder.SetInsertPoint(L);
4752 Value *GEP = Builder.CreateInBoundsGEP(Ty: L->getType(),
4753 Ptr: L->getPointerOperand(), IdxList: Indices);
4754 Instruction *NL = Builder.CreateLoad(Ty: EV.getType(), Ptr: GEP);
4755 // Whatever aliasing information we had for the orignal load must also
4756 // hold for the smaller load, so propagate the annotations.
4757 NL->setAAMetadata(L->getAAMetadata());
4758 // Returning the load directly will cause the main loop to insert it in
4759 // the wrong spot, so use replaceInstUsesWith().
4760 return replaceInstUsesWith(I&: EV, V: NL);
4761 }
4762 }
4763
4764 if (auto *PN = dyn_cast<PHINode>(Val: Agg))
4765 if (Instruction *Res = foldOpIntoPhi(I&: EV, PN))
4766 return Res;
4767
4768 // Canonicalize extract (select Cond, TV, FV)
4769 // -> select cond, (extract TV), (extract FV)
4770 if (auto *SI = dyn_cast<SelectInst>(Val: Agg))
4771 if (Instruction *R = FoldOpIntoSelect(Op&: EV, SI, /*FoldWithMultiUse=*/true))
4772 return R;
4773
4774 // We could simplify extracts from other values. Note that nested extracts may
4775 // already be simplified implicitly by the above: extract (extract (insert) )
4776 // will be translated into extract ( insert ( extract ) ) first and then just
4777 // the value inserted, if appropriate. Similarly for extracts from single-use
4778 // loads: extract (extract (load)) will be translated to extract (load (gep))
4779 // and if again single-use then via load (gep (gep)) to load (gep).
4780 // However, double extracts from e.g. function arguments or return values
4781 // aren't handled yet.
4782 return nullptr;
4783}
4784
4785/// Return 'true' if the given typeinfo will match anything.
4786static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
4787 switch (Personality) {
4788 case EHPersonality::GNU_C:
4789 case EHPersonality::GNU_C_SjLj:
4790 case EHPersonality::Rust:
4791 // The GCC C EH and Rust personality only exists to support cleanups, so
4792 // it's not clear what the semantics of catch clauses are.
4793 return false;
4794 case EHPersonality::Unknown:
4795 return false;
4796 case EHPersonality::GNU_Ada:
4797 // While __gnat_all_others_value will match any Ada exception, it doesn't
4798 // match foreign exceptions (or didn't, before gcc-4.7).
4799 return false;
4800 case EHPersonality::GNU_CXX:
4801 case EHPersonality::GNU_CXX_SjLj:
4802 case EHPersonality::GNU_ObjC:
4803 case EHPersonality::MSVC_X86SEH:
4804 case EHPersonality::MSVC_TableSEH:
4805 case EHPersonality::MSVC_CXX:
4806 case EHPersonality::CoreCLR:
4807 case EHPersonality::Wasm_CXX:
4808 case EHPersonality::XL_CXX:
4809 case EHPersonality::ZOS_CXX:
4810 return TypeInfo->isNullValue();
4811 }
4812 llvm_unreachable("invalid enum");
4813}
4814
4815static bool shorter_filter(const Value *LHS, const Value *RHS) {
4816 return
4817 cast<ArrayType>(Val: LHS->getType())->getNumElements()
4818 <
4819 cast<ArrayType>(Val: RHS->getType())->getNumElements();
4820}
4821
4822Instruction *InstCombinerImpl::visitLandingPadInst(LandingPadInst &LI) {
4823 // The logic here should be correct for any real-world personality function.
4824 // However if that turns out not to be true, the offending logic can always
4825 // be conditioned on the personality function, like the catch-all logic is.
4826 EHPersonality Personality =
4827 classifyEHPersonality(Pers: LI.getParent()->getParent()->getPersonalityFn());
4828
4829 // Simplify the list of clauses, eg by removing repeated catch clauses
4830 // (these are often created by inlining).
4831 bool MakeNewInstruction = false; // If true, recreate using the following:
4832 SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
4833 bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
4834
4835 SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
4836 for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) {
4837 bool isLastClause = i + 1 == e;
4838 if (LI.isCatch(Idx: i)) {
4839 // A catch clause.
4840 Constant *CatchClause = LI.getClause(Idx: i);
4841 Constant *TypeInfo = CatchClause->stripPointerCasts();
4842
4843 // If we already saw this clause, there is no point in having a second
4844 // copy of it.
4845 if (AlreadyCaught.insert(Ptr: TypeInfo).second) {
4846 // This catch clause was not already seen.
4847 NewClauses.push_back(Elt: CatchClause);
4848 } else {
4849 // Repeated catch clause - drop the redundant copy.
4850 MakeNewInstruction = true;
4851 }
4852
4853 // If this is a catch-all then there is no point in keeping any following
4854 // clauses or marking the landingpad as having a cleanup.
4855 if (isCatchAll(Personality, TypeInfo)) {
4856 if (!isLastClause)
4857 MakeNewInstruction = true;
4858 CleanupFlag = false;
4859 break;
4860 }
4861 } else {
4862 // A filter clause. If any of the filter elements were already caught
4863 // then they can be dropped from the filter. It is tempting to try to
4864 // exploit the filter further by saying that any typeinfo that does not
4865 // occur in the filter can't be caught later (and thus can be dropped).
4866 // However this would be wrong, since typeinfos can match without being
4867 // equal (for example if one represents a C++ class, and the other some
4868 // class derived from it).
4869 assert(LI.isFilter(i) && "Unsupported landingpad clause!");
4870 Constant *FilterClause = LI.getClause(Idx: i);
4871 ArrayType *FilterType = cast<ArrayType>(Val: FilterClause->getType());
4872 unsigned NumTypeInfos = FilterType->getNumElements();
4873
4874 // An empty filter catches everything, so there is no point in keeping any
4875 // following clauses or marking the landingpad as having a cleanup. By
4876 // dealing with this case here the following code is made a bit simpler.
4877 if (!NumTypeInfos) {
4878 NewClauses.push_back(Elt: FilterClause);
4879 if (!isLastClause)
4880 MakeNewInstruction = true;
4881 CleanupFlag = false;
4882 break;
4883 }
4884
4885 bool MakeNewFilter = false; // If true, make a new filter.
4886 SmallVector<Constant *, 16> NewFilterElts; // New elements.
4887 if (isa<ConstantAggregateZero>(Val: FilterClause)) {
4888 // Not an empty filter - it contains at least one null typeinfo.
4889 assert(NumTypeInfos > 0 && "Should have handled empty filter already!");
4890 Constant *TypeInfo =
4891 Constant::getNullValue(Ty: FilterType->getElementType());
4892 // If this typeinfo is a catch-all then the filter can never match.
4893 if (isCatchAll(Personality, TypeInfo)) {
4894 // Throw the filter away.
4895 MakeNewInstruction = true;
4896 continue;
4897 }
4898
4899 // There is no point in having multiple copies of this typeinfo, so
4900 // discard all but the first copy if there is more than one.
4901 NewFilterElts.push_back(Elt: TypeInfo);
4902 if (NumTypeInfos > 1)
4903 MakeNewFilter = true;
4904 } else {
4905 ConstantArray *Filter = cast<ConstantArray>(Val: FilterClause);
4906 SmallPtrSet<Value *, 16> SeenInFilter; // For uniquing the elements.
4907 NewFilterElts.reserve(N: NumTypeInfos);
4908
4909 // Remove any filter elements that were already caught or that already
4910 // occurred in the filter. While there, see if any of the elements are
4911 // catch-alls. If so, the filter can be discarded.
4912 bool SawCatchAll = false;
4913 for (unsigned j = 0; j != NumTypeInfos; ++j) {
4914 Constant *Elt = Filter->getOperand(i_nocapture: j);
4915 Constant *TypeInfo = Elt->stripPointerCasts();
4916 if (isCatchAll(Personality, TypeInfo)) {
4917 // This element is a catch-all. Bail out, noting this fact.
4918 SawCatchAll = true;
4919 break;
4920 }
4921
4922 // Even if we've seen a type in a catch clause, we don't want to
4923 // remove it from the filter. An unexpected type handler may be
4924 // set up for a call site which throws an exception of the same
4925 // type caught. In order for the exception thrown by the unexpected
4926 // handler to propagate correctly, the filter must be correctly
4927 // described for the call site.
4928 //
4929 // Example:
4930 //
4931 // void unexpected() { throw 1;}
4932 // void foo() throw (int) {
4933 // std::set_unexpected(unexpected);
4934 // try {
4935 // throw 2.0;
4936 // } catch (int i) {}
4937 // }
4938
4939 // There is no point in having multiple copies of the same typeinfo in
4940 // a filter, so only add it if we didn't already.
4941 if (SeenInFilter.insert(Ptr: TypeInfo).second)
4942 NewFilterElts.push_back(Elt: cast<Constant>(Val: Elt));
4943 }
4944 // A filter containing a catch-all cannot match anything by definition.
4945 if (SawCatchAll) {
4946 // Throw the filter away.
4947 MakeNewInstruction = true;
4948 continue;
4949 }
4950
4951 // If we dropped something from the filter, make a new one.
4952 if (NewFilterElts.size() < NumTypeInfos)
4953 MakeNewFilter = true;
4954 }
4955 if (MakeNewFilter) {
4956 FilterType = ArrayType::get(ElementType: FilterType->getElementType(),
4957 NumElements: NewFilterElts.size());
4958 FilterClause = ConstantArray::get(T: FilterType, V: NewFilterElts);
4959 MakeNewInstruction = true;
4960 }
4961
4962 NewClauses.push_back(Elt: FilterClause);
4963
4964 // If the new filter is empty then it will catch everything so there is
4965 // no point in keeping any following clauses or marking the landingpad
4966 // as having a cleanup. The case of the original filter being empty was
4967 // already handled above.
4968 if (MakeNewFilter && !NewFilterElts.size()) {
4969 assert(MakeNewInstruction && "New filter but not a new instruction!");
4970 CleanupFlag = false;
4971 break;
4972 }
4973 }
4974 }
4975
4976 // If several filters occur in a row then reorder them so that the shortest
4977 // filters come first (those with the smallest number of elements). This is
4978 // advantageous because shorter filters are more likely to match, speeding up
4979 // unwinding, but mostly because it increases the effectiveness of the other
4980 // filter optimizations below.
4981 for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) {
4982 unsigned j;
4983 // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters.
4984 for (j = i; j != e; ++j)
4985 if (!isa<ArrayType>(Val: NewClauses[j]->getType()))
4986 break;
4987
4988 // Check whether the filters are already sorted by length. We need to know
4989 // if sorting them is actually going to do anything so that we only make a
4990 // new landingpad instruction if it does.
4991 for (unsigned k = i; k + 1 < j; ++k)
4992 if (shorter_filter(LHS: NewClauses[k+1], RHS: NewClauses[k])) {
4993 // Not sorted, so sort the filters now. Doing an unstable sort would be
4994 // correct too but reordering filters pointlessly might confuse users.
4995 std::stable_sort(first: NewClauses.begin() + i, last: NewClauses.begin() + j,
4996 comp: shorter_filter);
4997 MakeNewInstruction = true;
4998 break;
4999 }
5000
5001 // Look for the next batch of filters.
5002 i = j + 1;
5003 }
5004
5005 // If typeinfos matched if and only if equal, then the elements of a filter L
5006 // that occurs later than a filter F could be replaced by the intersection of
5007 // the elements of F and L. In reality two typeinfos can match without being
5008 // equal (for example if one represents a C++ class, and the other some class
5009 // derived from it) so it would be wrong to perform this transform in general.
5010 // However the transform is correct and useful if F is a subset of L. In that
5011 // case L can be replaced by F, and thus removed altogether since repeating a
5012 // filter is pointless. So here we look at all pairs of filters F and L where
5013 // L follows F in the list of clauses, and remove L if every element of F is
5014 // an element of L. This can occur when inlining C++ functions with exception
5015 // specifications.
5016 for (unsigned i = 0; i + 1 < NewClauses.size(); ++i) {
5017 // Examine each filter in turn.
5018 Value *Filter = NewClauses[i];
5019 ArrayType *FTy = dyn_cast<ArrayType>(Val: Filter->getType());
5020 if (!FTy)
5021 // Not a filter - skip it.
5022 continue;
5023 unsigned FElts = FTy->getNumElements();
5024 // Examine each filter following this one. Doing this backwards means that
5025 // we don't have to worry about filters disappearing under us when removed.
5026 for (unsigned j = NewClauses.size() - 1; j != i; --j) {
5027 Value *LFilter = NewClauses[j];
5028 ArrayType *LTy = dyn_cast<ArrayType>(Val: LFilter->getType());
5029 if (!LTy)
5030 // Not a filter - skip it.
5031 continue;
5032 // If Filter is a subset of LFilter, i.e. every element of Filter is also
5033 // an element of LFilter, then discard LFilter.
5034 SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
5035 // If Filter is empty then it is a subset of LFilter.
5036 if (!FElts) {
5037 // Discard LFilter.
5038 NewClauses.erase(CI: J);
5039 MakeNewInstruction = true;
5040 // Move on to the next filter.
5041 continue;
5042 }
5043 unsigned LElts = LTy->getNumElements();
5044 // If Filter is longer than LFilter then it cannot be a subset of it.
5045 if (FElts > LElts)
5046 // Move on to the next filter.
5047 continue;
5048 // At this point we know that LFilter has at least one element.
5049 if (isa<ConstantAggregateZero>(Val: LFilter)) { // LFilter only contains zeros.
5050 // Filter is a subset of LFilter iff Filter contains only zeros (as we
5051 // already know that Filter is not longer than LFilter).
5052 if (isa<ConstantAggregateZero>(Val: Filter)) {
5053 assert(FElts <= LElts && "Should have handled this case earlier!");
5054 // Discard LFilter.
5055 NewClauses.erase(CI: J);
5056 MakeNewInstruction = true;
5057 }
5058 // Move on to the next filter.
5059 continue;
5060 }
5061 ConstantArray *LArray = cast<ConstantArray>(Val: LFilter);
5062 if (isa<ConstantAggregateZero>(Val: Filter)) { // Filter only contains zeros.
5063 // Since Filter is non-empty and contains only zeros, it is a subset of
5064 // LFilter iff LFilter contains a zero.
5065 assert(FElts > 0 && "Should have eliminated the empty filter earlier!");
5066 for (unsigned l = 0; l != LElts; ++l)
5067 if (LArray->getOperand(i_nocapture: l)->isNullValue()) {
5068 // LFilter contains a zero - discard it.
5069 NewClauses.erase(CI: J);
5070 MakeNewInstruction = true;
5071 break;
5072 }
5073 // Move on to the next filter.
5074 continue;
5075 }
5076 // At this point we know that both filters are ConstantArrays. Loop over
5077 // operands to see whether every element of Filter is also an element of
5078 // LFilter. Since filters tend to be short this is probably faster than
5079 // using a method that scales nicely.
5080 ConstantArray *FArray = cast<ConstantArray>(Val: Filter);
5081 bool AllFound = true;
5082 for (unsigned f = 0; f != FElts; ++f) {
5083 Value *FTypeInfo = FArray->getOperand(i_nocapture: f)->stripPointerCasts();
5084 AllFound = false;
5085 for (unsigned l = 0; l != LElts; ++l) {
5086 Value *LTypeInfo = LArray->getOperand(i_nocapture: l)->stripPointerCasts();
5087 if (LTypeInfo == FTypeInfo) {
5088 AllFound = true;
5089 break;
5090 }
5091 }
5092 if (!AllFound)
5093 break;
5094 }
5095 if (AllFound) {
5096 // Discard LFilter.
5097 NewClauses.erase(CI: J);
5098 MakeNewInstruction = true;
5099 }
5100 // Move on to the next filter.
5101 }
5102 }
5103
5104 // If we changed any of the clauses, replace the old landingpad instruction
5105 // with a new one.
5106 if (MakeNewInstruction) {
5107 LandingPadInst *NLI = LandingPadInst::Create(RetTy: LI.getType(),
5108 NumReservedClauses: NewClauses.size());
5109 for (Constant *C : NewClauses)
5110 NLI->addClause(ClauseVal: C);
5111 // A landing pad with no clauses must have the cleanup flag set. It is
5112 // theoretically possible, though highly unlikely, that we eliminated all
5113 // clauses. If so, force the cleanup flag to true.
5114 if (NewClauses.empty())
5115 CleanupFlag = true;
5116 NLI->setCleanup(CleanupFlag);
5117 return NLI;
5118 }
5119
5120 // Even if none of the clauses changed, we may nonetheless have understood
5121 // that the cleanup flag is pointless. Clear it if so.
5122 if (LI.isCleanup() != CleanupFlag) {
5123 assert(!CleanupFlag && "Adding a cleanup, not removing one?!");
5124 LI.setCleanup(CleanupFlag);
5125 return &LI;
5126 }
5127
5128 return nullptr;
5129}
5130
5131Value *
5132InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
5133 // Try to push freeze through instructions that propagate but don't produce
5134 // poison as far as possible. If an operand of freeze does not produce poison
5135 // then push the freeze through to the operands that are not guaranteed
5136 // non-poison. The actual transform is as follows.
5137 // Op1 = ... ; Op1 can be poison
5138 // Op0 = Inst(Op1, NonPoisonOps...)
5139 // ... = Freeze(Op0)
5140 // =>
5141 // Op1 = ...
5142 // Op1.fr = Freeze(Op1)
5143 // ... = Inst(Op1.fr, NonPoisonOps...)
5144
5145 auto CanPushFreeze = [](Value *V) {
5146 if (!isa<Instruction>(Val: V) || isa<PHINode>(Val: V))
5147 return false;
5148
5149 // We can't push the freeze through an instruction which can itself create
5150 // poison. If the only source of new poison is flags, we can simply
5151 // strip them (since we know the only use is the freeze and nothing can
5152 // benefit from them.)
5153 return !canCreateUndefOrPoison(Op: cast<Operator>(Val: V),
5154 /*ConsiderFlagsAndMetadata*/ false);
5155 };
5156
5157 // Pushing freezes up long instruction chains can be expensive. Instead,
5158 // we directly push the freeze all the way to the leaves. However, we leave
5159 // deduplication of freezes on the same value for freezeOtherUses().
5160 Use *OrigUse = &OrigFI.getOperandUse(i: 0);
5161 SmallPtrSet<Instruction *, 8> Visited;
5162 SmallVector<Use *, 8> Worklist;
5163 Worklist.push_back(Elt: OrigUse);
5164 while (!Worklist.empty()) {
5165 auto *U = Worklist.pop_back_val();
5166 Value *V = U->get();
5167 if (!CanPushFreeze(V)) {
5168 // If we can't push through the original instruction, abort the transform.
5169 if (U == OrigUse)
5170 return nullptr;
5171
5172 auto *UserI = cast<Instruction>(Val: U->getUser());
5173 Builder.SetInsertPoint(UserI);
5174 Value *Frozen = Builder.CreateFreeze(V, Name: V->getName() + ".fr");
5175 U->set(Frozen);
5176 continue;
5177 }
5178
5179 auto *I = cast<Instruction>(Val: V);
5180 if (!Visited.insert(Ptr: I).second)
5181 continue;
5182
5183 // reverse() to emit freezes in a more natural order.
5184 for (Use &Op : reverse(C: I->operands())) {
5185 Value *OpV = Op.get();
5186 if (isa<MetadataAsValue>(Val: OpV) || isGuaranteedNotToBeUndefOrPoison(V: OpV))
5187 continue;
5188 Worklist.push_back(Elt: &Op);
5189 }
5190
5191 I->dropPoisonGeneratingAnnotations();
5192 this->Worklist.add(I);
5193 }
5194
5195 return OrigUse->get();
5196}
5197
5198Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
5199 PHINode *PN) {
5200 // Detect whether this is a recurrence with a start value and some number of
5201 // backedge values. We'll check whether we can push the freeze through the
5202 // backedge values (possibly dropping poison flags along the way) until we
5203 // reach the phi again. In that case, we can move the freeze to the start
5204 // value.
5205 Use *StartU = nullptr;
5206 SmallVector<Value *> Worklist;
5207 for (Use &U : PN->incoming_values()) {
5208 if (DT.dominates(A: PN->getParent(), B: PN->getIncomingBlock(U))) {
5209 // Add backedge value to worklist.
5210 Worklist.push_back(Elt: U.get());
5211 continue;
5212 }
5213
5214 // Don't bother handling multiple start values.
5215 if (StartU)
5216 return nullptr;
5217 StartU = &U;
5218 }
5219
5220 if (!StartU || Worklist.empty())
5221 return nullptr; // Not a recurrence.
5222
5223 Value *StartV = StartU->get();
5224 BasicBlock *StartBB = PN->getIncomingBlock(U: *StartU);
5225 bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(V: StartV);
5226 // We can't insert freeze if the start value is the result of the
5227 // terminator (e.g. an invoke).
5228 if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
5229 return nullptr;
5230
5231 SmallPtrSet<Value *, 32> Visited;
5232 SmallVector<Instruction *> DropFlags;
5233 while (!Worklist.empty()) {
5234 Value *V = Worklist.pop_back_val();
5235 if (!Visited.insert(Ptr: V).second)
5236 continue;
5237
5238 if (Visited.size() > 32)
5239 return nullptr; // Limit the total number of values we inspect.
5240
5241 // Assume that PN is non-poison, because it will be after the transform.
5242 if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
5243 continue;
5244
5245 Instruction *I = dyn_cast<Instruction>(Val: V);
5246 if (!I || canCreateUndefOrPoison(Op: cast<Operator>(Val: I),
5247 /*ConsiderFlagsAndMetadata*/ false))
5248 return nullptr;
5249
5250 DropFlags.push_back(Elt: I);
5251 append_range(C&: Worklist, R: I->operands());
5252 }
5253
5254 for (Instruction *I : DropFlags)
5255 I->dropPoisonGeneratingAnnotations();
5256
5257 if (StartNeedsFreeze) {
5258 Builder.SetInsertPoint(StartBB->getTerminator());
5259 Value *FrozenStartV = Builder.CreateFreeze(V: StartV,
5260 Name: StartV->getName() + ".fr");
5261 replaceUse(U&: *StartU, NewValue: FrozenStartV);
5262 }
5263 return replaceInstUsesWith(I&: FI, V: PN);
5264}
5265
5266bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) {
5267 Value *Op = FI.getOperand(i_nocapture: 0);
5268
5269 if (isa<Constant>(Val: Op) || Op->hasOneUse())
5270 return false;
5271
5272 // Move the freeze directly after the definition of its operand, so that
5273 // it dominates the maximum number of uses. Note that it may not dominate
5274 // *all* uses if the operand is an invoke/callbr and the use is in a phi on
5275 // the normal/default destination. This is why the domination check in the
5276 // replacement below is still necessary.
5277 BasicBlock::iterator MoveBefore;
5278 if (isa<Argument>(Val: Op)) {
5279 MoveBefore =
5280 FI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
5281 } else {
5282 auto MoveBeforeOpt = cast<Instruction>(Val: Op)->getInsertionPointAfterDef();
5283 if (!MoveBeforeOpt)
5284 return false;
5285 MoveBefore = *MoveBeforeOpt;
5286 }
5287
5288 // Re-point iterator to come after any debug-info records.
5289 MoveBefore.setHeadBit(false);
5290
5291 bool Changed = false;
5292 if (&FI != &*MoveBefore) {
5293 FI.moveBefore(BB&: *MoveBefore->getParent(), I: MoveBefore);
5294 Changed = true;
5295 }
5296
5297 Op->replaceUsesWithIf(New: &FI, ShouldReplace: [&](Use &U) -> bool {
5298 bool Dominates = DT.dominates(Def: &FI, U);
5299 Changed |= Dominates;
5300 return Dominates;
5301 });
5302
5303 return Changed;
5304}
5305
5306// Check if any direct or bitcast user of this value is a shuffle instruction.
5307static bool isUsedWithinShuffleVector(Value *V) {
5308 for (auto *U : V->users()) {
5309 if (isa<ShuffleVectorInst>(Val: U))
5310 return true;
5311 else if (match(V: U, P: m_BitCast(Op: m_Specific(V))) && isUsedWithinShuffleVector(V: U))
5312 return true;
5313 }
5314 return false;
5315}
5316
5317Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
5318 Value *Op0 = I.getOperand(i_nocapture: 0);
5319
5320 if (Value *V = simplifyFreezeInst(Op: Op0, Q: SQ.getWithInstruction(I: &I)))
5321 return replaceInstUsesWith(I, V);
5322
5323 // freeze (phi const, x) --> phi const, (freeze x)
5324 if (auto *PN = dyn_cast<PHINode>(Val: Op0)) {
5325 if (Instruction *NV = foldOpIntoPhi(I, PN))
5326 return NV;
5327 if (Instruction *NV = foldFreezeIntoRecurrence(FI&: I, PN))
5328 return NV;
5329 }
5330
5331 if (Value *NI = pushFreezeToPreventPoisonFromPropagating(OrigFI&: I))
5332 return replaceInstUsesWith(I, V: NI);
5333
5334 // If I is freeze(undef), check its uses and fold it to a fixed constant.
5335 // - or: pick -1
5336 // - select's condition: if the true value is constant, choose it by making
5337 // the condition true.
5338 // - phi: pick the common constant across operands
5339 // - default: pick 0
5340 //
5341 // Note that this transform is intentionally done here rather than
5342 // via an analysis in InstSimplify or at individual user sites. That is
5343 // because we must produce the same value for all uses of the freeze -
5344 // it's the reason "freeze" exists!
5345 //
5346 // TODO: This could use getBinopAbsorber() / getBinopIdentity() to avoid
5347 // duplicating logic for binops at least.
5348 auto getUndefReplacement = [&](Type *Ty) {
5349 auto pickCommonConstantFromPHI = [](PHINode &PN) -> Value * {
5350 // phi(freeze(undef), C, C). Choose C for freeze so the PHI can be
5351 // removed.
5352 Constant *BestValue = nullptr;
5353 for (Value *V : PN.incoming_values()) {
5354 if (match(V, P: m_Freeze(Op: m_Undef())))
5355 continue;
5356
5357 Constant *C = dyn_cast<Constant>(Val: V);
5358 if (!C)
5359 return nullptr;
5360
5361 if (!isGuaranteedNotToBeUndefOrPoison(V: C))
5362 return nullptr;
5363
5364 if (BestValue && BestValue != C)
5365 return nullptr;
5366
5367 BestValue = C;
5368 }
5369 return BestValue;
5370 };
5371
5372 Value *NullValue = Constant::getNullValue(Ty);
5373 Value *BestValue = nullptr;
5374 for (auto *U : I.users()) {
5375 Value *V = NullValue;
5376 if (match(V: U, P: m_Or(L: m_Value(), R: m_Value())))
5377 V = ConstantInt::getAllOnesValue(Ty);
5378 else if (match(V: U, P: m_Select(C: m_Specific(V: &I), L: m_Constant(), R: m_Value())))
5379 V = ConstantInt::getTrue(Ty);
5380 else if (match(V: U, P: m_c_Select(L: m_Specific(V: &I), R: m_Value(V)))) {
5381 if (V == &I || !isGuaranteedNotToBeUndefOrPoison(V, AC: &AC, CtxI: &I, DT: &DT))
5382 V = NullValue;
5383 } else if (auto *PHI = dyn_cast<PHINode>(Val: U)) {
5384 if (Value *MaybeV = pickCommonConstantFromPHI(*PHI))
5385 V = MaybeV;
5386 }
5387
5388 if (!BestValue)
5389 BestValue = V;
5390 else if (BestValue != V)
5391 BestValue = NullValue;
5392 }
5393 assert(BestValue && "Must have at least one use");
5394 assert(BestValue != &I && "Cannot replace with itself");
5395 return BestValue;
5396 };
5397
5398 if (match(V: Op0, P: m_Undef())) {
5399 // Don't fold freeze(undef/poison) if it's used as a vector operand in
5400 // a shuffle. This may improve codegen for shuffles that allow
5401 // unspecified inputs.
5402 if (isUsedWithinShuffleVector(V: &I))
5403 return nullptr;
5404 return replaceInstUsesWith(I, V: getUndefReplacement(I.getType()));
5405 }
5406
5407 auto getFreezeVectorReplacement = [](Constant *C) -> Constant * {
5408 Type *Ty = C->getType();
5409 auto *VTy = dyn_cast<FixedVectorType>(Val: Ty);
5410 if (!VTy)
5411 return nullptr;
5412 unsigned NumElts = VTy->getNumElements();
5413 Constant *BestValue = Constant::getNullValue(Ty: VTy->getScalarType());
5414 for (unsigned i = 0; i != NumElts; ++i) {
5415 Constant *EltC = C->getAggregateElement(Elt: i);
5416 if (EltC && !match(V: EltC, P: m_Undef())) {
5417 BestValue = EltC;
5418 break;
5419 }
5420 }
5421 return Constant::replaceUndefsWith(C, Replacement: BestValue);
5422 };
5423
5424 Constant *C;
5425 if (match(V: Op0, P: m_Constant(C)) && C->containsUndefOrPoisonElement() &&
5426 !C->containsConstantExpression()) {
5427 if (Constant *Repl = getFreezeVectorReplacement(C))
5428 return replaceInstUsesWith(I, V: Repl);
5429 }
5430
5431 // Replace uses of Op with freeze(Op).
5432 if (freezeOtherUses(FI&: I))
5433 return &I;
5434
5435 return nullptr;
5436}
5437
5438/// Check for case where the call writes to an otherwise dead alloca. This
5439/// shows up for unused out-params in idiomatic C/C++ code. Note that this
5440/// helper *only* analyzes the write; doesn't check any other legality aspect.
5441static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
5442 auto *CB = dyn_cast<CallBase>(Val: I);
5443 if (!CB)
5444 // TODO: handle e.g. store to alloca here - only worth doing if we extend
5445 // to allow reload along used path as described below. Otherwise, this
5446 // is simply a store to a dead allocation which will be removed.
5447 return false;
5448 std::optional<MemoryLocation> Dest = MemoryLocation::getForDest(CI: CB, TLI);
5449 if (!Dest)
5450 return false;
5451 auto *AI = dyn_cast<AllocaInst>(Val: getUnderlyingObject(V: Dest->Ptr));
5452 if (!AI)
5453 // TODO: allow malloc?
5454 return false;
5455 // TODO: allow memory access dominated by move point? Note that since AI
5456 // could have a reference to itself captured by the call, we would need to
5457 // account for cycles in doing so.
5458 SmallVector<const User *> AllocaUsers;
5459 SmallPtrSet<const User *, 4> Visited;
5460 auto pushUsers = [&](const Instruction &I) {
5461 for (const User *U : I.users()) {
5462 if (Visited.insert(Ptr: U).second)
5463 AllocaUsers.push_back(Elt: U);
5464 }
5465 };
5466 pushUsers(*AI);
5467 while (!AllocaUsers.empty()) {
5468 auto *UserI = cast<Instruction>(Val: AllocaUsers.pop_back_val());
5469 if (isa<GetElementPtrInst>(Val: UserI) || isa<AddrSpaceCastInst>(Val: UserI)) {
5470 pushUsers(*UserI);
5471 continue;
5472 }
5473 if (UserI == CB)
5474 continue;
5475 // TODO: support lifetime.start/end here
5476 return false;
5477 }
5478 return true;
5479}
5480
5481/// Try to move the specified instruction from its current block into the
5482/// beginning of DestBlock, which can only happen if it's safe to move the
5483/// instruction past all of the instructions between it and the end of its
5484/// block.
5485bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
5486 BasicBlock *DestBlock) {
5487 BasicBlock *SrcBlock = I->getParent();
5488
5489 // Cannot move control-flow-involving, volatile loads, vaarg, etc.
5490 if (isa<PHINode>(Val: I) || I->isEHPad() || I->mayThrow() || !I->willReturn() ||
5491 I->isTerminator())
5492 return false;
5493
5494 // Do not sink static or dynamic alloca instructions. Static allocas must
5495 // remain in the entry block, and dynamic allocas must not be sunk in between
5496 // a stacksave / stackrestore pair, which would incorrectly shorten its
5497 // lifetime.
5498 if (isa<AllocaInst>(Val: I))
5499 return false;
5500
5501 // Do not sink into catchswitch blocks.
5502 if (isa<CatchSwitchInst>(Val: DestBlock->getTerminator()))
5503 return false;
5504
5505 // Do not sink convergent call instructions.
5506 if (auto *CI = dyn_cast<CallInst>(Val: I)) {
5507 if (CI->isConvergent())
5508 return false;
5509 }
5510
5511 // Unless we can prove that the memory write isn't visibile except on the
5512 // path we're sinking to, we must bail.
5513 if (I->mayWriteToMemory()) {
5514 if (!SoleWriteToDeadLocal(I, TLI))
5515 return false;
5516 }
5517
5518 // We can only sink load instructions if there is nothing between the load and
5519 // the end of block that could change the value.
5520 if (I->mayReadFromMemory() &&
5521 !I->hasMetadata(KindID: LLVMContext::MD_invariant_load)) {
5522 // We don't want to do any sophisticated alias analysis, so we only check
5523 // the instructions after I in I's parent block if we try to sink to its
5524 // successor block.
5525 if (DestBlock->getUniquePredecessor() != I->getParent())
5526 return false;
5527 for (BasicBlock::iterator Scan = std::next(x: I->getIterator()),
5528 E = I->getParent()->end();
5529 Scan != E; ++Scan)
5530 if (Scan->mayWriteToMemory())
5531 return false;
5532 }
5533
5534 I->dropDroppableUses(ShouldDrop: [&](const Use *U) {
5535 auto *I = dyn_cast<Instruction>(Val: U->getUser());
5536 if (I && I->getParent() != DestBlock) {
5537 Worklist.add(I);
5538 return true;
5539 }
5540 return false;
5541 });
5542 /// FIXME: We could remove droppable uses that are not dominated by
5543 /// the new position.
5544
5545 BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
5546 I->moveBefore(BB&: *DestBlock, I: InsertPos);
5547 ++NumSunkInst;
5548
5549 // Also sink all related debug uses from the source basic block. Otherwise we
5550 // get debug use before the def. Attempt to salvage debug uses first, to
5551 // maximise the range variables have location for. If we cannot salvage, then
5552 // mark the location undef: we know it was supposed to receive a new location
5553 // here, but that computation has been sunk.
5554 SmallVector<DbgVariableRecord *, 2> DbgVariableRecords;
5555 findDbgUsers(V: I, DbgVariableRecords);
5556 if (!DbgVariableRecords.empty())
5557 tryToSinkInstructionDbgVariableRecords(I, InsertPos, SrcBlock, DestBlock,
5558 DPUsers&: DbgVariableRecords);
5559
5560 // PS: there are numerous flaws with this behaviour, not least that right now
5561 // assignments can be re-ordered past other assignments to the same variable
5562 // if they use different Values. Creating more undef assignements can never be
5563 // undone. And salvaging all users outside of this block can un-necessarily
5564 // alter the lifetime of the live-value that the variable refers to.
5565 // Some of these things can be resolved by tolerating debug use-before-defs in
5566 // LLVM-IR, however it depends on the instruction-referencing CodeGen backend
5567 // being used for more architectures.
5568
5569 return true;
5570}
5571
5572void InstCombinerImpl::tryToSinkInstructionDbgVariableRecords(
5573 Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
5574 BasicBlock *DestBlock,
5575 SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords) {
5576 // For all debug values in the destination block, the sunk instruction
5577 // will still be available, so they do not need to be dropped.
5578
5579 // Fetch all DbgVariableRecords not already in the destination.
5580 SmallVector<DbgVariableRecord *, 2> DbgVariableRecordsToSalvage;
5581 for (auto &DVR : DbgVariableRecords)
5582 if (DVR->getParent() != DestBlock)
5583 DbgVariableRecordsToSalvage.push_back(Elt: DVR);
5584
5585 // Fetch a second collection, of DbgVariableRecords in the source block that
5586 // we're going to sink.
5587 SmallVector<DbgVariableRecord *> DbgVariableRecordsToSink;
5588 for (DbgVariableRecord *DVR : DbgVariableRecordsToSalvage)
5589 if (DVR->getParent() == SrcBlock)
5590 DbgVariableRecordsToSink.push_back(Elt: DVR);
5591
5592 // Sort DbgVariableRecords according to their position in the block. This is a
5593 // partial order: DbgVariableRecords attached to different instructions will
5594 // be ordered by the instruction order, but DbgVariableRecords attached to the
5595 // same instruction won't have an order.
5596 auto Order = [](DbgVariableRecord *A, DbgVariableRecord *B) -> bool {
5597 return B->getInstruction()->comesBefore(Other: A->getInstruction());
5598 };
5599 llvm::stable_sort(Range&: DbgVariableRecordsToSink, C: Order);
5600
5601 // If there are two assignments to the same variable attached to the same
5602 // instruction, the ordering between the two assignments is important. Scan
5603 // for this (rare) case and establish which is the last assignment.
5604 using InstVarPair = std::pair<const Instruction *, DebugVariable>;
5605 SmallDenseMap<InstVarPair, DbgVariableRecord *> FilterOutMap;
5606 if (DbgVariableRecordsToSink.size() > 1) {
5607 SmallDenseMap<InstVarPair, unsigned> CountMap;
5608 // Count how many assignments to each variable there is per instruction.
5609 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
5610 DebugVariable DbgUserVariable =
5611 DebugVariable(DVR->getVariable(), DVR->getExpression(),
5612 DVR->getDebugLoc()->getInlinedAt());
5613 CountMap[std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable)] += 1;
5614 }
5615
5616 // If there are any instructions with two assignments, add them to the
5617 // FilterOutMap to record that they need extra filtering.
5618 SmallPtrSet<const Instruction *, 4> DupSet;
5619 for (auto It : CountMap) {
5620 if (It.second > 1) {
5621 FilterOutMap[It.first] = nullptr;
5622 DupSet.insert(Ptr: It.first.first);
5623 }
5624 }
5625
5626 // For all instruction/variable pairs needing extra filtering, find the
5627 // latest assignment.
5628 for (const Instruction *Inst : DupSet) {
5629 for (DbgVariableRecord &DVR :
5630 llvm::reverse(C: filterDbgVars(R: Inst->getDbgRecordRange()))) {
5631 DebugVariable DbgUserVariable =
5632 DebugVariable(DVR.getVariable(), DVR.getExpression(),
5633 DVR.getDebugLoc()->getInlinedAt());
5634 auto FilterIt =
5635 FilterOutMap.find(Val: std::make_pair(x&: Inst, y&: DbgUserVariable));
5636 if (FilterIt == FilterOutMap.end())
5637 continue;
5638 if (FilterIt->second != nullptr)
5639 continue;
5640 FilterIt->second = &DVR;
5641 }
5642 }
5643 }
5644
5645 // Perform cloning of the DbgVariableRecords that we plan on sinking, filter
5646 // out any duplicate assignments identified above.
5647 SmallVector<DbgVariableRecord *, 2> DVRClones;
5648 SmallSet<DebugVariable, 4> SunkVariables;
5649 for (DbgVariableRecord *DVR : DbgVariableRecordsToSink) {
5650 if (DVR->Type == DbgVariableRecord::LocationType::Declare)
5651 continue;
5652
5653 DebugVariable DbgUserVariable =
5654 DebugVariable(DVR->getVariable(), DVR->getExpression(),
5655 DVR->getDebugLoc()->getInlinedAt());
5656
5657 // For any variable where there were multiple assignments in the same place,
5658 // ignore all but the last assignment.
5659 if (!FilterOutMap.empty()) {
5660 InstVarPair IVP = std::make_pair(x: DVR->getInstruction(), y&: DbgUserVariable);
5661 auto It = FilterOutMap.find(Val: IVP);
5662
5663 // Filter out.
5664 if (It != FilterOutMap.end() && It->second != DVR)
5665 continue;
5666 }
5667
5668 if (!SunkVariables.insert(V: DbgUserVariable).second)
5669 continue;
5670
5671 if (DVR->isDbgAssign())
5672 continue;
5673
5674 DVRClones.emplace_back(Args: DVR->clone());
5675 LLVM_DEBUG(dbgs() << "CLONE: " << *DVRClones.back() << '\n');
5676 }
5677
5678 // Perform salvaging without the clones, then sink the clones.
5679 if (DVRClones.empty())
5680 return;
5681
5682 salvageDebugInfoForDbgValues(I&: *I, DPInsns: DbgVariableRecordsToSalvage);
5683
5684 // The clones are in reverse order of original appearance. Assert that the
5685 // head bit is set on the iterator as we _should_ have received it via
5686 // getFirstInsertionPt. Inserting like this will reverse the clone order as
5687 // we'll repeatedly insert at the head, such as:
5688 // DVR-3 (third insertion goes here)
5689 // DVR-2 (second insertion goes here)
5690 // DVR-1 (first insertion goes here)
5691 // Any-Prior-DVRs
5692 // InsertPtInst
5693 assert(InsertPos.getHeadBit());
5694 for (DbgVariableRecord *DVRClone : DVRClones) {
5695 InsertPos->getParent()->insertDbgRecordBefore(DR: DVRClone, Here: InsertPos);
5696 LLVM_DEBUG(dbgs() << "SINK: " << *DVRClone << '\n');
5697 }
5698}
5699
5700bool InstCombinerImpl::run() {
5701 while (!Worklist.isEmpty()) {
5702 // Walk deferred instructions in reverse order, and push them to the
5703 // worklist, which means they'll end up popped from the worklist in-order.
5704 while (Instruction *I = Worklist.popDeferred()) {
5705 // Check to see if we can DCE the instruction. We do this already here to
5706 // reduce the number of uses and thus allow other folds to trigger.
5707 // Note that eraseInstFromFunction() may push additional instructions on
5708 // the deferred worklist, so this will DCE whole instruction chains.
5709 if (isInstructionTriviallyDead(I, TLI: &TLI)) {
5710 eraseInstFromFunction(I&: *I);
5711 ++NumDeadInst;
5712 continue;
5713 }
5714
5715 Worklist.push(I);
5716 }
5717
5718 Instruction *I = Worklist.removeOne();
5719 if (I == nullptr) continue; // skip null values.
5720
5721 // Check to see if we can DCE the instruction.
5722 if (isInstructionTriviallyDead(I, TLI: &TLI)) {
5723 eraseInstFromFunction(I&: *I);
5724 ++NumDeadInst;
5725 continue;
5726 }
5727
5728 if (!DebugCounter::shouldExecute(Counter&: VisitCounter))
5729 continue;
5730
5731 // See if we can trivially sink this instruction to its user if we can
5732 // prove that the successor is not executed more frequently than our block.
5733 // Return the UserBlock if successful.
5734 auto getOptionalSinkBlockForInst =
5735 [this](Instruction *I) -> std::optional<BasicBlock *> {
5736 if (!EnableCodeSinking)
5737 return std::nullopt;
5738
5739 BasicBlock *BB = I->getParent();
5740 BasicBlock *UserParent = nullptr;
5741 unsigned NumUsers = 0;
5742
5743 for (Use &U : I->uses()) {
5744 User *User = U.getUser();
5745 if (User->isDroppable()) {
5746 // Do not sink if there are dereferenceable assumes that would be
5747 // removed.
5748 auto II = dyn_cast<IntrinsicInst>(Val: User);
5749 if (II->getIntrinsicID() != Intrinsic::assume ||
5750 !II->getOperandBundle(Name: "dereferenceable"))
5751 continue;
5752 }
5753
5754 if (NumUsers > MaxSinkNumUsers)
5755 return std::nullopt;
5756
5757 Instruction *UserInst = cast<Instruction>(Val: User);
5758 // Special handling for Phi nodes - get the block the use occurs in.
5759 BasicBlock *UserBB = UserInst->getParent();
5760 if (PHINode *PN = dyn_cast<PHINode>(Val: UserInst))
5761 UserBB = PN->getIncomingBlock(U);
5762 // Bail out if we have uses in different blocks. We don't do any
5763 // sophisticated analysis (i.e finding NearestCommonDominator of these
5764 // use blocks).
5765 if (UserParent && UserParent != UserBB)
5766 return std::nullopt;
5767 UserParent = UserBB;
5768
5769 // Make sure these checks are done only once, naturally we do the checks
5770 // the first time we get the userparent, this will save compile time.
5771 if (NumUsers == 0) {
5772 // Try sinking to another block. If that block is unreachable, then do
5773 // not bother. SimplifyCFG should handle it.
5774 if (UserParent == BB || !DT.isReachableFromEntry(A: UserParent))
5775 return std::nullopt;
5776
5777 auto *Term = UserParent->getTerminator();
5778 // See if the user is one of our successors that has only one
5779 // predecessor, so that we don't have to split the critical edge.
5780 // Another option where we can sink is a block that ends with a
5781 // terminator that does not pass control to other block (such as
5782 // return or unreachable or resume). In this case:
5783 // - I dominates the User (by SSA form);
5784 // - the User will be executed at most once.
5785 // So sinking I down to User is always profitable or neutral.
5786 if (UserParent->getUniquePredecessor() != BB && !succ_empty(I: Term))
5787 return std::nullopt;
5788
5789 assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
5790 }
5791
5792 NumUsers++;
5793 }
5794
5795 // No user or only has droppable users.
5796 if (!UserParent)
5797 return std::nullopt;
5798
5799 return UserParent;
5800 };
5801
5802 auto OptBB = getOptionalSinkBlockForInst(I);
5803 if (OptBB) {
5804 auto *UserParent = *OptBB;
5805 // Okay, the CFG is simple enough, try to sink this instruction.
5806 if (tryToSinkInstruction(I, DestBlock: UserParent)) {
5807 LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
5808 MadeIRChange = true;
5809 // We'll add uses of the sunk instruction below, but since
5810 // sinking can expose opportunities for it's *operands* add
5811 // them to the worklist
5812 for (Use &U : I->operands())
5813 if (Instruction *OpI = dyn_cast<Instruction>(Val: U.get()))
5814 Worklist.push(I: OpI);
5815 }
5816 }
5817
5818 // Now that we have an instruction, try combining it to simplify it.
5819 Builder.SetInsertPoint(I);
5820 Builder.CollectMetadataToCopy(
5821 Src: I, MetadataKinds: {LLVMContext::MD_dbg, LLVMContext::MD_annotation});
5822
5823#ifndef NDEBUG
5824 std::string OrigI;
5825#endif
5826 LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS););
5827 LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
5828
5829 if (Instruction *Result = visit(I&: *I)) {
5830 ++NumCombined;
5831 // Should we replace the old instruction with a new one?
5832 if (Result != I) {
5833 LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
5834 << " New = " << *Result << '\n');
5835
5836 // We copy the old instruction's DebugLoc to the new instruction, unless
5837 // InstCombine already assigned a DebugLoc to it, in which case we
5838 // should trust the more specifically selected DebugLoc.
5839 Result->setDebugLoc(Result->getDebugLoc().orElse(Other: I->getDebugLoc()));
5840 // We also copy annotation metadata to the new instruction.
5841 Result->copyMetadata(SrcInst: *I, WL: LLVMContext::MD_annotation);
5842 // Everything uses the new instruction now.
5843 I->replaceAllUsesWith(V: Result);
5844
5845 // Move the name to the new instruction first.
5846 Result->takeName(V: I);
5847
5848 // Insert the new instruction into the basic block...
5849 BasicBlock *InstParent = I->getParent();
5850 BasicBlock::iterator InsertPos = I->getIterator();
5851
5852 // Are we replace a PHI with something that isn't a PHI, or vice versa?
5853 if (isa<PHINode>(Val: Result) != isa<PHINode>(Val: I)) {
5854 // We need to fix up the insertion point.
5855 if (isa<PHINode>(Val: I)) // PHI -> Non-PHI
5856 InsertPos = InstParent->getFirstInsertionPt();
5857 else // Non-PHI -> PHI
5858 InsertPos = InstParent->getFirstNonPHIIt();
5859 }
5860
5861 Result->insertInto(ParentBB: InstParent, It: InsertPos);
5862
5863 // Push the new instruction and any users onto the worklist.
5864 Worklist.pushUsersToWorkList(I&: *Result);
5865 Worklist.push(I: Result);
5866
5867 eraseInstFromFunction(I&: *I);
5868 } else {
5869 LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
5870 << " New = " << *I << '\n');
5871
5872 // If the instruction was modified, it's possible that it is now dead.
5873 // if so, remove it.
5874 if (isInstructionTriviallyDead(I, TLI: &TLI)) {
5875 eraseInstFromFunction(I&: *I);
5876 } else {
5877 Worklist.pushUsersToWorkList(I&: *I);
5878 Worklist.push(I);
5879 }
5880 }
5881 MadeIRChange = true;
5882 }
5883 }
5884
5885 Worklist.zap();
5886 return MadeIRChange;
5887}
5888
5889// Track the scopes used by !alias.scope and !noalias. In a function, a
5890// @llvm.experimental.noalias.scope.decl is only useful if that scope is used
5891// by both sets. If not, the declaration of the scope can be safely omitted.
5892// The MDNode of the scope can be omitted as well for the instructions that are
5893// part of this function. We do not do that at this point, as this might become
5894// too time consuming to do.
5895class AliasScopeTracker {
5896 SmallPtrSet<const MDNode *, 8> UsedAliasScopesAndLists;
5897 SmallPtrSet<const MDNode *, 8> UsedNoAliasScopesAndLists;
5898
5899public:
5900 void analyse(Instruction *I) {
5901 // This seems to be faster than checking 'mayReadOrWriteMemory()'.
5902 if (!I->hasMetadataOtherThanDebugLoc())
5903 return;
5904
5905 auto Track = [](Metadata *ScopeList, auto &Container) {
5906 const auto *MDScopeList = dyn_cast_or_null<MDNode>(Val: ScopeList);
5907 if (!MDScopeList || !Container.insert(MDScopeList).second)
5908 return;
5909 for (const auto &MDOperand : MDScopeList->operands())
5910 if (auto *MDScope = dyn_cast<MDNode>(Val: MDOperand))
5911 Container.insert(MDScope);
5912 };
5913
5914 Track(I->getMetadata(KindID: LLVMContext::MD_alias_scope), UsedAliasScopesAndLists);
5915 Track(I->getMetadata(KindID: LLVMContext::MD_noalias), UsedNoAliasScopesAndLists);
5916 }
5917
5918 bool isNoAliasScopeDeclDead(Instruction *Inst) {
5919 NoAliasScopeDeclInst *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: Inst);
5920 if (!Decl)
5921 return false;
5922
5923 assert(Decl->use_empty() &&
5924 "llvm.experimental.noalias.scope.decl in use ?");
5925 const MDNode *MDSL = Decl->getScopeList();
5926 assert(MDSL->getNumOperands() == 1 &&
5927 "llvm.experimental.noalias.scope should refer to a single scope");
5928 auto &MDOperand = MDSL->getOperand(I: 0);
5929 if (auto *MD = dyn_cast<MDNode>(Val: MDOperand))
5930 return !UsedAliasScopesAndLists.contains(Ptr: MD) ||
5931 !UsedNoAliasScopesAndLists.contains(Ptr: MD);
5932
5933 // Not an MDNode ? throw away.
5934 return true;
5935 }
5936};
5937
5938/// Populate the IC worklist from a function, by walking it in reverse
5939/// post-order and adding all reachable code to the worklist.
5940///
5941/// This has a couple of tricks to make the code faster and more powerful. In
5942/// particular, we constant fold and DCE instructions as we go, to avoid adding
5943/// them to the worklist (this significantly speeds up instcombine on code where
5944/// many instructions are dead or constant). Additionally, if we find a branch
5945/// whose condition is a known constant, we only visit the reachable successors.
5946bool InstCombinerImpl::prepareWorklist(Function &F) {
5947 bool MadeIRChange = false;
5948 SmallPtrSet<BasicBlock *, 32> LiveBlocks;
5949 SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
5950 DenseMap<Constant *, Constant *> FoldedConstants;
5951 AliasScopeTracker SeenAliasScopes;
5952
5953 auto HandleOnlyLiveSuccessor = [&](BasicBlock *BB, BasicBlock *LiveSucc) {
5954 for (BasicBlock *Succ : successors(BB))
5955 if (Succ != LiveSucc && DeadEdges.insert(V: {BB, Succ}).second)
5956 for (PHINode &PN : Succ->phis())
5957 for (Use &U : PN.incoming_values())
5958 if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(Val: U)) {
5959 U.set(PoisonValue::get(T: PN.getType()));
5960 MadeIRChange = true;
5961 }
5962 };
5963
5964 for (BasicBlock *BB : RPOT) {
5965 if (!BB->isEntryBlock() && all_of(Range: predecessors(BB), P: [&](BasicBlock *Pred) {
5966 return DeadEdges.contains(V: {Pred, BB}) || DT.dominates(A: BB, B: Pred);
5967 })) {
5968 HandleOnlyLiveSuccessor(BB, nullptr);
5969 continue;
5970 }
5971 LiveBlocks.insert(Ptr: BB);
5972
5973 for (Instruction &Inst : llvm::make_early_inc_range(Range&: *BB)) {
5974 // ConstantProp instruction if trivially constant.
5975 if (!Inst.use_empty() &&
5976 (Inst.getNumOperands() == 0 || isa<Constant>(Val: Inst.getOperand(i: 0))))
5977 if (Constant *C = ConstantFoldInstruction(I: &Inst, DL, TLI: &TLI)) {
5978 LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
5979 << '\n');
5980 Inst.replaceAllUsesWith(V: C);
5981 ++NumConstProp;
5982 if (isInstructionTriviallyDead(I: &Inst, TLI: &TLI))
5983 Inst.eraseFromParent();
5984 MadeIRChange = true;
5985 continue;
5986 }
5987
5988 // See if we can constant fold its operands.
5989 for (Use &U : Inst.operands()) {
5990 if (!isa<ConstantVector>(Val: U) && !isa<ConstantExpr>(Val: U))
5991 continue;
5992
5993 auto *C = cast<Constant>(Val&: U);
5994 Constant *&FoldRes = FoldedConstants[C];
5995 if (!FoldRes)
5996 FoldRes = ConstantFoldConstant(C, DL, TLI: &TLI);
5997
5998 if (FoldRes != C) {
5999 LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
6000 << "\n Old = " << *C
6001 << "\n New = " << *FoldRes << '\n');
6002 U = FoldRes;
6003 MadeIRChange = true;
6004 }
6005 }
6006
6007 // Skip processing debug and pseudo intrinsics in InstCombine. Processing
6008 // these call instructions consumes non-trivial amount of time and
6009 // provides no value for the optimization.
6010 if (!Inst.isDebugOrPseudoInst()) {
6011 InstrsForInstructionWorklist.push_back(Elt: &Inst);
6012 SeenAliasScopes.analyse(I: &Inst);
6013 }
6014 }
6015
6016 // If this is a branch or switch on a constant, mark only the single
6017 // live successor. Otherwise assume all successors are live.
6018 Instruction *TI = BB->getTerminator();
6019 if (BranchInst *BI = dyn_cast<BranchInst>(Val: TI); BI && BI->isConditional()) {
6020 if (isa<UndefValue>(Val: BI->getCondition())) {
6021 // Branch on undef is UB.
6022 HandleOnlyLiveSuccessor(BB, nullptr);
6023 continue;
6024 }
6025 if (auto *Cond = dyn_cast<ConstantInt>(Val: BI->getCondition())) {
6026 bool CondVal = Cond->getZExtValue();
6027 HandleOnlyLiveSuccessor(BB, BI->getSuccessor(i: !CondVal));
6028 continue;
6029 }
6030 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
6031 if (isa<UndefValue>(Val: SI->getCondition())) {
6032 // Switch on undef is UB.
6033 HandleOnlyLiveSuccessor(BB, nullptr);
6034 continue;
6035 }
6036 if (auto *Cond = dyn_cast<ConstantInt>(Val: SI->getCondition())) {
6037 HandleOnlyLiveSuccessor(BB,
6038 SI->findCaseValue(C: Cond)->getCaseSuccessor());
6039 continue;
6040 }
6041 }
6042 }
6043
6044 // Remove instructions inside unreachable blocks. This prevents the
6045 // instcombine code from having to deal with some bad special cases, and
6046 // reduces use counts of instructions.
6047 for (BasicBlock &BB : F) {
6048 if (LiveBlocks.count(Ptr: &BB))
6049 continue;
6050
6051 unsigned NumDeadInstInBB;
6052 NumDeadInstInBB = removeAllNonTerminatorAndEHPadInstructions(BB: &BB);
6053
6054 MadeIRChange |= NumDeadInstInBB != 0;
6055 NumDeadInst += NumDeadInstInBB;
6056 }
6057
6058 // Once we've found all of the instructions to add to instcombine's worklist,
6059 // add them in reverse order. This way instcombine will visit from the top
6060 // of the function down. This jives well with the way that it adds all uses
6061 // of instructions to the worklist after doing a transformation, thus avoiding
6062 // some N^2 behavior in pathological cases.
6063 Worklist.reserve(Size: InstrsForInstructionWorklist.size());
6064 for (Instruction *Inst : reverse(C&: InstrsForInstructionWorklist)) {
6065 // DCE instruction if trivially dead. As we iterate in reverse program
6066 // order here, we will clean up whole chains of dead instructions.
6067 if (isInstructionTriviallyDead(I: Inst, TLI: &TLI) ||
6068 SeenAliasScopes.isNoAliasScopeDeclDead(Inst)) {
6069 ++NumDeadInst;
6070 LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
6071 salvageDebugInfo(I&: *Inst);
6072 Inst->eraseFromParent();
6073 MadeIRChange = true;
6074 continue;
6075 }
6076
6077 Worklist.push(I: Inst);
6078 }
6079
6080 return MadeIRChange;
6081}
6082
6083void InstCombiner::computeBackEdges() {
6084 // Collect backedges.
6085 SmallPtrSet<BasicBlock *, 16> Visited;
6086 for (BasicBlock *BB : RPOT) {
6087 Visited.insert(Ptr: BB);
6088 for (BasicBlock *Succ : successors(BB))
6089 if (Visited.contains(Ptr: Succ))
6090 BackEdges.insert(V: {BB, Succ});
6091 }
6092 ComputedBackEdges = true;
6093}
6094
6095static bool combineInstructionsOverFunction(
6096 Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
6097 AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
6098 DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
6099 BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI,
6100 const InstCombineOptions &Opts) {
6101 auto &DL = F.getDataLayout();
6102 bool VerifyFixpoint = Opts.VerifyFixpoint &&
6103 !F.hasFnAttribute(Kind: "instcombine-no-verify-fixpoint");
6104
6105 /// Builder - This is an IRBuilder that automatically inserts new
6106 /// instructions into the worklist when they are created.
6107 IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder(
6108 F.getContext(), TargetFolder(DL),
6109 IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
6110 Worklist.add(I);
6111 if (auto *Assume = dyn_cast<AssumeInst>(Val: I))
6112 AC.registerAssumption(CI: Assume);
6113 }));
6114
6115 ReversePostOrderTraversal<BasicBlock *> RPOT(&F.front());
6116
6117 // Lower dbg.declare intrinsics otherwise their value may be clobbered
6118 // by instcombiner.
6119 bool MadeIRChange = false;
6120 if (ShouldLowerDbgDeclare)
6121 MadeIRChange = LowerDbgDeclare(F);
6122
6123 // Iterate while there is work to do.
6124 unsigned Iteration = 0;
6125 while (true) {
6126 if (Iteration >= Opts.MaxIterations && !VerifyFixpoint) {
6127 LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations
6128 << " on " << F.getName()
6129 << " reached; stopping without verifying fixpoint\n");
6130 break;
6131 }
6132
6133 ++Iteration;
6134 ++NumWorklistIterations;
6135 LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
6136 << F.getName() << "\n");
6137
6138 InstCombinerImpl IC(Worklist, Builder, F, AA, AC, TLI, TTI, DT, ORE, BFI,
6139 BPI, PSI, DL, RPOT);
6140 IC.MaxArraySizeForCombine = MaxArraySize;
6141 bool MadeChangeInThisIteration = IC.prepareWorklist(F);
6142 MadeChangeInThisIteration |= IC.run();
6143 if (!MadeChangeInThisIteration)
6144 break;
6145
6146 MadeIRChange = true;
6147 if (Iteration > Opts.MaxIterations) {
6148 reportFatalUsageError(
6149 reason: "Instruction Combining on " + Twine(F.getName()) +
6150 " did not reach a fixpoint after " + Twine(Opts.MaxIterations) +
6151 " iterations. " +
6152 "Use 'instcombine<no-verify-fixpoint>' or function attribute "
6153 "'instcombine-no-verify-fixpoint' to suppress this error.");
6154 }
6155 }
6156
6157 if (Iteration == 1)
6158 ++NumOneIteration;
6159 else if (Iteration == 2)
6160 ++NumTwoIterations;
6161 else if (Iteration == 3)
6162 ++NumThreeIterations;
6163 else
6164 ++NumFourOrMoreIterations;
6165
6166 return MadeIRChange;
6167}
6168
/// Construct the pass, storing \p Opts in the Options member.
InstCombinePass::InstCombinePass(InstCombineOptions Opts) : Options(Opts) {}
6170
6171void InstCombinePass::printPipeline(
6172 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
6173 static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline(
6174 OS, MapClassName2PassName);
6175 OS << '<';
6176 OS << "max-iterations=" << Options.MaxIterations << ";";
6177 OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint";
6178 OS << '>';
6179}
6180
// Definition of the pass's static ID member; InstCombinePass::run passes its
// address to LastRunTrackingAnalysis.
char InstCombinePass::ID = 0;
6182
6183PreservedAnalyses InstCombinePass::run(Function &F,
6184 FunctionAnalysisManager &AM) {
6185 auto &LRT = AM.getResult<LastRunTrackingAnalysis>(IR&: F);
6186 // No changes since last InstCombine pass, exit early.
6187 if (LRT.shouldSkip(ID: &ID))
6188 return PreservedAnalyses::all();
6189
6190 auto &AC = AM.getResult<AssumptionAnalysis>(IR&: F);
6191 auto &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
6192 auto &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F);
6193 auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
6194 auto &TTI = AM.getResult<TargetIRAnalysis>(IR&: F);
6195
6196 auto *AA = &AM.getResult<AAManager>(IR&: F);
6197 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F);
6198 ProfileSummaryInfo *PSI =
6199 MAMProxy.getCachedResult<ProfileSummaryAnalysis>(IR&: *F.getParent());
6200 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
6201 &AM.getResult<BlockFrequencyAnalysis>(IR&: F) : nullptr;
6202 auto *BPI = AM.getCachedResult<BranchProbabilityAnalysis>(IR&: F);
6203
6204 if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
6205 BFI, BPI, PSI, Opts: Options)) {
6206 // No changes, all analyses are preserved.
6207 LRT.update(ID: &ID, /*Changed=*/false);
6208 return PreservedAnalyses::all();
6209 }
6210
6211 // Mark all the analyses that instcombine updates as preserved.
6212 PreservedAnalyses PA;
6213 LRT.update(ID: &ID, /*Changed=*/true);
6214 PA.preserve<LastRunTrackingAnalysis>();
6215 PA.preserveSet<CFGAnalyses>();
6216 return PA;
6217}
6218
/// Declare the analyses the legacy pass requires and preserves on \p AU.
/// The pass reports that it preserves the CFG.
void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  // Analyses fetched in runOnFunction.
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<TargetLibraryInfoWrapperPass>();
  AU.addRequired<TargetTransformInfoWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  // Analyses declared as preserved by this pass.
  AU.addPreserved<DominatorTreeWrapperPass>();
  AU.addPreserved<AAResultsWrapperPass>();
  AU.addPreserved<BasicAAWrapperPass>();
  AU.addPreserved<GlobalsAAWrapperPass>();
  // Profile information; runOnFunction only queries block frequencies when a
  // profile summary is present.
  AU.addRequired<ProfileSummaryInfoWrapperPass>();
  LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
}
6234
6235bool InstructionCombiningPass::runOnFunction(Function &F) {
6236 if (skipFunction(F))
6237 return false;
6238
6239 // Required analyses.
6240 auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
6241 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
6242 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
6243 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
6244 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
6245 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
6246
6247 // Optional analyses.
6248 ProfileSummaryInfo *PSI =
6249 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
6250 BlockFrequencyInfo *BFI =
6251 (PSI && PSI->hasProfileSummary()) ?
6252 &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
6253 nullptr;
6254 BranchProbabilityInfo *BPI = nullptr;
6255 if (auto *WrapperPass =
6256 getAnalysisIfAvailable<BranchProbabilityInfoWrapperPass>())
6257 BPI = &WrapperPass->getBPI();
6258
6259 return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
6260 BFI, BPI, PSI, Opts: InstCombineOptions());
6261}
6262
// Definition of the legacy pass's static ID member, which the constructor
// hands to the FunctionPass base class.
char InstructionCombiningPass::ID = 0;
6264
/// Construct the legacy pass and run its initialization routine against the
/// global pass registry.
InstructionCombiningPass::InstructionCombiningPass() : FunctionPass(ID) {
  initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
}
6268
// Legacy pass registration for InstructionCombiningPass under the argument
// "instcombine", with the description "Combine redundant instructions". The
// INITIALIZE_PASS_DEPENDENCY lines name the passes listed as dependencies.
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags - confirm against the macro definition.
INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
                      "Combine redundant instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
                    "Combine redundant instructions", false, false)
6282
// Initialization Routines

/// Initialize the InstCombine library by running the initialization routine
/// of InstructionCombiningPass against \p Registry.
void llvm::initializeInstCombine(PassRegistry &Registry) {
  initializeInstructionCombiningPassPass(Registry);
}
6287
/// Allocate a new legacy InstructionCombiningPass and return it as a
/// FunctionPass pointer.
/// NOTE(review): the raw `new` presumably hands ownership to the caller
/// (typically a legacy pass manager) - confirm against callers.
FunctionPass *llvm::createInstructionCombiningPass() {
  return new InstructionCombiningPass();
}
6291