//===- InstCombiner.h - InstCombine implementation --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file provides the interface for the instcombine pass implementation.
/// The interface is used for generic transformations in this folder and
/// target specific combinations in the targets.
/// The visitor implementation is in \c InstCombinerImpl in
/// \c InstCombineInternal.h.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_INSTCOMBINE_INSTCOMBINER_H
#define LLVM_TRANSFORMS_INSTCOMBINE_INSTCOMBINER_H

#include "llvm/Analysis/DomConditionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include <cassert>

#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Utils/InstructionWorklist.h"

namespace llvm {

class AAResults;
class AssumptionCache;
class OptimizationRemarkEmitter;
class ProfileSummaryInfo;
class TargetLibraryInfo;
class TargetTransformInfo;

/// The core instruction combiner logic.
///
/// This class provides the logic to recursively visit instructions and
/// combine them.
class LLVM_LIBRARY_VISIBILITY InstCombiner {
  /// Only used to call target specific intrinsic combining.
  /// It must **NOT** be used for any other purpose, as InstCombine is a
  /// target-independent canonicalization transform.
  TargetTransformInfo &TTI;

public:
  /// Maximum size of array considered when transforming.
  uint64_t MaxArraySizeForCombine = 0;

  /// An IRBuilder that automatically inserts new instructions into the
  /// worklist.
  using BuilderTy = IRBuilder<TargetFolder, IRBuilderCallbackInserter>;
  BuilderTy &Builder;

protected:
  /// A worklist of the instructions that need to be simplified.
  InstructionWorklist &Worklist;

  // Mode in which we are running the combiner.
  const bool MinimizeSize;

  AAResults *AA;

  // Required analyses.
  AssumptionCache &AC;
  TargetLibraryInfo &TLI;
  DominatorTree &DT;
  const DataLayout &DL;
  SimplifyQuery SQ;
  OptimizationRemarkEmitter &ORE;
  BlockFrequencyInfo *BFI;
  BranchProbabilityInfo *BPI;
  ProfileSummaryInfo *PSI;
  DomConditionCache DC;

  // Optional analyses. When non-null, these can both be used to do better
  // combining and will be updated to reflect any changes.
  LoopInfo *LI;

  bool MadeIRChange = false;

  /// Edges that are known to never be taken.
  SmallDenseSet<std::pair<BasicBlock *, BasicBlock *>, 8> DeadEdges;

  /// Order of predecessors to canonicalize phi nodes towards.
  SmallDenseMap<BasicBlock *, SmallVector<BasicBlock *>, 8> PredOrder;

public:
  InstCombiner(InstructionWorklist &Worklist, BuilderTy &Builder,
               bool MinimizeSize, AAResults *AA, AssumptionCache &AC,
               TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
               DominatorTree &DT, OptimizationRemarkEmitter &ORE,
               BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI,
               ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI)
      : TTI(TTI), Builder(Builder), Worklist(Worklist),
        MinimizeSize(MinimizeSize), AA(AA), AC(AC), TLI(TLI), DT(DT), DL(DL),
        SQ(DL, &TLI, &DT, &AC, nullptr, /*UseInstrInfo*/ true,
           /*CanUseUndef*/ true, &DC),
        ORE(ORE), BFI(BFI), BPI(BPI), PSI(PSI), LI(LI) {}

  virtual ~InstCombiner() = default;

  /// Return the source operand of a potentially bitcasted value while
  /// optionally checking if it has one use. If there is no bitcast or the one
  /// use check is not met, return the input value itself.
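  ///
  /// Illustrative sketch only (not taken from the source); the values are
  /// hypothetical. Given IR such as
  /// \code
  ///   %b = bitcast <4 x i8> %v to i32
  ///   %r = and i32 %b, 255
  /// \endcode
  /// peekThroughBitcast(%b) returns %v, while a non-bitcast input is returned
  /// unchanged.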
  static Value *peekThroughBitcast(Value *V, bool OneUseOnly = false) {
    if (auto *BitCast = dyn_cast<BitCastInst>(V))
      if (!OneUseOnly || BitCast->hasOneUse())
        return BitCast->getOperand(0);

    // V is not a bitcast or V has more than one use and OneUseOnly is true.
    return V;
  }

  /// Assign a complexity or rank value to LLVM Values. This is used to reduce
  /// the amount of pattern matching needed for compares and commutative
  /// instructions. For example, if we have:
  ///   icmp ugt X, Constant
  /// or
  ///   xor (add X, Constant), cast Z
  ///
  /// We do not have to consider the commuted variants of these patterns because
  /// canonicalization based on complexity guarantees the above ordering.
  ///
  /// This routine maps IR values to various complexity ranks:
  ///   0 -> undef
  ///   1 -> Constants
  ///   2 -> Other non-instructions
  ///   3 -> Arguments
  ///   4 -> Cast and (f)neg/not instructions
  ///   5 -> Other instructions
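  ///
  /// As a hedged illustration (not part of the source), a commutative
  /// operation is canonicalized so the higher-ranked operand comes first:
  /// \code
  ///   %a = add i32 %x, 1        ; instruction -> rank 5
  ///   %r = xor i32 %a, %arg     ; %a (rank 5) ordered before %arg (rank 3)
  /// \endcode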
  static unsigned getComplexity(Value *V) {
    if (isa<Instruction>(V)) {
      if (isa<CastInst>(V) || match(V, m_Neg(PatternMatch::m_Value())) ||
          match(V, m_Not(PatternMatch::m_Value())) ||
          match(V, m_FNeg(PatternMatch::m_Value())))
        return 4;
      return 5;
    }
    if (isa<Argument>(V))
      return 3;
    return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
  }

  /// Predicate canonicalization reduces the number of patterns that need to be
  /// matched by other transforms. For example, we may swap the operands of a
  /// conditional branch or select to create a compare with a canonical
  /// (inverted) predicate which is then more likely to be matched with other
  /// values.
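  ///
  /// A hedged illustration (not from the source): ICMP_ULE is non-canonical,
  /// so a combine would rather invert the predicate and swap the select arms:
  /// \code
  ///   %c = icmp ule i32 %x, %y            ; non-canonical predicate
  ///   %r = select i1 %c, i32 %a, i32 %b
  ///   ; preferred form:
  ///   %c = icmp ugt i32 %x, %y            ; canonical (inverted) predicate
  ///   %r = select i1 %c, i32 %b, i32 %a
  /// \endcode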
  static bool isCanonicalPredicate(CmpInst::Predicate Pred) {
    switch (Pred) {
    case CmpInst::ICMP_NE:
    case CmpInst::ICMP_ULE:
    case CmpInst::ICMP_SLE:
    case CmpInst::ICMP_UGE:
    case CmpInst::ICMP_SGE:
    // TODO: There are 16 FCMP predicates. Should others be (not) canonical?
    case CmpInst::FCMP_ONE:
    case CmpInst::FCMP_OLE:
    case CmpInst::FCMP_OGE:
      return false;
    default:
      return true;
    }
  }

  /// Add one to a Constant
  static Constant *AddOne(Constant *C) {
    return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
  }

  /// Subtract one from a Constant
  static Constant *SubOne(Constant *C) {
    return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
  }

  static std::optional<std::pair<CmpInst::Predicate, Constant *>>
  getFlippedStrictnessPredicateAndConstant(CmpInst::Predicate Pred,
                                           Constant *C);

  static bool shouldAvoidAbsorbingNotIntoSelect(const SelectInst &SI) {
    // a ? b : false and a ? true : b are the canonical form of logical and/or.
    // This includes !a ? b : false and !a ? true : b. Absorbing the not into
    // the select by swapping operands would break recognition of this pattern
    // in other analyses, so don't do that.
    return match(&SI, PatternMatch::m_LogicalAnd(PatternMatch::m_Value(),
                                                 PatternMatch::m_Value())) ||
           match(&SI, PatternMatch::m_LogicalOr(PatternMatch::m_Value(),
                                                PatternMatch::m_Value()));
  }

  /// Return nonnull value if V is free to invert under the condition of
  /// WillInvertAllUses.
  /// If Builder is nonnull, it will return a simplified ~V.
  /// If Builder is null, it will return an arbitrary nonnull value (not
  /// dereferenceable).
  /// If the inversion will consume instructions, `DoesConsume` will be set to
  /// true. Otherwise it will be false.
  Value *getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
                               BuilderTy *Builder, bool &DoesConsume,
                               unsigned Depth);

  Value *getFreelyInverted(Value *V, bool WillInvertAllUses,
                           BuilderTy *Builder, bool &DoesConsume) {
    DoesConsume = false;
    return getFreelyInvertedImpl(V, WillInvertAllUses, Builder, DoesConsume,
                                 /*Depth*/ 0);
  }

  Value *getFreelyInverted(Value *V, bool WillInvertAllUses,
                           BuilderTy *Builder) {
    bool Unused;
    return getFreelyInverted(V, WillInvertAllUses, Builder, Unused);
  }

  /// Return true if the specified value is free to invert (apply ~ to).
  /// This happens in cases where the ~ can be eliminated. If WillInvertAllUses
  /// is true, work under the assumption that the caller intends to remove all
  /// uses of V and only keep uses of ~V.
  ///
  /// See also: canFreelyInvertAllUsersOf()
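  ///
  /// Illustrative sketch only (not from the source): a compare is free to
  /// invert because flipping its predicate absorbs the ~:
  /// \code
  ///   %c = icmp eq i32 %x, %y
  ///   %n = xor i1 %c, true      ; ~%c folds to: icmp ne i32 %x, %y
  /// \endcode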
  bool isFreeToInvert(Value *V, bool WillInvertAllUses,
                      bool &DoesConsume) {
    return getFreelyInverted(V, WillInvertAllUses, /*Builder*/ nullptr,
                             DoesConsume) != nullptr;
  }

  bool isFreeToInvert(Value *V, bool WillInvertAllUses) {
    bool Unused;
    return isFreeToInvert(V, WillInvertAllUses, Unused);
  }

  /// Given i1 V, can every user of V be freely adapted if V is changed to !V ?
  /// InstCombine's freelyInvertAllUsersOf() must be kept in sync with this fn.
  /// NOTE: for Instructions only!
  ///
  /// See also: isFreeToInvert()
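  ///
  /// As a hedged example (not from the source), users that qualify are the
  /// condition of a select or branch (arms/successors can be swapped) and a
  /// 'not' (xor with true), which simply disappears:
  /// \code
  ///   %r = select i1 %c, i32 %a, i32 %b   ; ok: becomes select !%c, %b, %a
  ///   br i1 %c, label %t, label %f        ; ok: successors swap
  ///   %n = xor i1 %c, true                ; ok: the 'not' is dropped
  /// \endcode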
  bool canFreelyInvertAllUsersOf(Instruction *V, Value *IgnoredUser) {
    // Look at every user of V.
    for (Use &U : V->uses()) {
      if (U.getUser() == IgnoredUser)
        continue; // Don't consider this user.

      auto *I = cast<Instruction>(U.getUser());
      switch (I->getOpcode()) {
      case Instruction::Select:
        if (U.getOperandNo() != 0) // Only if the value is used as select cond.
          return false;
        if (shouldAvoidAbsorbingNotIntoSelect(*cast<SelectInst>(I)))
          return false;
        break;
      case Instruction::Br:
        assert(U.getOperandNo() == 0 && "Must be branching on that value.");
        break; // Free to invert by swapping true/false values/destinations.
      case Instruction::Xor: // Can invert 'xor' if it's a 'not', by ignoring
                             // it.
        if (!match(I, m_Not(PatternMatch::m_Value())))
          return false; // Not a 'not'.
        break;
      default:
        return false; // Don't know, likely not freely invertible.
      }
      // So far all users were free to invert...
    }
    return true; // Can freely invert all users!
  }

  /// Some binary operators require special handling to avoid poison and
  /// undefined behavior. If a constant vector has undef elements, replace those
  /// undefs with identity constants if possible because those are always safe
  /// to execute. If no identity constant exists, replace undef with some other
  /// safe constant.
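  ///
  /// Hedged illustration (not from the source): urem has no RHS identity, so
  /// an undef divisor lane is replaced with the safe constant 1 to rule out a
  /// remainder by an undef (potentially zero) value:
  /// \code
  ///   %r = urem <2 x i32> %x, <i32 3, i32 undef>
  ///   ; undef lane -> safe constant 1: <i32 3, i32 1>
  /// \endcode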
  static Constant *
  getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In,
                                bool IsRHSConstant) {
    auto *InVTy = cast<FixedVectorType>(In->getType());

    Type *EltTy = InVTy->getElementType();
    auto *SafeC = ConstantExpr::getBinOpIdentity(Opcode, EltTy, IsRHSConstant);
    if (!SafeC) {
      // TODO: Should this be available as a constant utility function? It is
      // similar to getBinOpAbsorber().
      if (IsRHSConstant) {
        switch (Opcode) {
        case Instruction::SRem: // X % 1 = 0
        case Instruction::URem: // X %u 1 = 0
          SafeC = ConstantInt::get(EltTy, 1);
          break;
        case Instruction::FRem: // X % 1.0 (doesn't simplify, but it is safe)
          SafeC = ConstantFP::get(EltTy, 1.0);
          break;
        default:
          llvm_unreachable(
              "Only rem opcodes have no identity constant for RHS");
        }
      } else {
        switch (Opcode) {
        case Instruction::Shl:  // 0 << X = 0
        case Instruction::LShr: // 0 >>u X = 0
        case Instruction::AShr: // 0 >> X = 0
        case Instruction::SDiv: // 0 / X = 0
        case Instruction::UDiv: // 0 /u X = 0
        case Instruction::SRem: // 0 % X = 0
        case Instruction::URem: // 0 %u X = 0
        case Instruction::Sub:  // 0 - X (doesn't simplify, but it is safe)
        case Instruction::FSub: // 0.0 - X (doesn't simplify, but it is safe)
        case Instruction::FDiv: // 0.0 / X (doesn't simplify, but it is safe)
        case Instruction::FRem: // 0.0 % X = 0
          SafeC = Constant::getNullValue(EltTy);
          break;
        default:
          llvm_unreachable("Expected to find identity constant for opcode");
        }
      }
    }
    assert(SafeC && "Must have safe constant for binop");
    unsigned NumElts = InVTy->getNumElements();
    SmallVector<Constant *, 16> Out(NumElts);
    for (unsigned i = 0; i != NumElts; ++i) {
      Constant *C = In->getAggregateElement(i);
      Out[i] = isa<UndefValue>(C) ? SafeC : C;
    }
    return ConstantVector::get(Out);
  }

  void addToWorklist(Instruction *I) { Worklist.push(I); }

  AssumptionCache &getAssumptionCache() const { return AC; }
  TargetLibraryInfo &getTargetLibraryInfo() const { return TLI; }
  DominatorTree &getDominatorTree() const { return DT; }
  const DataLayout &getDataLayout() const { return DL; }
  const SimplifyQuery &getSimplifyQuery() const { return SQ; }
  OptimizationRemarkEmitter &getOptimizationRemarkEmitter() const {
    return ORE;
  }
  BlockFrequencyInfo *getBlockFrequencyInfo() const { return BFI; }
  ProfileSummaryInfo *getProfileSummaryInfo() const { return PSI; }
  LoopInfo *getLoopInfo() const { return LI; }

  // Call target specific combiners
  std::optional<Instruction *> targetInstCombineIntrinsic(IntrinsicInst &II);
  std::optional<Value *>
  targetSimplifyDemandedUseBitsIntrinsic(IntrinsicInst &II, APInt DemandedMask,
                                         KnownBits &Known,
                                         bool &KnownBitsComputed);
  std::optional<Value *> targetSimplifyDemandedVectorEltsIntrinsic(
      IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp);

  /// Inserts an instruction \p New before instruction \p Old
  ///
  /// Also adds the new instruction to the worklist and returns \p New so that
  /// it is suitable for use as the return from the visitation patterns.
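  ///
  /// Typical use inside a visit method (illustrative sketch only; the values
  /// X, Y and the visited instruction I are hypothetical):
  /// \code
  ///   auto *NewShl = BinaryOperator::CreateShl(X, Y);
  ///   InsertNewInstBefore(NewShl, I.getIterator());
  /// \endcode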
  Instruction *InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old) {
    assert(New && !New->getParent() &&
           "New instruction already inserted into a basic block!");
    New->insertBefore(Old); // Insert inst
    Worklist.add(New);
    return New;
  }

  /// Same as InsertNewInstBefore, but also sets the debug loc.
  Instruction *InsertNewInstWith(Instruction *New, BasicBlock::iterator Old) {
    New->setDebugLoc(Old->getDebugLoc());
    return InsertNewInstBefore(New, Old);
  }

  /// A combiner-aware RAUW-like routine.
  ///
  /// This method is to be used when an instruction is found to be dead,
  /// replaceable with another preexisting expression. Here we add all uses of
  /// I to the worklist, replace all uses of I with the new value, then return
  /// I, so that the inst combiner will know that I was modified.
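  ///
  /// Typical use in a visit method (an illustrative sketch, not taken from
  /// the source): fold `sub X, 0` by returning the replaced instruction.
  /// \code
  ///   if (match(I.getOperand(1), PatternMatch::m_Zero()))
  ///     return replaceInstUsesWith(I, I.getOperand(0));
  /// \endcode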
  Instruction *replaceInstUsesWith(Instruction &I, Value *V) {
    // If there are no uses to replace, then we return nullptr to indicate that
    // no changes were made to the program.
    if (I.use_empty()) return nullptr;

    Worklist.pushUsersToWorkList(I); // Add all modified instrs to worklist.

    // If we are replacing the instruction with itself, this must be in a
    // segment of unreachable code, so just clobber the instruction.
    if (&I == V)
      V = PoisonValue::get(I.getType());

    LLVM_DEBUG(dbgs() << "IC: Replacing " << I << "\n"
                      << "    with " << *V << '\n');

    // If V is a new unnamed instruction, take the name from the old one.
    if (V->use_empty() && isa<Instruction>(V) && !V->hasName() && I.hasName())
      V->takeName(&I);

    I.replaceAllUsesWith(V);
    return &I;
  }

  /// Replace operand of instruction and add old operand to the worklist.
  Instruction *replaceOperand(Instruction &I, unsigned OpNum, Value *V) {
    Value *OldOp = I.getOperand(OpNum);
    I.setOperand(OpNum, V);
    Worklist.handleUseCountDecrement(OldOp);
    return &I;
  }

  /// Replace use and add the previously used value to the worklist.
  void replaceUse(Use &U, Value *NewValue) {
    Value *OldOp = U;
    U = NewValue;
    Worklist.handleUseCountDecrement(OldOp);
  }

  /// Combiner aware instruction erasure.
  ///
  /// When dealing with an instruction that has side effects or produces a void
  /// value, we can't rely on DCE to delete the instruction. Instead, visit
  /// methods should return the value returned by this function.
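  ///
  /// Illustrative usage from a visit method (a sketch under the assumption
  /// that \c SI is a dead store the combine has decided to delete):
  /// \code
  ///   return eraseInstFromFunction(SI);
  /// \endcode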
  virtual Instruction *eraseInstFromFunction(Instruction &I) = 0;

  void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
                        const Instruction *CxtI) const {
    llvm::computeKnownBits(V, Known, Depth, SQ.getWithInstruction(CxtI));
  }

  KnownBits computeKnownBits(const Value *V, unsigned Depth,
                             const Instruction *CxtI) const {
    return llvm::computeKnownBits(V, Depth, SQ.getWithInstruction(CxtI));
  }

  bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero = false,
                              unsigned Depth = 0,
                              const Instruction *CxtI = nullptr) {
    return llvm::isKnownToBeAPowerOfTwo(V, DL, OrZero, Depth, &AC, CxtI, &DT);
  }

  bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth = 0,
                         const Instruction *CxtI = nullptr) const {
    return llvm::MaskedValueIsZero(V, Mask, SQ.getWithInstruction(CxtI), Depth);
  }

  unsigned ComputeNumSignBits(const Value *Op, unsigned Depth = 0,
                              const Instruction *CxtI = nullptr) const {
    return llvm::ComputeNumSignBits(Op, DL, Depth, &AC, CxtI, &DT);
  }

  unsigned ComputeMaxSignificantBits(const Value *Op, unsigned Depth = 0,
                                     const Instruction *CxtI = nullptr) const {
    return llvm::ComputeMaxSignificantBits(Op, DL, Depth, &AC, CxtI, &DT);
  }

  OverflowResult computeOverflowForUnsignedMul(const Value *LHS,
                                               const Value *RHS,
                                               const Instruction *CxtI,
                                               bool IsNSW = false) const {
    return llvm::computeOverflowForUnsignedMul(
        LHS, RHS, SQ.getWithInstruction(CxtI), IsNSW);
  }

  OverflowResult computeOverflowForSignedMul(const Value *LHS, const Value *RHS,
                                             const Instruction *CxtI) const {
    return llvm::computeOverflowForSignedMul(LHS, RHS,
                                             SQ.getWithInstruction(CxtI));
  }

  OverflowResult
  computeOverflowForUnsignedAdd(const WithCache<const Value *> &LHS,
                                const WithCache<const Value *> &RHS,
                                const Instruction *CxtI) const {
    return llvm::computeOverflowForUnsignedAdd(LHS, RHS,
                                               SQ.getWithInstruction(CxtI));
  }

  OverflowResult
  computeOverflowForSignedAdd(const WithCache<const Value *> &LHS,
                              const WithCache<const Value *> &RHS,
                              const Instruction *CxtI) const {
    return llvm::computeOverflowForSignedAdd(LHS, RHS,
                                             SQ.getWithInstruction(CxtI));
  }

  OverflowResult computeOverflowForUnsignedSub(const Value *LHS,
                                               const Value *RHS,
                                               const Instruction *CxtI) const {
    return llvm::computeOverflowForUnsignedSub(LHS, RHS,
                                               SQ.getWithInstruction(CxtI));
  }

  OverflowResult computeOverflowForSignedSub(const Value *LHS, const Value *RHS,
                                             const Instruction *CxtI) const {
    return llvm::computeOverflowForSignedSub(LHS, RHS,
                                             SQ.getWithInstruction(CxtI));
  }

  virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo,
                                    const APInt &DemandedMask, KnownBits &Known,
                                    unsigned Depth, const SimplifyQuery &Q) = 0;

  bool SimplifyDemandedBits(Instruction *I, unsigned OpNo,
                            const APInt &DemandedMask, KnownBits &Known) {
    return SimplifyDemandedBits(I, OpNo, DemandedMask, Known,
                                /*Depth=*/0, SQ.getWithInstruction(I));
  }

  virtual Value *
  SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &UndefElts,
                             unsigned Depth = 0,
                             bool AllowMultipleUsers = false) = 0;

  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
};

} // namespace llvm

#undef DEBUG_TYPE

#endif