1//===- ValueTracking.cpp - Walk computations to compute properties --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains routines that help analyze properties that chains of
10// computations have.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Analysis/ValueTracking.h"
15#include "llvm/ADT/APFloat.h"
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/FloatingPointMode.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/ScopeExit.h"
21#include "llvm/ADT/SmallPtrSet.h"
22#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/SmallVector.h"
24#include "llvm/ADT/StringRef.h"
25#include "llvm/ADT/iterator_range.h"
26#include "llvm/Analysis/AliasAnalysis.h"
27#include "llvm/Analysis/AssumeBundleQueries.h"
28#include "llvm/Analysis/AssumptionCache.h"
29#include "llvm/Analysis/ConstantFolding.h"
30#include "llvm/Analysis/DomConditionCache.h"
31#include "llvm/Analysis/FloatingPointPredicateUtils.h"
32#include "llvm/Analysis/GuardUtils.h"
33#include "llvm/Analysis/InstructionSimplify.h"
34#include "llvm/Analysis/Loads.h"
35#include "llvm/Analysis/LoopInfo.h"
36#include "llvm/Analysis/TargetLibraryInfo.h"
37#include "llvm/Analysis/VectorUtils.h"
38#include "llvm/Analysis/WithCache.h"
39#include "llvm/IR/Argument.h"
40#include "llvm/IR/Attributes.h"
41#include "llvm/IR/BasicBlock.h"
42#include "llvm/IR/Constant.h"
43#include "llvm/IR/ConstantRange.h"
44#include "llvm/IR/Constants.h"
45#include "llvm/IR/DerivedTypes.h"
46#include "llvm/IR/DiagnosticInfo.h"
47#include "llvm/IR/Dominators.h"
48#include "llvm/IR/EHPersonalities.h"
49#include "llvm/IR/Function.h"
50#include "llvm/IR/GetElementPtrTypeIterator.h"
51#include "llvm/IR/GlobalAlias.h"
52#include "llvm/IR/GlobalValue.h"
53#include "llvm/IR/GlobalVariable.h"
54#include "llvm/IR/InstrTypes.h"
55#include "llvm/IR/Instruction.h"
56#include "llvm/IR/Instructions.h"
57#include "llvm/IR/IntrinsicInst.h"
58#include "llvm/IR/Intrinsics.h"
59#include "llvm/IR/IntrinsicsAArch64.h"
60#include "llvm/IR/IntrinsicsAMDGPU.h"
61#include "llvm/IR/IntrinsicsRISCV.h"
62#include "llvm/IR/IntrinsicsX86.h"
63#include "llvm/IR/LLVMContext.h"
64#include "llvm/IR/Metadata.h"
65#include "llvm/IR/Module.h"
66#include "llvm/IR/Operator.h"
67#include "llvm/IR/PatternMatch.h"
68#include "llvm/IR/Type.h"
69#include "llvm/IR/User.h"
70#include "llvm/IR/Value.h"
71#include "llvm/Support/Casting.h"
72#include "llvm/Support/CommandLine.h"
73#include "llvm/Support/Compiler.h"
74#include "llvm/Support/ErrorHandling.h"
75#include "llvm/Support/KnownBits.h"
76#include "llvm/Support/KnownFPClass.h"
77#include "llvm/Support/MathExtras.h"
78#include "llvm/TargetParser/RISCVTargetParser.h"
79#include <algorithm>
80#include <cassert>
81#include <cstdint>
82#include <optional>
83#include <utility>
84
85using namespace llvm;
86using namespace llvm::PatternMatch;
87
88// Controls the number of uses of the value searched for possible
89// dominating comparisons.
90static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
91 cl::Hidden, cl::init(Val: 20));
92
93
94/// Returns the bitwidth of the given scalar or pointer type. For vector types,
95/// returns the element type's bitwidth.
96static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
97 if (unsigned BitWidth = Ty->getScalarSizeInBits())
98 return BitWidth;
99
100 return DL.getPointerTypeSizeInBits(Ty);
101}
102
103// Given the provided Value and, potentially, a context instruction, return
104// the preferred context instruction (if any).
105static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
106 // If we've been provided with a context instruction, then use that (provided
107 // it has been inserted).
108 if (CxtI && CxtI->getParent())
109 return CxtI;
110
111 // If the value is really an already-inserted instruction, then use that.
112 CxtI = dyn_cast<Instruction>(Val: V);
113 if (CxtI && CxtI->getParent())
114 return CxtI;
115
116 return nullptr;
117}
118
119static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
120 const APInt &DemandedElts,
121 APInt &DemandedLHS, APInt &DemandedRHS) {
122 if (isa<ScalableVectorType>(Val: Shuf->getType())) {
123 assert(DemandedElts == APInt(1,1));
124 DemandedLHS = DemandedRHS = DemandedElts;
125 return true;
126 }
127
128 int NumElts =
129 cast<FixedVectorType>(Val: Shuf->getOperand(i_nocapture: 0)->getType())->getNumElements();
130 return llvm::getShuffleDemandedElts(SrcWidth: NumElts, Mask: Shuf->getShuffleMask(),
131 DemandedElts, DemandedLHS, DemandedRHS);
132}
133
134static void computeKnownBits(const Value *V, const APInt &DemandedElts,
135 KnownBits &Known, const SimplifyQuery &Q,
136 unsigned Depth);
137
138void llvm::computeKnownBits(const Value *V, KnownBits &Known,
139 const SimplifyQuery &Q, unsigned Depth) {
140 // Since the number of lanes in a scalable vector is unknown at compile time,
141 // we track one bit which is implicitly broadcast to all lanes. This means
142 // that all lanes in a scalable vector are considered demanded.
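  // For example, a <4 x i32> gets DemandedElts = 0b1111, while a
  // <vscale x 4 x i32> gets the single tracked bit APInt(1, 1).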
143 auto *FVTy = dyn_cast<FixedVectorType>(Val: V->getType());
144 APInt DemandedElts =
145 FVTy ? APInt::getAllOnes(numBits: FVTy->getNumElements()) : APInt(1, 1);
146 ::computeKnownBits(V, DemandedElts, Known, Q, Depth);
147}
148
149void llvm::computeKnownBits(const Value *V, KnownBits &Known,
150 const DataLayout &DL, AssumptionCache *AC,
151 const Instruction *CxtI, const DominatorTree *DT,
152 bool UseInstrInfo, unsigned Depth) {
153 computeKnownBits(V, Known,
154 Q: SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo),
155 Depth);
156}
157
158KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
159 AssumptionCache *AC, const Instruction *CxtI,
160 const DominatorTree *DT, bool UseInstrInfo,
161 unsigned Depth) {
162 return computeKnownBits(
163 V, Q: SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo), Depth);
164}
165
166KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
167 const DataLayout &DL, AssumptionCache *AC,
168 const Instruction *CxtI,
169 const DominatorTree *DT, bool UseInstrInfo,
170 unsigned Depth) {
171 return computeKnownBits(
172 V, DemandedElts,
173 Q: SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo), Depth);
174}
175
176static bool haveNoCommonBitsSetSpecialCases(const Value *LHS, const Value *RHS,
177 const SimplifyQuery &SQ) {
178 // Look for an inverted mask: (X & ~M) op (Y & M).
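  // Each bit of M selects which of the two sides may have that bit set, so
  // (X & ~M) and (Y & M) can never both have the same bit set.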
179 {
180 Value *M;
181 if (match(V: LHS, P: m_c_And(L: m_Not(V: m_Value(V&: M)), R: m_Value())) &&
182 match(V: RHS, P: m_c_And(L: m_Specific(V: M), R: m_Value())) &&
183 isGuaranteedNotToBeUndef(V: M, AC: SQ.AC, CtxI: SQ.CxtI, DT: SQ.DT))
184 return true;
185 }
186
187 // X op (Y & ~X)
188 if (match(V: RHS, P: m_c_And(L: m_Not(V: m_Specific(V: LHS)), R: m_Value())) &&
189 isGuaranteedNotToBeUndef(V: LHS, AC: SQ.AC, CtxI: SQ.CxtI, DT: SQ.DT))
190 return true;
191
192 // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern
193 // for constant Y.
194 Value *Y;
195 if (match(V: RHS,
196 P: m_c_Xor(L: m_c_And(L: m_Specific(V: LHS), R: m_Value(V&: Y)), R: m_Deferred(V: Y))) &&
197 isGuaranteedNotToBeUndef(V: LHS, AC: SQ.AC, CtxI: SQ.CxtI, DT: SQ.DT) &&
198 isGuaranteedNotToBeUndef(V: Y, AC: SQ.AC, CtxI: SQ.CxtI, DT: SQ.DT))
199 return true;
200
201 // Peek through extends to find a 'not' of the other side:
202 // (ext Y) op ext(~Y)
203 if (match(V: LHS, P: m_ZExtOrSExt(Op: m_Value(V&: Y))) &&
204 match(V: RHS, P: m_ZExtOrSExt(Op: m_Not(V: m_Specific(V: Y)))) &&
205 isGuaranteedNotToBeUndef(V: Y, AC: SQ.AC, CtxI: SQ.CxtI, DT: SQ.DT))
206 return true;
207
208 // Look for: (A & B) op ~(A | B)
209 {
210 Value *A, *B;
211 if (match(V: LHS, P: m_And(L: m_Value(V&: A), R: m_Value(V&: B))) &&
212 match(V: RHS, P: m_Not(V: m_c_Or(L: m_Specific(V: A), R: m_Specific(V: B)))) &&
213 isGuaranteedNotToBeUndef(V: A, AC: SQ.AC, CtxI: SQ.CxtI, DT: SQ.DT) &&
214 isGuaranteedNotToBeUndef(V: B, AC: SQ.AC, CtxI: SQ.CxtI, DT: SQ.DT))
215 return true;
216 }
217
218 // Look for: (X << V) op (Y >> (BitWidth - V))
219 // or (X >> V) op (Y << (BitWidth - V))
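  // The two sides occupy disjoint bit ranges: X << V has its low V bits
  // clear, while Y >> (BitWidth - V) can only have its low V bits set.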
220 {
221 const Value *V;
222 const APInt *R;
223 if (((match(V: RHS, P: m_Shl(L: m_Value(), R: m_Sub(L: m_APInt(Res&: R), R: m_Value(V)))) &&
224 match(V: LHS, P: m_LShr(L: m_Value(), R: m_Specific(V)))) ||
225 (match(V: RHS, P: m_LShr(L: m_Value(), R: m_Sub(L: m_APInt(Res&: R), R: m_Value(V)))) &&
226 match(V: LHS, P: m_Shl(L: m_Value(), R: m_Specific(V))))) &&
227 R->uge(RHS: LHS->getType()->getScalarSizeInBits()))
228 return true;
229 }
230
231 return false;
232}
233
234bool llvm::haveNoCommonBitsSet(const WithCache<const Value *> &LHSCache,
235 const WithCache<const Value *> &RHSCache,
236 const SimplifyQuery &SQ) {
237 const Value *LHS = LHSCache.getValue();
238 const Value *RHS = RHSCache.getValue();
239
240 assert(LHS->getType() == RHS->getType() &&
241 "LHS and RHS should have the same type");
242 assert(LHS->getType()->isIntOrIntVectorTy() &&
243 "LHS and RHS should be integers");
244
245 if (haveNoCommonBitsSetSpecialCases(LHS, RHS, SQ) ||
246 haveNoCommonBitsSetSpecialCases(LHS: RHS, RHS: LHS, SQ))
247 return true;
248
249 return KnownBits::haveNoCommonBitsSet(LHS: LHSCache.getKnownBits(Q: SQ),
250 RHS: RHSCache.getKnownBits(Q: SQ));
251}
252
253bool llvm::isOnlyUsedInZeroComparison(const Instruction *I) {
254 return !I->user_empty() && all_of(Range: I->users(), P: [](const User *U) {
255 return match(V: U, P: m_ICmp(L: m_Value(), R: m_Zero()));
256 });
257}
258
259bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
260 return !I->user_empty() && all_of(Range: I->users(), P: [](const User *U) {
261 CmpPredicate P;
262 return match(V: U, P: m_ICmp(Pred&: P, L: m_Value(), R: m_Zero())) && ICmpInst::isEquality(P);
263 });
264}
265
266bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
267 bool OrZero, AssumptionCache *AC,
268 const Instruction *CxtI,
269 const DominatorTree *DT, bool UseInstrInfo,
270 unsigned Depth) {
271 return ::isKnownToBeAPowerOfTwo(
272 V, OrZero, Q: SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo),
273 Depth);
274}
275
276static bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
277 const SimplifyQuery &Q, unsigned Depth);
278
279bool llvm::isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
280 unsigned Depth) {
281 return computeKnownBits(V, Q: SQ, Depth).isNonNegative();
282}
283
284bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
285 unsigned Depth) {
286 if (auto *CI = dyn_cast<ConstantInt>(Val: V))
287 return CI->getValue().isStrictlyPositive();
288
289 // If `isKnownNonNegative` ever becomes more sophisticated, make sure to keep
290 // this updated.
291 KnownBits Known = computeKnownBits(V, Q: SQ, Depth);
292 return Known.isNonNegative() &&
293 (Known.isNonZero() || isKnownNonZero(V, Q: SQ, Depth));
294}
295
296bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ,
297 unsigned Depth) {
298 return computeKnownBits(V, Q: SQ, Depth).isNegative();
299}
300
301static bool isKnownNonEqual(const Value *V1, const Value *V2,
302 const APInt &DemandedElts, const SimplifyQuery &Q,
303 unsigned Depth);
304
305bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
306 const SimplifyQuery &Q, unsigned Depth) {
307 // We don't support looking through casts.
308 if (V1 == V2 || V1->getType() != V2->getType())
309 return false;
310 auto *FVTy = dyn_cast<FixedVectorType>(Val: V1->getType());
311 APInt DemandedElts =
312 FVTy ? APInt::getAllOnes(numBits: FVTy->getNumElements()) : APInt(1, 1);
313 return ::isKnownNonEqual(V1, V2, DemandedElts, Q, Depth);
314}
315
316bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
317 const SimplifyQuery &SQ, unsigned Depth) {
318 KnownBits Known(Mask.getBitWidth());
319 computeKnownBits(V, Known, Q: SQ, Depth);
320 return Mask.isSubsetOf(RHS: Known.Zero);
321}
322
323static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
324 const SimplifyQuery &Q, unsigned Depth);
325
326static unsigned ComputeNumSignBits(const Value *V, const SimplifyQuery &Q,
327 unsigned Depth = 0) {
328 auto *FVTy = dyn_cast<FixedVectorType>(Val: V->getType());
329 APInt DemandedElts =
330 FVTy ? APInt::getAllOnes(numBits: FVTy->getNumElements()) : APInt(1, 1);
331 return ComputeNumSignBits(V, DemandedElts, Q, Depth);
332}
333
334unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
335 AssumptionCache *AC, const Instruction *CxtI,
336 const DominatorTree *DT, bool UseInstrInfo,
337 unsigned Depth) {
338 return ::ComputeNumSignBits(
339 V, Q: SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo), Depth);
340}
341
342unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL,
343 AssumptionCache *AC,
344 const Instruction *CxtI,
345 const DominatorTree *DT,
346 unsigned Depth) {
347 unsigned SignBits = ComputeNumSignBits(V, DL, AC, CxtI, DT, UseInstrInfo: Depth);
348 return V->getType()->getScalarSizeInBits() - SignBits + 1;
349}
350
351static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
352 bool NSW, bool NUW,
353 const APInt &DemandedElts,
354 KnownBits &KnownOut, KnownBits &Known2,
355 const SimplifyQuery &Q, unsigned Depth) {
356 computeKnownBits(V: Op1, DemandedElts, Known&: KnownOut, Q, Depth: Depth + 1);
357
358 // If one operand is unknown and we have no nowrap information,
359 // the result will be unknown independently of the second operand.
360 if (KnownOut.isUnknown() && !NSW && !NUW)
361 return;
362
363 computeKnownBits(V: Op0, DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
364 KnownOut = KnownBits::computeForAddSub(Add, NSW, NUW, LHS: Known2, RHS: KnownOut);
365
366 if (!Add && NSW && !KnownOut.isNonNegative() &&
367 isImpliedByDomCondition(Pred: ICmpInst::ICMP_SLE, LHS: Op1, RHS: Op0, ContextI: Q.CxtI, DL: Q.DL)
368 .value_or(u: false))
369 KnownOut.makeNonNegative();
370}
371
372static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
373 bool NUW, const APInt &DemandedElts,
374 KnownBits &Known, KnownBits &Known2,
375 const SimplifyQuery &Q, unsigned Depth) {
376 computeKnownBits(V: Op1, DemandedElts, Known, Q, Depth: Depth + 1);
377 computeKnownBits(V: Op0, DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
378
379 bool isKnownNegative = false;
380 bool isKnownNonNegative = false;
381 // If the multiplication is known not to overflow, compute the sign bit.
382 if (NSW) {
383 if (Op0 == Op1) {
384 // The product of a number with itself is non-negative.
385 isKnownNonNegative = true;
386 } else {
387 bool isKnownNonNegativeOp1 = Known.isNonNegative();
388 bool isKnownNonNegativeOp0 = Known2.isNonNegative();
389 bool isKnownNegativeOp1 = Known.isNegative();
390 bool isKnownNegativeOp0 = Known2.isNegative();
391 // The product of two numbers with the same sign is non-negative.
392 isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
393 (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
394 if (!isKnownNonNegative && NUW) {
395 // mul nuw nsw with a factor > 1 is non-negative.
396 KnownBits One = KnownBits::makeConstant(C: APInt(Known.getBitWidth(), 1));
397 isKnownNonNegative = KnownBits::sgt(LHS: Known, RHS: One).value_or(u: false) ||
398 KnownBits::sgt(LHS: Known2, RHS: One).value_or(u: false);
399 }
400
401 // The product of a negative number and a non-negative number is either
402 // negative or zero.
403 if (!isKnownNonNegative)
404 isKnownNegative =
405 (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
406 Known2.isNonZero()) ||
407 (isKnownNegativeOp0 && isKnownNonNegativeOp1 && Known.isNonZero());
408 }
409 }
410
411 bool SelfMultiply = Op0 == Op1;
412 if (SelfMultiply)
413 SelfMultiply &=
414 isGuaranteedNotToBeUndef(V: Op0, AC: Q.AC, CtxI: Q.CxtI, DT: Q.DT, Depth: Depth + 1);
415 Known = KnownBits::mul(LHS: Known, RHS: Known2, NoUndefSelfMultiply: SelfMultiply);
416
417 // Only make use of no-wrap flags if we failed to compute the sign bit
418 // directly. This matters if the multiplication always overflows, in
419 // which case we prefer to follow the result of the direct computation,
420 // though as the program is invoking undefined behaviour we can choose
421 // whatever we like here.
422 if (isKnownNonNegative && !Known.isNegative())
423 Known.makeNonNegative();
424 else if (isKnownNegative && !Known.isNonNegative())
425 Known.makeNegative();
426}
427
428void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
429 KnownBits &Known) {
430 unsigned BitWidth = Known.getBitWidth();
431 unsigned NumRanges = Ranges.getNumOperands() / 2;
432 assert(NumRanges >= 1);
433
434 Known.Zero.setAllBits();
435 Known.One.setAllBits();
436
437 for (unsigned i = 0; i < NumRanges; ++i) {
438 ConstantInt *Lower =
439 mdconst::extract<ConstantInt>(MD: Ranges.getOperand(I: 2 * i + 0));
440 ConstantInt *Upper =
441 mdconst::extract<ConstantInt>(MD: Ranges.getOperand(I: 2 * i + 1));
442 ConstantRange Range(Lower->getValue(), Upper->getValue());
443 // BitWidth must equal the Ranges BitWidth for the correct number of high
444 // bits to be set.
445 assert(BitWidth == Range.getBitWidth() &&
446 "Known bit width must match range bit width!");
447
448 // The first CommonPrefixBits of all values in Range are equal.
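    // For example, with an i8 range [96, 112), min = 0b01100000 and
    // max = 0b01101111 agree in their top four bits, so bits 7:4 of the
    // value are known to be 0110 and the low four bits stay unknown.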
449 unsigned CommonPrefixBits =
450 (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countl_zero();
451 APInt Mask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: CommonPrefixBits);
452 APInt UnsignedMax = Range.getUnsignedMax().zextOrTrunc(width: BitWidth);
453 Known.One &= UnsignedMax & Mask;
454 Known.Zero &= ~UnsignedMax & Mask;
455 }
456}
457
458static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
459 SmallVector<const Instruction *, 16> WorkSet(1, I);
460 SmallPtrSet<const Instruction *, 32> Visited;
461 SmallPtrSet<const Instruction *, 16> EphValues;
462
463 // The instruction defining an assumption's condition itself is always
464 // considered ephemeral to that assumption (even if it has other
465 // non-ephemeral users). See r246696's test case for an example.
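  // Illustrative example (hypothetical IR):
  //   %cmp = icmp ugt i32 %x, 7
  //   call void @llvm.assume(i1 %cmp)
  // Here %cmp is ephemeral to the assume; it exists only to feed the
  // assumption and would die with it.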
466 if (is_contained(Range: I->operands(), Element: E))
467 return true;
468
469 while (!WorkSet.empty()) {
470 const Instruction *V = WorkSet.pop_back_val();
471 if (!Visited.insert(Ptr: V).second)
472 continue;
473
474 // If all uses of this value are ephemeral, then so is this value.
475 if (all_of(Range: V->users(), P: [&](const User *U) {
476 return EphValues.count(Ptr: cast<Instruction>(Val: U));
477 })) {
478 if (V == E)
479 return true;
480
481 if (V == I || (!V->mayHaveSideEffects() && !V->isTerminator())) {
482 EphValues.insert(Ptr: V);
483
484 if (const User *U = dyn_cast<User>(Val: V)) {
485 for (const Use &U : U->operands()) {
486 if (const auto *I = dyn_cast<Instruction>(Val: U.get()))
487 WorkSet.push_back(Elt: I);
488 }
489 }
490 }
491 }
492 }
493
494 return false;
495}
496
497// Is this an intrinsic that cannot be speculated but also cannot trap?
498bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
499 if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Val: I))
500 return CI->isAssumeLikeIntrinsic();
501
502 return false;
503}
504
505bool llvm::isValidAssumeForContext(const Instruction *Inv,
506 const Instruction *CxtI,
507 const DominatorTree *DT,
508 bool AllowEphemerals) {
509 // There are two restrictions on the use of an assume:
510 // 1. The assume must dominate the context (or the control flow must
511 // reach the assume whenever it reaches the context).
512 // 2. The context must not be in the assume's set of ephemeral values
513 // (otherwise we will use the assume to prove that the condition
514 // feeding the assume is trivially true, thus causing the removal of
515 // the assume).
516
517 if (Inv->getParent() == CxtI->getParent()) {
    // If Inv and CxtI are in the same block, check whether the assume (Inv)
    // comes before the context instruction in the BB.
520 if (Inv->comesBefore(Other: CxtI))
521 return true;
522
523 // Don't let an assume affect itself - this would cause the problems
524 // `isEphemeralValueOf` is trying to prevent, and it would also make
525 // the loop below go out of bounds.
526 if (!AllowEphemerals && Inv == CxtI)
527 return false;
528
529 // The context comes first, but they're both in the same block.
530 // Make sure there is nothing in between that might interrupt
531 // the control flow, not even CxtI itself.
532 // We limit the scan distance between the assume and its context instruction
533 // to avoid a compile-time explosion. This limit is chosen arbitrarily, so
534 // it can be adjusted if needed (could be turned into a cl::opt).
535 auto Range = make_range(x: CxtI->getIterator(), y: Inv->getIterator());
536 if (!isGuaranteedToTransferExecutionToSuccessor(Range, ScanLimit: 15))
537 return false;
538
539 return AllowEphemerals || !isEphemeralValueOf(I: Inv, E: CxtI);
540 }
541
542 // Inv and CxtI are in different blocks.
543 if (DT) {
544 if (DT->dominates(Def: Inv, User: CxtI))
545 return true;
546 } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor() ||
547 Inv->getParent()->isEntryBlock()) {
548 // We don't have a DT, but this trivially dominates.
549 return true;
550 }
551
552 return false;
553}
554
555// TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
556// we still have enough information about `RHS` to conclude non-zero. For
557// example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops
558// so the extra compile time may not be worth it, but possibly a second API
559// should be created for use outside of loops.
560static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
561 // v u> y implies v != 0.
562 if (Pred == ICmpInst::ICMP_UGT)
563 return true;
564
565 // Special-case v != 0 to also handle v != null.
566 if (Pred == ICmpInst::ICMP_NE)
567 return match(V: RHS, P: m_Zero());
568
569 // All other predicates - rely on generic ConstantRange handling.
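  // E.g. (icmp ult V, 8) has true region [0, 8), which contains zero, so
  // nothing is implied, while (icmp eq V, 5) has true region [5, 6), which
  // excludes zero.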
570 const APInt *C;
571 auto Zero = APInt::getZero(numBits: RHS->getType()->getScalarSizeInBits());
572 if (match(V: RHS, P: m_APInt(Res&: C))) {
573 ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, Other: *C);
574 return !TrueValues.contains(Val: Zero);
575 }
576
577 auto *VC = dyn_cast<ConstantDataVector>(Val: RHS);
578 if (VC == nullptr)
579 return false;
580
581 for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem;
582 ++ElemIdx) {
583 ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(
584 Pred, Other: VC->getElementAsAPInt(i: ElemIdx));
585 if (TrueValues.contains(Val: Zero))
586 return false;
587 }
588 return true;
589}
590
591static void breakSelfRecursivePHI(const Use *U, const PHINode *PHI,
592 Value *&ValOut, Instruction *&CtxIOut,
593 const PHINode **PhiOut = nullptr) {
594 ValOut = U->get();
595 if (ValOut == PHI)
596 return;
597 CtxIOut = PHI->getIncomingBlock(U: *U)->getTerminator();
598 if (PhiOut)
599 *PhiOut = PHI;
600 Value *V;
  // If the Use is a select of this phi, compute the analysis on the other arm
  // to break the recursion.
  // TODO: Min/Max
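  // Illustrative example (hypothetical IR names):
  //   %sel = select i1 %c, i32 %phi, i32 %other
  //   %phi = phi i32 [ %init, %entry ], [ %sel, %loop ]
  // When the incoming value is %sel, the analysis continues on %other
  // instead of recursing back into %phi.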
604 if (match(V: ValOut, P: m_Select(C: m_Value(), L: m_Specific(V: PHI), R: m_Value(V))) ||
605 match(V: ValOut, P: m_Select(C: m_Value(), L: m_Value(V), R: m_Specific(V: PHI))))
606 ValOut = V;
607
  // Likewise, if the incoming value is itself a 2-operand phi, compute the
  // analysis on its other incoming value to break the recursion.
  // TODO: We could handle any number of incoming edges as long as we only have
  // two unique values.
612 if (auto *IncPhi = dyn_cast<PHINode>(Val: ValOut);
613 IncPhi && IncPhi->getNumIncomingValues() == 2) {
614 for (int Idx = 0; Idx < 2; ++Idx) {
615 if (IncPhi->getIncomingValue(i: Idx) == PHI) {
616 ValOut = IncPhi->getIncomingValue(i: 1 - Idx);
617 if (PhiOut)
618 *PhiOut = IncPhi;
619 CtxIOut = IncPhi->getIncomingBlock(i: 1 - Idx)->getTerminator();
620 break;
621 }
622 }
623 }
624}
625
626static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) {
627 // Use of assumptions is context-sensitive. If we don't have a context, we
628 // cannot use them!
629 if (!Q.AC || !Q.CxtI)
630 return false;
631
632 for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
633 if (!Elem.Assume)
634 continue;
635
636 AssumeInst *I = cast<AssumeInst>(Val&: Elem.Assume);
637 assert(I->getFunction() == Q.CxtI->getFunction() &&
638 "Got assumption for the wrong function!");
639
640 if (Elem.Index != AssumptionCache::ExprResultIdx) {
641 if (!V->getType()->isPointerTy())
642 continue;
643 if (RetainedKnowledge RK = getKnowledgeFromBundle(
644 Assume&: *I, BOI: I->bundle_op_info_begin()[Elem.Index])) {
645 if (RK.WasOn == V &&
646 (RK.AttrKind == Attribute::NonNull ||
647 (RK.AttrKind == Attribute::Dereferenceable &&
648 !NullPointerIsDefined(F: Q.CxtI->getFunction(),
649 AS: V->getType()->getPointerAddressSpace()))) &&
650 isValidAssumeForContext(Inv: I, CxtI: Q.CxtI, DT: Q.DT))
651 return true;
652 }
653 continue;
654 }
655
    // Warning: This loop can end up being somewhat performance sensitive.
    // It runs once for each value queried, resulting in a runtime of
    // ~O(#assumes * #values).
659
660 Value *RHS;
661 CmpPredicate Pred;
662 auto m_V = m_CombineOr(L: m_Specific(V), R: m_PtrToInt(Op: m_Specific(V)));
663 if (!match(V: I->getArgOperand(i: 0), P: m_c_ICmp(Pred, L: m_V, R: m_Value(V&: RHS))))
664 continue;
665
666 if (cmpExcludesZero(Pred, RHS) && isValidAssumeForContext(Inv: I, CxtI: Q.CxtI, DT: Q.DT))
667 return true;
668 }
669
670 return false;
671}
672
673static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred,
674 Value *LHS, Value *RHS, KnownBits &Known,
675 const SimplifyQuery &Q) {
676 if (RHS->getType()->isPointerTy()) {
677 // Handle comparison of pointer to null explicitly, as it will not be
678 // covered by the m_APInt() logic below.
679 if (LHS == V && match(V: RHS, P: m_Zero())) {
680 switch (Pred) {
681 case ICmpInst::ICMP_EQ:
682 Known.setAllZero();
683 break;
684 case ICmpInst::ICMP_SGE:
685 case ICmpInst::ICMP_SGT:
686 Known.makeNonNegative();
687 break;
688 case ICmpInst::ICMP_SLT:
689 Known.makeNegative();
690 break;
691 default:
692 break;
693 }
694 }
695 return;
696 }
697
698 unsigned BitWidth = Known.getBitWidth();
699 auto m_V =
700 m_CombineOr(L: m_Specific(V), R: m_PtrToIntSameSize(DL: Q.DL, Op: m_Specific(V)));
701
702 Value *Y;
703 const APInt *Mask, *C;
704 if (!match(V: RHS, P: m_APInt(Res&: C)))
705 return;
706
707 uint64_t ShAmt;
708 switch (Pred) {
709 case ICmpInst::ICMP_EQ:
710 // assume(V = C)
711 if (match(V: LHS, P: m_V)) {
712 Known = Known.unionWith(RHS: KnownBits::makeConstant(C: *C));
713 // assume(V & Mask = C)
714 } else if (match(V: LHS, P: m_c_And(L: m_V, R: m_Value(V&: Y)))) {
715 // For one bits in Mask, we can propagate bits from C to V.
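      // E.g. assume((V & 0x0f) == 0x03) implies the low four bits of V are
      // 0011, while the bits of V outside the mask remain unknown.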
716 Known.One |= *C;
717 if (match(V: Y, P: m_APInt(Res&: Mask)))
718 Known.Zero |= ~*C & *Mask;
719 // assume(V | Mask = C)
720 } else if (match(V: LHS, P: m_c_Or(L: m_V, R: m_Value(V&: Y)))) {
721 // For zero bits in Mask, we can propagate bits from C to V.
722 Known.Zero |= ~*C;
723 if (match(V: Y, P: m_APInt(Res&: Mask)))
724 Known.One |= *C & ~*Mask;
725 // assume(V << ShAmt = C)
726 } else if (match(V: LHS, P: m_Shl(L: m_V, R: m_ConstantInt(V&: ShAmt))) &&
727 ShAmt < BitWidth) {
728 // For those bits in C that are known, we can propagate them to known
729 // bits in V shifted to the right by ShAmt.
730 KnownBits RHSKnown = KnownBits::makeConstant(C: *C);
731 RHSKnown.Zero.lshrInPlace(ShiftAmt: ShAmt);
732 RHSKnown.One.lshrInPlace(ShiftAmt: ShAmt);
733 Known = Known.unionWith(RHS: RHSKnown);
734 // assume(V >> ShAmt = C)
735 } else if (match(V: LHS, P: m_Shr(L: m_V, R: m_ConstantInt(V&: ShAmt))) &&
736 ShAmt < BitWidth) {
737 KnownBits RHSKnown = KnownBits::makeConstant(C: *C);
      // For those bits in C that are known, we can propagate them to known
      // bits in V shifted to the left by ShAmt.
740 Known.Zero |= RHSKnown.Zero << ShAmt;
741 Known.One |= RHSKnown.One << ShAmt;
742 }
743 break;
744 case ICmpInst::ICMP_NE: {
745 // assume (V & B != 0) where B is a power of 2
746 const APInt *BPow2;
747 if (C->isZero() && match(V: LHS, P: m_And(L: m_V, R: m_Power2(V&: BPow2))))
748 Known.One |= *BPow2;
749 break;
750 }
751 default: {
752 const APInt *Offset = nullptr;
753 if (match(V: LHS, P: m_CombineOr(L: m_V, R: m_AddLike(L: m_V, R: m_APInt(Res&: Offset))))) {
754 ConstantRange LHSRange = ConstantRange::makeAllowedICmpRegion(Pred, Other: *C);
755 if (Offset)
756 LHSRange = LHSRange.sub(Other: *Offset);
757 Known = Known.unionWith(RHS: LHSRange.toKnownBits());
758 }
759 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
760 // X & Y u> C -> X u> C && Y u> C
761 // X nuw- Y u> C -> X u> C
762 if (match(V: LHS, P: m_c_And(L: m_V, R: m_Value())) ||
763 match(V: LHS, P: m_NUWSub(L: m_V, R: m_Value())))
764 Known.One.setHighBits(
765 (*C + (Pred == ICmpInst::ICMP_UGT)).countLeadingOnes());
766 }
767 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
768 // X | Y u< C -> X u< C && Y u< C
769 // X nuw+ Y u< C -> X u< C && Y u< C
770 if (match(V: LHS, P: m_c_Or(L: m_V, R: m_Value())) ||
771 match(V: LHS, P: m_c_NUWAdd(L: m_V, R: m_Value()))) {
772 Known.Zero.setHighBits(
773 (*C - (Pred == ICmpInst::ICMP_ULT)).countLeadingZeros());
774 }
775 }
776 } break;
777 }
778}
779
780static void computeKnownBitsFromICmpCond(const Value *V, ICmpInst *Cmp,
781 KnownBits &Known,
782 const SimplifyQuery &SQ, bool Invert) {
783 ICmpInst::Predicate Pred =
784 Invert ? Cmp->getInversePredicate() : Cmp->getPredicate();
785 Value *LHS = Cmp->getOperand(i_nocapture: 0);
786 Value *RHS = Cmp->getOperand(i_nocapture: 1);
787
788 // Handle icmp pred (trunc V), C
789 if (match(V: LHS, P: m_Trunc(Op: m_Specific(V)))) {
790 KnownBits DstKnown(LHS->getType()->getScalarSizeInBits());
791 computeKnownBitsFromCmp(V: LHS, Pred, LHS, RHS, Known&: DstKnown, Q: SQ);
792 if (cast<TruncInst>(Val: LHS)->hasNoUnsignedWrap())
793 Known = Known.unionWith(RHS: DstKnown.zext(BitWidth: Known.getBitWidth()));
794 else
795 Known = Known.unionWith(RHS: DstKnown.anyext(BitWidth: Known.getBitWidth()));
796 return;
797 }
798
799 computeKnownBitsFromCmp(V, Pred, LHS, RHS, Known, Q: SQ);
800}
801
802static void computeKnownBitsFromCond(const Value *V, Value *Cond,
803 KnownBits &Known, const SimplifyQuery &SQ,
804 bool Invert, unsigned Depth) {
805 Value *A, *B;
806 if (Depth < MaxAnalysisRecursionDepth &&
807 match(V: Cond, P: m_LogicalOp(L: m_Value(V&: A), R: m_Value(V&: B)))) {
808 KnownBits Known2(Known.getBitWidth());
809 KnownBits Known3(Known.getBitWidth());
810 computeKnownBitsFromCond(V, Cond: A, Known&: Known2, SQ, Invert, Depth: Depth + 1);
811 computeKnownBitsFromCond(V, Cond: B, Known&: Known3, SQ, Invert, Depth: Depth + 1);
812 if (Invert ? match(V: Cond, P: m_LogicalOr(L: m_Value(), R: m_Value()))
813 : match(V: Cond, P: m_LogicalAnd(L: m_Value(), R: m_Value())))
814 Known2 = Known2.unionWith(RHS: Known3);
815 else
816 Known2 = Known2.intersectWith(RHS: Known3);
817 Known = Known.unionWith(RHS: Known2);
818 return;
819 }
820
821 if (auto *Cmp = dyn_cast<ICmpInst>(Val: Cond)) {
822 computeKnownBitsFromICmpCond(V, Cmp, Known, SQ, Invert);
823 return;
824 }
825
826 if (match(V: Cond, P: m_Trunc(Op: m_Specific(V)))) {
827 KnownBits DstKnown(1);
828 if (Invert) {
829 DstKnown.setAllZero();
830 } else {
831 DstKnown.setAllOnes();
832 }
833 if (cast<TruncInst>(Val: Cond)->hasNoUnsignedWrap()) {
834 Known = Known.unionWith(RHS: DstKnown.zext(BitWidth: Known.getBitWidth()));
835 return;
836 }
837 Known = Known.unionWith(RHS: DstKnown.anyext(BitWidth: Known.getBitWidth()));
838 return;
839 }
840
841 if (Depth < MaxAnalysisRecursionDepth && match(V: Cond, P: m_Not(V: m_Value(V&: A))))
842 computeKnownBitsFromCond(V, Cond: A, Known, SQ, Invert: !Invert, Depth: Depth + 1);
843}
844
845void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known,
846 const SimplifyQuery &Q, unsigned Depth) {
847 // Handle injected condition.
848 if (Q.CC && Q.CC->AffectedValues.contains(Ptr: V))
849 computeKnownBitsFromCond(V, Cond: Q.CC->Cond, Known, SQ: Q, Invert: Q.CC->Invert, Depth);
850
851 if (!Q.CxtI)
852 return;
853
854 if (Q.DC && Q.DT) {
855 // Handle dominating conditions.
856 for (BranchInst *BI : Q.DC->conditionsFor(V)) {
857 BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(i: 0));
858 if (Q.DT->dominates(BBE: Edge0, BB: Q.CxtI->getParent()))
859 computeKnownBitsFromCond(V, Cond: BI->getCondition(), Known, SQ: Q,
860 /*Invert*/ false, Depth);
861
862 BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(i: 1));
863 if (Q.DT->dominates(BBE: Edge1, BB: Q.CxtI->getParent()))
864 computeKnownBitsFromCond(V, Cond: BI->getCondition(), Known, SQ: Q,
865 /*Invert*/ true, Depth);
866 }
867
868 if (Known.hasConflict())
869 Known.resetAll();
870 }
871
872 if (!Q.AC)
873 return;
874
875 unsigned BitWidth = Known.getBitWidth();
876
877 // Note that the patterns below need to be kept in sync with the code
878 // in AssumptionCache::updateAffectedValues.
879
880 for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
881 if (!Elem.Assume)
882 continue;
883
884 AssumeInst *I = cast<AssumeInst>(Val&: Elem.Assume);
885 assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
886 "Got assumption for the wrong function!");
887
888 if (Elem.Index != AssumptionCache::ExprResultIdx) {
889 if (!V->getType()->isPointerTy())
890 continue;
891 if (RetainedKnowledge RK = getKnowledgeFromBundle(
892 Assume&: *I, BOI: I->bundle_op_info_begin()[Elem.Index])) {
893 // Allow AllowEphemerals in isValidAssumeForContext, as the CxtI might
894 // be the producer of the pointer in the bundle. At the moment, align
895 // assumptions aren't optimized away.
896 if (RK.WasOn == V && RK.AttrKind == Attribute::Alignment &&
897 isPowerOf2_64(Value: RK.ArgValue) &&
898 isValidAssumeForContext(Inv: I, CxtI: Q.CxtI, DT: Q.DT, /*AllowEphemerals*/ true))
899 Known.Zero.setLowBits(Log2_64(Value: RK.ArgValue));
900 }
901 continue;
902 }
903
    // Warning: This loop can end up being somewhat performance sensitive.
    // It runs once for each value queried, resulting in a runtime of
    // ~O(#assumes * #values).
907
908 Value *Arg = I->getArgOperand(i: 0);
909
910 if (Arg == V && isValidAssumeForContext(Inv: I, CxtI: Q.CxtI, DT: Q.DT)) {
911 assert(BitWidth == 1 && "assume operand is not i1?");
912 (void)BitWidth;
913 Known.setAllOnes();
914 return;
915 }
916 if (match(V: Arg, P: m_Not(V: m_Specific(V))) &&
917 isValidAssumeForContext(Inv: I, CxtI: Q.CxtI, DT: Q.DT)) {
918 assert(BitWidth == 1 && "assume operand is not i1?");
919 (void)BitWidth;
920 Known.setAllZero();
921 return;
922 }
923 auto *Trunc = dyn_cast<TruncInst>(Val: Arg);
924 if (Trunc && Trunc->getOperand(i_nocapture: 0) == V &&
925 isValidAssumeForContext(Inv: I, CxtI: Q.CxtI, DT: Q.DT)) {
926 if (Trunc->hasNoUnsignedWrap()) {
927 Known = KnownBits::makeConstant(C: APInt(BitWidth, 1));
928 return;
929 }
930 Known.One.setBit(0);
931 return;
932 }
933
934 // The remaining tests are all recursive, so bail out if we hit the limit.
935 if (Depth == MaxAnalysisRecursionDepth)
936 continue;
937
938 ICmpInst *Cmp = dyn_cast<ICmpInst>(Val: Arg);
939 if (!Cmp)
940 continue;
941
942 if (!isValidAssumeForContext(Inv: I, CxtI: Q.CxtI, DT: Q.DT))
943 continue;
944
945 computeKnownBitsFromICmpCond(V, Cmp, Known, SQ: Q, /*Invert=*/false);
946 }
947
948 // Conflicting assumption: Undefined behavior will occur on this execution
949 // path.
950 if (Known.hasConflict())
951 Known.resetAll();
952}
953
/// Compute known bits from a shift operator, including those with a
/// non-constant shift amount. Known is the output of this function. Known2 is
/// a pre-allocated temporary with the same bit width as Known; on return it
/// contains the known bits of the value being shifted. KF is an
/// operator-specific function that, given the known bits of the shifted value
/// and of the shift amount, computes the implied known bits of the shift
/// operator's result for that shift amount. The results from calling KF are
/// conservatively combined for all permitted shift amounts.
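/// For example, the Instruction::Shl handling below passes a KF that wraps
/// KnownBits::shl with the operator's nuw/nsw flags.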
962static void computeKnownBitsFromShiftOperator(
963 const Operator *I, const APInt &DemandedElts, KnownBits &Known,
964 KnownBits &Known2, const SimplifyQuery &Q, unsigned Depth,
965 function_ref<KnownBits(const KnownBits &, const KnownBits &, bool)> KF) {
966 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
967 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Known, Q, Depth: Depth + 1);
968 // To limit compile-time impact, only query isKnownNonZero() if we know at
969 // least something about the shift amount.
970 bool ShAmtNonZero =
971 Known.isNonZero() ||
972 (Known.getMaxValue().ult(RHS: Known.getBitWidth()) &&
973 isKnownNonZero(V: I->getOperand(i: 1), DemandedElts, Q, Depth: Depth + 1));
974 Known = KF(Known2, Known, ShAmtNonZero);
975}
976
977static KnownBits
978getKnownBitsFromAndXorOr(const Operator *I, const APInt &DemandedElts,
979 const KnownBits &KnownLHS, const KnownBits &KnownRHS,
980 const SimplifyQuery &Q, unsigned Depth) {
981 unsigned BitWidth = KnownLHS.getBitWidth();
982 KnownBits KnownOut(BitWidth);
983 bool IsAnd = false;
984 bool HasKnownOne = !KnownLHS.One.isZero() || !KnownRHS.One.isZero();
985 Value *X = nullptr, *Y = nullptr;
986
987 switch (I->getOpcode()) {
988 case Instruction::And:
989 KnownOut = KnownLHS & KnownRHS;
990 IsAnd = true;
    // and(x, -x) is a common idiom that clears all but the lowest set
    // bit. If we have a single known bit in x, we can clear all bits
    // above it.
    // TODO: instcombine often reassociates independent `and` which can hide
    // this pattern. Try to match and(x, and(-x, y)) / and(and(x, y), -x).
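    // For example, if x is known to have the form ???100 (bit 2 is the
    // lowest set bit), then and(x, -x) is exactly 000100.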
996 if (HasKnownOne && match(V: I, P: m_c_And(L: m_Value(V&: X), R: m_Neg(V: m_Deferred(V: X))))) {
997 // -(-x) == x so using whichever (LHS/RHS) gets us a better result.
998 if (KnownLHS.countMaxTrailingZeros() <= KnownRHS.countMaxTrailingZeros())
999 KnownOut = KnownLHS.blsi();
1000 else
1001 KnownOut = KnownRHS.blsi();
1002 }
1003 break;
1004 case Instruction::Or:
1005 KnownOut = KnownLHS | KnownRHS;
1006 break;
1007 case Instruction::Xor:
1008 KnownOut = KnownLHS ^ KnownRHS;
    // xor(x, x-1) is a common idiom that produces a mask of all bits up to
    // and including the lowest set bit of x. If we have a single known bit
    // in x, we can clear all bits above it.
    // TODO: xor(x, x-1) is often rewritten as xor(x, x-C) where C != -1,
    // but for the purpose of demanded bits (xor(x, x-C) & Demanded) ==
    // (xor(x, x-1) & Demanded). Extend the xor pattern to use arbitrary C
    // if xor(x, x-C) is the same as xor(x, x-1).
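    // For example, if x is known to have the form ???100 (bit 2 is the
    // lowest set bit), then xor(x, x-1) is exactly 000111.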
1016 if (HasKnownOne &&
1017 match(V: I, P: m_c_Xor(L: m_Value(V&: X), R: m_Add(L: m_Deferred(V: X), R: m_AllOnes())))) {
1018 const KnownBits &XBits = I->getOperand(i: 0) == X ? KnownLHS : KnownRHS;
1019 KnownOut = XBits.blsmsk();
1020 }
1021 break;
1022 default:
1023 llvm_unreachable("Invalid Op used in 'analyzeKnownBitsFromAndXorOr'");
1024 }
1025
1026 // and(x, add (x, -1)) is a common idiom that always clears the low bit;
1027 // xor/or(x, add (x, -1)) is an idiom that will always set the low bit.
  // Here we handle the more general case of adding any odd number by
1029 // matching the form and/xor/or(x, add(x, y)) where y is odd.
1030 // TODO: This could be generalized to clearing any bit set in y where the
1031 // following bit is known to be unset in y.
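  // For example, if y is odd then bit 0 of x and of x+y always differ, so
  // and(x, x+y) has bit 0 clear while or/xor(x, x+y) have bit 0 set.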
1032 if (!KnownOut.Zero[0] && !KnownOut.One[0] &&
1033 (match(V: I, P: m_c_BinOp(L: m_Value(V&: X), R: m_c_Add(L: m_Deferred(V: X), R: m_Value(V&: Y)))) ||
1034 match(V: I, P: m_c_BinOp(L: m_Value(V&: X), R: m_Sub(L: m_Deferred(V: X), R: m_Value(V&: Y)))) ||
1035 match(V: I, P: m_c_BinOp(L: m_Value(V&: X), R: m_Sub(L: m_Value(V&: Y), R: m_Deferred(V: X)))))) {
1036 KnownBits KnownY(BitWidth);
1037 computeKnownBits(V: Y, DemandedElts, Known&: KnownY, Q, Depth: Depth + 1);
1038 if (KnownY.countMinTrailingOnes() > 0) {
1039 if (IsAnd)
1040 KnownOut.Zero.setBit(0);
1041 else
1042 KnownOut.One.setBit(0);
1043 }
1044 }
1045 return KnownOut;
1046}
1047
1048static KnownBits computeKnownBitsForHorizontalOperation(
1049 const Operator *I, const APInt &DemandedElts, const SimplifyQuery &Q,
1050 unsigned Depth,
1051 const function_ref<KnownBits(const KnownBits &, const KnownBits &)>
1052 KnownBitsFunc) {
1053 APInt DemandedEltsLHS, DemandedEltsRHS;
1054 getHorizDemandedEltsForFirstOperand(VectorBitWidth: Q.DL.getTypeSizeInBits(Ty: I->getType()),
1055 DemandedElts, DemandedLHS&: DemandedEltsLHS,
1056 DemandedRHS&: DemandedEltsRHS);
1057
1058 const auto ComputeForSingleOpFunc =
1059 [Depth, &Q, KnownBitsFunc](const Value *Op, APInt &DemandedEltsOp) {
1060 return KnownBitsFunc(
1061 computeKnownBits(V: Op, DemandedElts: DemandedEltsOp, Q, Depth: Depth + 1),
1062 computeKnownBits(V: Op, DemandedElts: DemandedEltsOp << 1, Q, Depth: Depth + 1));
1063 };
1064
1065 if (DemandedEltsRHS.isZero())
1066 return ComputeForSingleOpFunc(I->getOperand(i: 0), DemandedEltsLHS);
1067 if (DemandedEltsLHS.isZero())
1068 return ComputeForSingleOpFunc(I->getOperand(i: 1), DemandedEltsRHS);
1069
1070 return ComputeForSingleOpFunc(I->getOperand(i: 0), DemandedEltsLHS)
1071 .intersectWith(RHS: ComputeForSingleOpFunc(I->getOperand(i: 1), DemandedEltsRHS));
1072}
1073
1074// Public so this can be used in `SimplifyDemandedUseBits`.
1075KnownBits llvm::analyzeKnownBitsFromAndXorOr(const Operator *I,
1076 const KnownBits &KnownLHS,
1077 const KnownBits &KnownRHS,
1078 const SimplifyQuery &SQ,
1079 unsigned Depth) {
1080 auto *FVTy = dyn_cast<FixedVectorType>(Val: I->getType());
1081 APInt DemandedElts =
1082 FVTy ? APInt::getAllOnes(numBits: FVTy->getNumElements()) : APInt(1, 1);
1083
1084 return getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS, KnownRHS, Q: SQ,
1085 Depth);
1086}
1087
1088ConstantRange llvm::getVScaleRange(const Function *F, unsigned BitWidth) {
1089 Attribute Attr = F->getFnAttribute(Kind: Attribute::VScaleRange);
1090 // Without vscale_range, we only know that vscale is non-zero.
1091 if (!Attr.isValid())
1092 return ConstantRange(APInt(BitWidth, 1), APInt::getZero(numBits: BitWidth));
1093
1094 unsigned AttrMin = Attr.getVScaleRangeMin();
1095 // Minimum is larger than vscale width, result is always poison.
1096 if ((unsigned)llvm::bit_width(Value: AttrMin) > BitWidth)
1097 return ConstantRange::getEmpty(BitWidth);
1098
1099 APInt Min(BitWidth, AttrMin);
1100 std::optional<unsigned> AttrMax = Attr.getVScaleRangeMax();
1101 if (!AttrMax || (unsigned)llvm::bit_width(Value: *AttrMax) > BitWidth)
1102 return ConstantRange(Min, APInt::getZero(numBits: BitWidth));
1103
1104 return ConstantRange(Min, APInt(BitWidth, *AttrMax) + 1);
1105}
1106
1107void llvm::adjustKnownBitsForSelectArm(KnownBits &Known, Value *Cond,
1108 Value *Arm, bool Invert,
1109 const SimplifyQuery &Q, unsigned Depth) {
1110 // If we have a constant arm, we are done.
1111 if (Known.isConstant())
1112 return;
1113
1114 // See what condition implies about the bits of the select arm.
1115 KnownBits CondRes(Known.getBitWidth());
1116 computeKnownBitsFromCond(V: Arm, Cond, Known&: CondRes, SQ: Q, Invert, Depth: Depth + 1);
1117 // If we don't get any information from the condition, no reason to
1118 // proceed.
1119 if (CondRes.isUnknown())
1120 return;
1121
  // We can have a conflict if the condition is dead. E.g. if we have
  //   (x | 64) < 32 ? (x | 64) : y
  // we will have a conflict at bit 6 from the condition/the `or`.
  // In that case just return. It's not particularly important
  // what we do, as this select is going to be simplified soon.
1127 CondRes = CondRes.unionWith(RHS: Known);
1128 if (CondRes.hasConflict())
1129 return;
1130
1131 // Finally make sure the information we found is valid. This is relatively
1132 // expensive so it's left for the very end.
1133 if (!isGuaranteedNotToBeUndef(V: Arm, AC: Q.AC, CtxI: Q.CxtI, DT: Q.DT, Depth: Depth + 1))
1134 return;
1135
  // Finally, we know we get information from the condition and it's valid,
  // so return it.
1138 Known = CondRes;
1139}
1140
1141// Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow).
1142// Returns the input and lower/upper bounds.
1143static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
1144 const APInt *&CLow, const APInt *&CHigh) {
1145 assert(isa<Operator>(Select) &&
1146 cast<Operator>(Select)->getOpcode() == Instruction::Select &&
1147 "Input should be a Select!");
1148
1149 const Value *LHS = nullptr, *RHS = nullptr;
1150 SelectPatternFlavor SPF = matchSelectPattern(V: Select, LHS, RHS).Flavor;
1151 if (SPF != SPF_SMAX && SPF != SPF_SMIN)
1152 return false;
1153
1154 if (!match(V: RHS, P: m_APInt(Res&: CLow)))
1155 return false;
1156
1157 const Value *LHS2 = nullptr, *RHS2 = nullptr;
1158 SelectPatternFlavor SPF2 = matchSelectPattern(V: LHS, LHS&: LHS2, RHS&: RHS2).Flavor;
1159 if (getInverseMinMaxFlavor(SPF) != SPF2)
1160 return false;
1161
1162 if (!match(V: RHS2, P: m_APInt(Res&: CHigh)))
1163 return false;
1164
1165 if (SPF == SPF_SMIN)
1166 std::swap(a&: CLow, b&: CHigh);
1167
1168 In = LHS2;
1169 return CLow->sle(RHS: *CHigh);
1170}
1171
1172static bool isSignedMinMaxIntrinsicClamp(const IntrinsicInst *II,
1173 const APInt *&CLow,
1174 const APInt *&CHigh) {
1175 assert((II->getIntrinsicID() == Intrinsic::smin ||
1176 II->getIntrinsicID() == Intrinsic::smax) &&
1177 "Must be smin/smax");
1178
1179 Intrinsic::ID InverseID = getInverseMinMaxIntrinsic(MinMaxID: II->getIntrinsicID());
1180 auto *InnerII = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: 0));
1181 if (!InnerII || InnerII->getIntrinsicID() != InverseID ||
1182 !match(V: II->getArgOperand(i: 1), P: m_APInt(Res&: CLow)) ||
1183 !match(V: InnerII->getArgOperand(i: 1), P: m_APInt(Res&: CHigh)))
1184 return false;
1185
1186 if (II->getIntrinsicID() == Intrinsic::smin)
1187 std::swap(a&: CLow, b&: CHigh);
1188 return CLow->sle(RHS: *CHigh);
1189}
1190
1191static void unionWithMinMaxIntrinsicClamp(const IntrinsicInst *II,
1192 KnownBits &Known) {
1193 const APInt *CLow, *CHigh;
1194 if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh))
1195 Known = Known.unionWith(
1196 RHS: ConstantRange::getNonEmpty(Lower: *CLow, Upper: *CHigh + 1).toKnownBits());
1197}
1198
1199static void computeKnownBitsFromOperator(const Operator *I,
1200 const APInt &DemandedElts,
1201 KnownBits &Known,
1202 const SimplifyQuery &Q,
1203 unsigned Depth) {
1204 unsigned BitWidth = Known.getBitWidth();
1205
1206 KnownBits Known2(BitWidth);
1207 switch (I->getOpcode()) {
1208 default: break;
1209 case Instruction::Load:
1210 if (MDNode *MD =
1211 Q.IIQ.getMetadata(I: cast<LoadInst>(Val: I), KindID: LLVMContext::MD_range))
1212 computeKnownBitsFromRangeMetadata(Ranges: *MD, Known);
1213 break;
1214 case Instruction::And:
1215 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Known, Q, Depth: Depth + 1);
1216 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1217
1218 Known = getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS: Known2, KnownRHS: Known, Q, Depth);
1219 break;
1220 case Instruction::Or:
1221 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Known, Q, Depth: Depth + 1);
1222 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1223
1224 Known = getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS: Known2, KnownRHS: Known, Q, Depth);
1225 break;
1226 case Instruction::Xor:
1227 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Known, Q, Depth: Depth + 1);
1228 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1229
1230 Known = getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS: Known2, KnownRHS: Known, Q, Depth);
1231 break;
1232 case Instruction::Mul: {
1233 bool NSW = Q.IIQ.hasNoSignedWrap(Op: cast<OverflowingBinaryOperator>(Val: I));
1234 bool NUW = Q.IIQ.hasNoUnsignedWrap(Op: cast<OverflowingBinaryOperator>(Val: I));
1235 computeKnownBitsMul(Op0: I->getOperand(i: 0), Op1: I->getOperand(i: 1), NSW, NUW,
1236 DemandedElts, Known, Known2, Q, Depth);
1237 break;
1238 }
1239 case Instruction::UDiv: {
1240 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known, Q, Depth: Depth + 1);
1241 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1242 Known =
1243 KnownBits::udiv(LHS: Known, RHS: Known2, Exact: Q.IIQ.isExact(Op: cast<BinaryOperator>(Val: I)));
1244 break;
1245 }
1246 case Instruction::SDiv: {
1247 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known, Q, Depth: Depth + 1);
1248 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1249 Known =
1250 KnownBits::sdiv(LHS: Known, RHS: Known2, Exact: Q.IIQ.isExact(Op: cast<BinaryOperator>(Val: I)));
1251 break;
1252 }
1253 case Instruction::Select: {
1254 auto ComputeForArm = [&](Value *Arm, bool Invert) {
1255 KnownBits Res(Known.getBitWidth());
1256 computeKnownBits(V: Arm, DemandedElts, Known&: Res, Q, Depth: Depth + 1);
1257 adjustKnownBitsForSelectArm(Known&: Res, Cond: I->getOperand(i: 0), Arm, Invert, Q, Depth);
1258 return Res;
1259 };
1260 // Only known if known in both the LHS and RHS.
1261 Known =
1262 ComputeForArm(I->getOperand(i: 1), /*Invert=*/false)
1263 .intersectWith(RHS: ComputeForArm(I->getOperand(i: 2), /*Invert=*/true));
1264 break;
1265 }
1266 case Instruction::FPTrunc:
1267 case Instruction::FPExt:
1268 case Instruction::FPToUI:
1269 case Instruction::FPToSI:
1270 case Instruction::SIToFP:
1271 case Instruction::UIToFP:
1272 break; // Can't work with floating point.
1273 case Instruction::PtrToInt:
1274 case Instruction::IntToPtr:
1275 // Fall through and handle them the same as zext/trunc.
1276 [[fallthrough]];
1277 case Instruction::ZExt:
1278 case Instruction::Trunc: {
1279 Type *SrcTy = I->getOperand(i: 0)->getType();
1280
1281 unsigned SrcBitWidth;
1282 // Note that we handle pointer operands here because of inttoptr/ptrtoint
1283 // which fall through here.
1284 Type *ScalarTy = SrcTy->getScalarType();
1285 SrcBitWidth = ScalarTy->isPointerTy() ?
1286 Q.DL.getPointerTypeSizeInBits(ScalarTy) :
1287 Q.DL.getTypeSizeInBits(Ty: ScalarTy);
1288
1289 assert(SrcBitWidth && "SrcBitWidth can't be zero");
1290 Known = Known.anyextOrTrunc(BitWidth: SrcBitWidth);
1291 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known, Q, Depth: Depth + 1);
1292 if (auto *Inst = dyn_cast<PossiblyNonNegInst>(Val: I);
1293 Inst && Inst->hasNonNeg() && !Known.isNegative())
1294 Known.makeNonNegative();
1295 Known = Known.zextOrTrunc(BitWidth);
1296 break;
1297 }
1298 case Instruction::BitCast: {
1299 Type *SrcTy = I->getOperand(i: 0)->getType();
1300 if (SrcTy->isIntOrPtrTy() &&
1301 // TODO: For now, not handling conversions like:
1302 // (bitcast i64 %x to <2 x i32>)
1303 !I->getType()->isVectorTy()) {
1304 computeKnownBits(V: I->getOperand(i: 0), Known, Q, Depth: Depth + 1);
1305 break;
1306 }
1307
1308 const Value *V;
1309 // Handle bitcast from floating point to integer.
1310 if (match(V: I, P: m_ElementWiseBitCast(Op: m_Value(V))) &&
1311 V->getType()->isFPOrFPVectorTy()) {
1312 Type *FPType = V->getType()->getScalarType();
1313 KnownFPClass Result =
1314 computeKnownFPClass(V, DemandedElts, InterestedClasses: fcAllFlags, SQ: Q, Depth: Depth + 1);
1315 FPClassTest FPClasses = Result.KnownFPClasses;
1316
1317 // TODO: Treat it as zero/poison if the use of I is unreachable.
1318 if (FPClasses == fcNone)
1319 break;
1320
1321 if (Result.isKnownNever(Mask: fcNormal | fcSubnormal | fcNan)) {
1322 Known.Zero.setAllBits();
1323 Known.One.setAllBits();
1324
1325 if (FPClasses & fcInf)
1326 Known = Known.intersectWith(RHS: KnownBits::makeConstant(
1327 C: APFloat::getInf(Sem: FPType->getFltSemantics()).bitcastToAPInt()));
1328
1329 if (FPClasses & fcZero)
1330 Known = Known.intersectWith(RHS: KnownBits::makeConstant(
1331 C: APInt::getZero(numBits: FPType->getScalarSizeInBits())));
1332
1333 Known.Zero.clearSignBit();
1334 Known.One.clearSignBit();
1335 }
1336
1337 if (Result.SignBit) {
1338 if (*Result.SignBit)
1339 Known.makeNegative();
1340 else
1341 Known.makeNonNegative();
1342 }
1343
1344 break;
1345 }
1346
1347 // Handle cast from vector integer type to scalar or vector integer.
1348 auto *SrcVecTy = dyn_cast<FixedVectorType>(Val: SrcTy);
1349 if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() ||
1350 !I->getType()->isIntOrIntVectorTy() ||
1351 isa<ScalableVectorType>(Val: I->getType()))
1352 break;
1353
1354 // Look through a cast from narrow vector elements to wider type.
    // Examples: v4i32 -> v2i64, v3i8 -> i24
1356 unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
1357 if (BitWidth % SubBitWidth == 0) {
1358 // Known bits are automatically intersected across demanded elements of a
1359 // vector. So for example, if a bit is computed as known zero, it must be
1360 // zero across all demanded elements of the vector.
1361 //
1362 // For this bitcast, each demanded element of the output is sub-divided
1363 // across a set of smaller vector elements in the source vector. To get
1364 // the known bits for an entire element of the output, compute the known
1365 // bits for each sub-element sequentially. This is done by shifting the
1366 // one-set-bit demanded elements parameter across the sub-elements for
1367 // consecutive calls to computeKnownBits. We are using the demanded
1368 // elements parameter as a mask operator.
1369 //
1370 // The known bits of each sub-element are then inserted into place
1371 // (dependent on endian) to form the full result of known bits.
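      // For example, bitcasting <4 x i8> to <2 x i16> with element 0 of the
      // result demanded queries source sub-elements 0 and 1 separately; on a
      // little-endian target sub-element 0 supplies bits 7:0 and sub-element
      // 1 supplies bits 15:8 of the known result.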
1372 unsigned NumElts = DemandedElts.getBitWidth();
1373 unsigned SubScale = BitWidth / SubBitWidth;
1374 APInt SubDemandedElts = APInt::getZero(numBits: NumElts * SubScale);
1375 for (unsigned i = 0; i != NumElts; ++i) {
1376 if (DemandedElts[i])
1377 SubDemandedElts.setBit(i * SubScale);
1378 }
1379
1380 KnownBits KnownSrc(SubBitWidth);
1381 for (unsigned i = 0; i != SubScale; ++i) {
1382 computeKnownBits(V: I->getOperand(i: 0), DemandedElts: SubDemandedElts.shl(shiftAmt: i), Known&: KnownSrc, Q,
1383 Depth: Depth + 1);
1384 unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
1385 Known.insertBits(SubBits: KnownSrc, BitPosition: ShiftElt * SubBitWidth);
1386 }
1387 }
1388 break;
1389 }
1390 case Instruction::SExt: {
1391 // Compute the bits in the result that are not present in the input.
1392 unsigned SrcBitWidth = I->getOperand(i: 0)->getType()->getScalarSizeInBits();
1393
1394 Known = Known.trunc(BitWidth: SrcBitWidth);
1395 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known, Q, Depth: Depth + 1);
1396 // If the sign bit of the input is known set or clear, then we know the
1397 // top bits of the result.
1398 Known = Known.sext(BitWidth);
1399 break;
1400 }
1401 case Instruction::Shl: {
1402 bool NUW = Q.IIQ.hasNoUnsignedWrap(Op: cast<OverflowingBinaryOperator>(Val: I));
1403 bool NSW = Q.IIQ.hasNoSignedWrap(Op: cast<OverflowingBinaryOperator>(Val: I));
1404 auto KF = [NUW, NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1405 bool ShAmtNonZero) {
1406 return KnownBits::shl(LHS: KnownVal, RHS: KnownAmt, NUW, NSW, ShAmtNonZero);
1407 };
1408 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Q, Depth,
1409 KF);
    // Trailing zeros of a left-shifted constant never decrease.
1411 const APInt *C;
1412 if (match(V: I->getOperand(i: 0), P: m_APInt(Res&: C)))
1413 Known.Zero.setLowBits(C->countr_zero());
1414 break;
1415 }
1416 case Instruction::LShr: {
1417 bool Exact = Q.IIQ.isExact(Op: cast<BinaryOperator>(Val: I));
1418 auto KF = [Exact](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1419 bool ShAmtNonZero) {
1420 return KnownBits::lshr(LHS: KnownVal, RHS: KnownAmt, ShAmtNonZero, Exact);
1421 };
1422 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Q, Depth,
1423 KF);
    // Leading zeros of a right-shifted constant never decrease.
1425 const APInt *C;
1426 if (match(V: I->getOperand(i: 0), P: m_APInt(Res&: C)))
1427 Known.Zero.setHighBits(C->countl_zero());
1428 break;
1429 }
1430 case Instruction::AShr: {
1431 bool Exact = Q.IIQ.isExact(Op: cast<BinaryOperator>(Val: I));
1432 auto KF = [Exact](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1433 bool ShAmtNonZero) {
1434 return KnownBits::ashr(LHS: KnownVal, RHS: KnownAmt, ShAmtNonZero, Exact);
1435 };
1436 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Q, Depth,
1437 KF);
1438 break;
1439 }
1440 case Instruction::Sub: {
1441 bool NSW = Q.IIQ.hasNoSignedWrap(Op: cast<OverflowingBinaryOperator>(Val: I));
1442 bool NUW = Q.IIQ.hasNoUnsignedWrap(Op: cast<OverflowingBinaryOperator>(Val: I));
1443 computeKnownBitsAddSub(Add: false, Op0: I->getOperand(i: 0), Op1: I->getOperand(i: 1), NSW, NUW,
1444 DemandedElts, KnownOut&: Known, Known2, Q, Depth);
1445 break;
1446 }
1447 case Instruction::Add: {
1448 bool NSW = Q.IIQ.hasNoSignedWrap(Op: cast<OverflowingBinaryOperator>(Val: I));
1449 bool NUW = Q.IIQ.hasNoUnsignedWrap(Op: cast<OverflowingBinaryOperator>(Val: I));
1450 computeKnownBitsAddSub(Add: true, Op0: I->getOperand(i: 0), Op1: I->getOperand(i: 1), NSW, NUW,
1451 DemandedElts, KnownOut&: Known, Known2, Q, Depth);
1452 break;
1453 }
1454 case Instruction::SRem:
1455 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known, Q, Depth: Depth + 1);
1456 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1457 Known = KnownBits::srem(LHS: Known, RHS: Known2);
1458 break;
1459
1460 case Instruction::URem:
1461 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known, Q, Depth: Depth + 1);
1462 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1463 Known = KnownBits::urem(LHS: Known, RHS: Known2);
1464 break;
1465 case Instruction::Alloca:
1466 Known.Zero.setLowBits(Log2(A: cast<AllocaInst>(Val: I)->getAlign()));
1467 break;
1468 case Instruction::GetElementPtr: {
1469 // Analyze all of the subscripts of this getelementptr instruction
1470 // to determine if we can prove known low zero bits.
1471 computeKnownBits(V: I->getOperand(i: 0), Known, Q, Depth: Depth + 1);
1472 // Accumulate the constant indices in a separate variable
1473 // to minimize the number of calls to computeForAddSub.
1474 unsigned IndexWidth = Q.DL.getIndexTypeSizeInBits(Ty: I->getType());
1475 APInt AccConstIndices(IndexWidth, 0);
1476
1477 auto AddIndexToKnown = [&](KnownBits IndexBits) {
1478 if (IndexWidth == BitWidth) {
1479 // Note that inbounds does *not* guarantee nsw for the addition, as only
1480 // the offset is signed, while the base address is unsigned.
1481 Known = KnownBits::add(LHS: Known, RHS: IndexBits);
1482 } else {
1483 // If the index width is smaller than the pointer width, only add the
1484 // value to the low bits.
1485 assert(IndexWidth < BitWidth &&
1486 "Index width can't be larger than pointer width");
1487 Known.insertBits(SubBits: KnownBits::add(LHS: Known.trunc(BitWidth: IndexWidth), RHS: IndexBits), BitPosition: 0);
1488 }
1489 };
1490
1491 gep_type_iterator GTI = gep_type_begin(GEP: I);
1492 for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
1493 // TrailZ can only become smaller, short-circuit if we hit zero.
1494 if (Known.isUnknown())
1495 break;
1496
1497 Value *Index = I->getOperand(i);
1498
1499 // Handle case when index is zero.
1500 Constant *CIndex = dyn_cast<Constant>(Val: Index);
1501 if (CIndex && CIndex->isZeroValue())
1502 continue;
1503
1504 if (StructType *STy = GTI.getStructTypeOrNull()) {
1505 // Handle struct member offset arithmetic.
1506
1507 assert(CIndex &&
1508 "Access to structure field must be known at compile time");
1509
1510 if (CIndex->getType()->isVectorTy())
1511 Index = CIndex->getSplatValue();
1512
1513 unsigned Idx = cast<ConstantInt>(Val: Index)->getZExtValue();
1514 const StructLayout *SL = Q.DL.getStructLayout(Ty: STy);
1515 uint64_t Offset = SL->getElementOffset(Idx);
1516 AccConstIndices += Offset;
1517 continue;
1518 }
1519
1520 // Handle array index arithmetic.
1521 Type *IndexedTy = GTI.getIndexedType();
1522 if (!IndexedTy->isSized()) {
1523 Known.resetAll();
1524 break;
1525 }
1526
1527 TypeSize Stride = GTI.getSequentialElementStride(DL: Q.DL);
1528 uint64_t StrideInBytes = Stride.getKnownMinValue();
1529 if (!Stride.isScalable()) {
1530 // Fast path for constant offset.
1531 if (auto *CI = dyn_cast<ConstantInt>(Val: Index)) {
1532 AccConstIndices +=
1533 CI->getValue().sextOrTrunc(width: IndexWidth) * StrideInBytes;
1534 continue;
1535 }
1536 }
1537
1538 KnownBits IndexBits =
1539 computeKnownBits(V: Index, Q, Depth: Depth + 1).sextOrTrunc(BitWidth: IndexWidth);
1540 KnownBits ScalingFactor(IndexWidth);
1541 // Multiply by current sizeof type.
1542 // &A[i] == A + i * sizeof(*A[i]).
1543 if (Stride.isScalable()) {
1544 // For scalable types the only thing we know about sizeof is
1545 // that this is a multiple of the minimum size.
1546 ScalingFactor.Zero.setLowBits(llvm::countr_zero(Val: StrideInBytes));
1547 } else {
1548 ScalingFactor =
1549 KnownBits::makeConstant(C: APInt(IndexWidth, StrideInBytes));
1550 }
1551 AddIndexToKnown(KnownBits::mul(LHS: IndexBits, RHS: ScalingFactor));
1552 }
1553 if (!Known.isUnknown() && !AccConstIndices.isZero())
1554 AddIndexToKnown(KnownBits::makeConstant(C: AccConstIndices));
1555 break;
1556 }
1557 case Instruction::PHI: {
1558 const PHINode *P = cast<PHINode>(Val: I);
1559 BinaryOperator *BO = nullptr;
1560 Value *R = nullptr, *L = nullptr;
1561 if (matchSimpleRecurrence(P, BO, Start&: R, Step&: L)) {
1562 // Handle the case of a simple two-predecessor recurrence PHI.
1563 // There's a lot more that could theoretically be done here, but
1564 // this is sufficient to catch some interesting cases.
1565 unsigned Opcode = BO->getOpcode();
1566
1567 switch (Opcode) {
1568 // If this is a shift recurrence, we know the bits being shifted in. We
1569 // can combine that with information about the start value of the
1570 // recurrence to conclude facts about the result. If this is a udiv
1571 // recurrence, we know that the result can never exceed either the
1572 // numerator or the start value, whichever is greater.
1573 case Instruction::LShr:
1574 case Instruction::AShr:
1575 case Instruction::Shl:
1576 case Instruction::UDiv:
1577 if (BO->getOperand(i_nocapture: 0) != I)
1578 break;
1579 [[fallthrough]];
1580
1581 // For a urem recurrence, the result can never exceed the start value. The
1582 // phi could either be the numerator or the denominator.
1583 case Instruction::URem: {
1584 // We have matched a recurrence of the form:
1585 // %iv = [R, %entry], [%iv.next, %backedge]
1586 // %iv.next = shift_op %iv, L
1587
1588 // Recurse with the phi context to avoid concern about whether facts
1589 // inferred hold at original context instruction. TODO: It may be
        // correct to use the original context. If warranted, explore and
1591 // add sufficient tests to cover.
1592 SimplifyQuery RecQ = Q.getWithoutCondContext();
1593 RecQ.CxtI = P;
1594 computeKnownBits(V: R, DemandedElts, Known&: Known2, Q: RecQ, Depth: Depth + 1);
1595 switch (Opcode) {
1596 case Instruction::Shl:
          // A shl recurrence will only increase the trailing zeros.
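          // For example, if the start value has at least two trailing zeros
          // (a multiple of 4), every value of the phi keeps at least two
          // trailing zeros.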
1598 Known.Zero.setLowBits(Known2.countMinTrailingZeros());
1599 break;
1600 case Instruction::LShr:
1601 case Instruction::UDiv:
1602 case Instruction::URem:
1603 // lshr, udiv, and urem recurrences will preserve the leading zeros of
1604 // the start value.
1605 Known.Zero.setHighBits(Known2.countMinLeadingZeros());
1606 break;
1607 case Instruction::AShr:
1608 // An ashr recurrence will extend the initial sign bit
1609 Known.Zero.setHighBits(Known2.countMinLeadingZeros());
1610 Known.One.setHighBits(Known2.countMinLeadingOnes());
1611 break;
1612 }
1613 break;
1614 }
1615
1616 // Check for operations that have the property that if
1617 // both their operands have low zero bits, the result
1618 // will have low zero bits.
1619 case Instruction::Add:
1620 case Instruction::Sub:
1621 case Instruction::And:
1622 case Instruction::Or:
1623 case Instruction::Mul: {
1624 // Change the context instruction to the "edge" that flows into the
1625 // phi. This is important because that is where the value is actually
1626 // "evaluated" even though it is used later somewhere else. (see also
1627 // D69571).
1628 SimplifyQuery RecQ = Q.getWithoutCondContext();
1629
1630 unsigned OpNum = P->getOperand(i_nocapture: 0) == R ? 0 : 1;
1631 Instruction *RInst = P->getIncomingBlock(i: OpNum)->getTerminator();
1632 Instruction *LInst = P->getIncomingBlock(i: 1 - OpNum)->getTerminator();
1633
1634 // Ok, we have a PHI of the form L op= R. Check for low
1635 // zero bits.
1636 RecQ.CxtI = RInst;
1637 computeKnownBits(V: R, DemandedElts, Known&: Known2, Q: RecQ, Depth: Depth + 1);
1638
1639 // We need to take the minimum number of known bits
1640 KnownBits Known3(BitWidth);
1641 RecQ.CxtI = LInst;
1642 computeKnownBits(V: L, DemandedElts, Known&: Known3, Q: RecQ, Depth: Depth + 1);
1643
1644 Known.Zero.setLowBits(std::min(a: Known2.countMinTrailingZeros(),
1645 b: Known3.countMinTrailingZeros()));
1646
1647 auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(Val: BO);
1648 if (!OverflowOp || !Q.IIQ.hasNoSignedWrap(Op: OverflowOp))
1649 break;
1650
1651 switch (Opcode) {
1652 // If initial value of recurrence is nonnegative, and we are adding
1653 // a nonnegative number with nsw, the result can only be nonnegative
1654 // or poison value regardless of the number of times we execute the
1655 // add in phi recurrence. If initial value is negative and we are
1656 // adding a negative number with nsw, the result can only be
1657 // negative or poison value. Similar arguments apply to sub and mul.
1658 //
1659 // (add non-negative, non-negative) --> non-negative
1660 // (add negative, negative) --> negative
1661 case Instruction::Add: {
1662 if (Known2.isNonNegative() && Known3.isNonNegative())
1663 Known.makeNonNegative();
1664 else if (Known2.isNegative() && Known3.isNegative())
1665 Known.makeNegative();
1666 break;
1667 }
1668
1669 // (sub nsw non-negative, negative) --> non-negative
1670 // (sub nsw negative, non-negative) --> negative
1671 case Instruction::Sub: {
1672 if (BO->getOperand(i_nocapture: 0) != I)
1673 break;
1674 if (Known2.isNonNegative() && Known3.isNegative())
1675 Known.makeNonNegative();
1676 else if (Known2.isNegative() && Known3.isNonNegative())
1677 Known.makeNegative();
1678 break;
1679 }
1680
1681 // (mul nsw non-negative, non-negative) --> non-negative
1682 case Instruction::Mul:
1683 if (Known2.isNonNegative() && Known3.isNonNegative())
1684 Known.makeNonNegative();
1685 break;
1686
1687 default:
1688 break;
1689 }
1690 break;
1691 }
1692
1693 default:
1694 break;
1695 }
1696 }
1697
1698 // Unreachable blocks may have zero-operand PHI nodes.
1699 if (P->getNumIncomingValues() == 0)
1700 break;
1701
    // Otherwise take the intersection of the known bit sets of the operands,
    // taking conservative care to avoid excessive recursion.
1704 if (Depth < MaxAnalysisRecursionDepth - 1 && Known.isUnknown()) {
      // Skip if every incoming value refers back to the phi itself.
1706 if (isa_and_nonnull<UndefValue>(Val: P->hasConstantValue()))
1707 break;
1708
1709 Known.Zero.setAllBits();
1710 Known.One.setAllBits();
1711 for (const Use &U : P->operands()) {
1712 Value *IncValue;
1713 const PHINode *CxtPhi;
1714 Instruction *CxtI;
1715 breakSelfRecursivePHI(U: &U, PHI: P, ValOut&: IncValue, CtxIOut&: CxtI, PhiOut: &CxtPhi);
1716 // Skip direct self references.
1717 if (IncValue == P)
1718 continue;
1719
1720 // Change the context instruction to the "edge" that flows into the
1721 // phi. This is important because that is where the value is actually
1722 // "evaluated" even though it is used later somewhere else. (see also
1723 // D69571).
1724 SimplifyQuery RecQ = Q.getWithoutCondContext().getWithInstruction(I: CxtI);
1725
1726 Known2 = KnownBits(BitWidth);
1727
1728 // Recurse, but cap the recursion to one level, because we don't
1729 // want to waste time spinning around in loops.
1730 // TODO: See if we can base recursion limiter on number of incoming phi
1731 // edges so we don't overly clamp analysis.
1732 computeKnownBits(V: IncValue, DemandedElts, Known&: Known2, Q: RecQ,
1733 Depth: MaxAnalysisRecursionDepth - 1);
1734
1735 // See if we can further use a conditional branch into the phi
1736 // to help us determine the range of the value.
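        // For example (hypothetical IR): if the edge into the phi is only
        // taken when "icmp ult %inc, 16" is true, the incoming value lies in
        // [0, 16), so bits 4 and above are known zero on that edge.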
1737 if (!Known2.isConstant()) {
1738 CmpPredicate Pred;
1739 const APInt *RHSC;
1740 BasicBlock *TrueSucc, *FalseSucc;
1741 // TODO: Use RHS Value and compute range from its known bits.
1742 if (match(V: RecQ.CxtI,
1743 P: m_Br(C: m_c_ICmp(Pred, L: m_Specific(V: IncValue), R: m_APInt(Res&: RHSC)),
1744 T: m_BasicBlock(V&: TrueSucc), F: m_BasicBlock(V&: FalseSucc)))) {
1745 // Check for cases of duplicate successors.
1746 if ((TrueSucc == CxtPhi->getParent()) !=
1747 (FalseSucc == CxtPhi->getParent())) {
1748 // If we're using the false successor, invert the predicate.
1749 if (FalseSucc == CxtPhi->getParent())
1750 Pred = CmpInst::getInversePredicate(pred: Pred);
1751 // Get the knownbits implied by the incoming phi condition.
1752 auto CR = ConstantRange::makeExactICmpRegion(Pred, Other: *RHSC);
1753 KnownBits KnownUnion = Known2.unionWith(RHS: CR.toKnownBits());
              // We can have conflicts here if we are analyzing dead code (it
              // is impossible for us to reach this BB based on the icmp).
1756 if (KnownUnion.hasConflict()) {
1757 // No reason to continue analyzing in a known dead region, so
1758 // just resetAll and break. This will cause us to also exit the
1759 // outer loop.
1760 Known.resetAll();
1761 break;
1762 }
1763 Known2 = KnownUnion;
1764 }
1765 }
1766 }
1767
1768 Known = Known.intersectWith(RHS: Known2);
1769 // If all bits have been ruled out, there's no need to check
1770 // more operands.
1771 if (Known.isUnknown())
1772 break;
1773 }
1774 }
1775 break;
1776 }
1777 case Instruction::Call:
1778 case Instruction::Invoke: {
1779 // If range metadata is attached to this call, set known bits from that,
1780 // and then intersect with known bits based on other properties of the
1781 // function.
1782 if (MDNode *MD =
1783 Q.IIQ.getMetadata(I: cast<Instruction>(Val: I), KindID: LLVMContext::MD_range))
1784 computeKnownBitsFromRangeMetadata(Ranges: *MD, Known);
1785
1786 const auto *CB = cast<CallBase>(Val: I);
1787
1788 if (std::optional<ConstantRange> Range = CB->getRange())
1789 Known = Known.unionWith(RHS: Range->toKnownBits());
1790
1791 if (const Value *RV = CB->getReturnedArgOperand()) {
1792 if (RV->getType() == I->getType()) {
1793 computeKnownBits(V: RV, Known&: Known2, Q, Depth: Depth + 1);
1794 Known = Known.unionWith(RHS: Known2);
1795 // If the function doesn't return properly for all input values
1796 // (e.g. unreachable exits) then there might be conflicts between the
1797 // argument value and the range metadata. Simply discard the known bits
1798 // in case of conflicts.
1799 if (Known.hasConflict())
1800 Known.resetAll();
1801 }
1802 }
1803 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
1804 switch (II->getIntrinsicID()) {
1805 default:
1806 break;
1807 case Intrinsic::abs: {
1808 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1809 bool IntMinIsPoison = match(V: II->getArgOperand(i: 1), P: m_One());
1810 Known = Known2.abs(IntMinIsPoison);
1811 break;
1812 }
1813 case Intrinsic::bitreverse:
1814 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1815 Known.Zero |= Known2.Zero.reverseBits();
1816 Known.One |= Known2.One.reverseBits();
1817 break;
1818 case Intrinsic::bswap:
1819 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1820 Known.Zero |= Known2.Zero.byteSwap();
1821 Known.One |= Known2.One.byteSwap();
1822 break;
1823 case Intrinsic::ctlz: {
1824 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1825 // If we have a known 1, its position is our upper bound.
1826 unsigned PossibleLZ = Known2.countMaxLeadingZeros();
        // If this call is poison for a zero input, the result is at most
        // BitWidth - 1.
1828 if (II->getArgOperand(i: 1) == ConstantInt::getTrue(Context&: II->getContext()))
1829 PossibleLZ = std::min(a: PossibleLZ, b: BitWidth - 1);
1830 unsigned LowBits = llvm::bit_width(Value: PossibleLZ);
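        // For example, if at most 11 leading zeros are possible, the result
        // fits in bit_width(11) == 4 bits, so all higher bits are known zero.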
1831 Known.Zero.setBitsFrom(LowBits);
1832 break;
1833 }
1834 case Intrinsic::cttz: {
1835 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1836 // If we have a known 1, its position is our upper bound.
1837 unsigned PossibleTZ = Known2.countMaxTrailingZeros();
        // If this call is poison for a zero input, the result is at most
        // BitWidth - 1.
1839 if (II->getArgOperand(i: 1) == ConstantInt::getTrue(Context&: II->getContext()))
1840 PossibleTZ = std::min(a: PossibleTZ, b: BitWidth - 1);
1841 unsigned LowBits = llvm::bit_width(Value: PossibleTZ);
1842 Known.Zero.setBitsFrom(LowBits);
1843 break;
1844 }
1845 case Intrinsic::ctpop: {
1846 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1847 // We can bound the space the count needs. Also, bits known to be zero
1848 // can't contribute to the population.
1849 unsigned BitsPossiblySet = Known2.countMaxPopulation();
1850 unsigned LowBits = llvm::bit_width(Value: BitsPossiblySet);
1851 Known.Zero.setBitsFrom(LowBits);
1852 // TODO: we could bound KnownOne using the lower bound on the number
1853 // of bits which might be set provided by popcnt KnownOne2.
1854 break;
1855 }
1856 case Intrinsic::fshr:
1857 case Intrinsic::fshl: {
1858 const APInt *SA;
1859 if (!match(V: I->getOperand(i: 2), P: m_APInt(Res&: SA)))
1860 break;
1861
1862 // Normalize to funnel shift left.
1863 uint64_t ShiftAmt = SA->urem(RHS: BitWidth);
1864 if (II->getIntrinsicID() == Intrinsic::fshr)
1865 ShiftAmt = BitWidth - ShiftAmt;
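        // For example, with BitWidth == 8, fshr(X, Y, 3) is equivalent to
        // fshl(X, Y, 5): X is shifted left by 5 and the top 5 bits of Y fill
        // the low bits.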
1866
1867 KnownBits Known3(BitWidth);
1868 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1869 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Known&: Known3, Q, Depth: Depth + 1);
1870
1871 Known.Zero =
1872 Known2.Zero.shl(shiftAmt: ShiftAmt) | Known3.Zero.lshr(shiftAmt: BitWidth - ShiftAmt);
1873 Known.One =
1874 Known2.One.shl(shiftAmt: ShiftAmt) | Known3.One.lshr(shiftAmt: BitWidth - ShiftAmt);
1875 break;
1876 }
1877 case Intrinsic::uadd_sat:
1878 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known, Q, Depth: Depth + 1);
1879 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1880 Known = KnownBits::uadd_sat(LHS: Known, RHS: Known2);
1881 break;
1882 case Intrinsic::usub_sat:
1883 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known, Q, Depth: Depth + 1);
1884 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1885 Known = KnownBits::usub_sat(LHS: Known, RHS: Known2);
1886 break;
1887 case Intrinsic::sadd_sat:
1888 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known, Q, Depth: Depth + 1);
1889 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1890 Known = KnownBits::sadd_sat(LHS: Known, RHS: Known2);
1891 break;
1892 case Intrinsic::ssub_sat:
1893 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known, Q, Depth: Depth + 1);
1894 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1895 Known = KnownBits::ssub_sat(LHS: Known, RHS: Known2);
1896 break;
1897 // Vec reverse preserves bits from input vec.
1898 case Intrinsic::vector_reverse:
1899 computeKnownBits(V: I->getOperand(i: 0), DemandedElts: DemandedElts.reverseBits(), Known, Q,
1900 Depth: Depth + 1);
1901 break;
      // For min/max/and/or reductions, any bit that is known (zero or one) in
      // every element of the input vector is also known in the result.
1904 case Intrinsic::vector_reduce_and:
1905 case Intrinsic::vector_reduce_or:
1906 case Intrinsic::vector_reduce_umax:
1907 case Intrinsic::vector_reduce_umin:
1908 case Intrinsic::vector_reduce_smax:
1909 case Intrinsic::vector_reduce_smin:
1910 computeKnownBits(V: I->getOperand(i: 0), Known, Q, Depth: Depth + 1);
1911 break;
1912 case Intrinsic::vector_reduce_xor: {
1913 computeKnownBits(V: I->getOperand(i: 0), Known, Q, Depth: Depth + 1);
        // The zeros common to all elements are zero in the output.
        // If the number of elements is odd, then the common ones remain. If
        // the number of elements is even, then the common ones become zeros.
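        // For example, reducing a <4 x i8> vector where bit 0 is known one in
        // every element: 1 ^ 1 ^ 1 ^ 1 == 0, so with an even element count the
        // bit becomes known zero; with three elements it would stay known one.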
1917 auto *VecTy = cast<VectorType>(Val: I->getOperand(i: 0)->getType());
1918 // Even, so the ones become zeros.
1919 bool EvenCnt = VecTy->getElementCount().isKnownEven();
1920 if (EvenCnt)
1921 Known.Zero |= Known.One;
        // The element count may be even (always possible for scalable
        // vectors), so conservatively clear the known ones.
1923 if (VecTy->isScalableTy() || EvenCnt)
1924 Known.One.clearAllBits();
1925 break;
1926 }
1927 case Intrinsic::umin:
1928 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known, Q, Depth: Depth + 1);
1929 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1930 Known = KnownBits::umin(LHS: Known, RHS: Known2);
1931 break;
1932 case Intrinsic::umax:
1933 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known, Q, Depth: Depth + 1);
1934 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1935 Known = KnownBits::umax(LHS: Known, RHS: Known2);
1936 break;
1937 case Intrinsic::smin:
1938 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known, Q, Depth: Depth + 1);
1939 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1940 Known = KnownBits::smin(LHS: Known, RHS: Known2);
1941 unionWithMinMaxIntrinsicClamp(II, Known);
1942 break;
1943 case Intrinsic::smax:
1944 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known, Q, Depth: Depth + 1);
1945 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1946 Known = KnownBits::smax(LHS: Known, RHS: Known2);
1947 unionWithMinMaxIntrinsicClamp(II, Known);
1948 break;
1949 case Intrinsic::ptrmask: {
1950 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known, Q, Depth: Depth + 1);
1951
1952 const Value *Mask = I->getOperand(i: 1);
1953 Known2 = KnownBits(Mask->getType()->getScalarSizeInBits());
1954 computeKnownBits(V: Mask, DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1955 // TODO: 1-extend would be more precise.
1956 Known &= Known2.anyextOrTrunc(BitWidth);
1957 break;
1958 }
1959 case Intrinsic::x86_sse2_pmulh_w:
1960 case Intrinsic::x86_avx2_pmulh_w:
1961 case Intrinsic::x86_avx512_pmulh_w_512:
1962 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known, Q, Depth: Depth + 1);
1963 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1964 Known = KnownBits::mulhs(LHS: Known, RHS: Known2);
1965 break;
1966 case Intrinsic::x86_sse2_pmulhu_w:
1967 case Intrinsic::x86_avx2_pmulhu_w:
1968 case Intrinsic::x86_avx512_pmulhu_w_512:
1969 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known, Q, Depth: Depth + 1);
1970 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Known&: Known2, Q, Depth: Depth + 1);
1971 Known = KnownBits::mulhu(LHS: Known, RHS: Known2);
1972 break;
1973 case Intrinsic::x86_sse42_crc32_64_64:
1974 Known.Zero.setBitsFrom(32);
1975 break;
1976 case Intrinsic::x86_ssse3_phadd_d_128:
1977 case Intrinsic::x86_ssse3_phadd_w_128:
1978 case Intrinsic::x86_avx2_phadd_d:
1979 case Intrinsic::x86_avx2_phadd_w: {
1980 Known = computeKnownBitsForHorizontalOperation(
1981 I, DemandedElts, Q, Depth,
1982 KnownBitsFunc: [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
1983 return KnownBits::add(LHS: KnownLHS, RHS: KnownRHS);
1984 });
1985 break;
1986 }
1987 case Intrinsic::x86_ssse3_phadd_sw_128:
1988 case Intrinsic::x86_avx2_phadd_sw: {
1989 Known = computeKnownBitsForHorizontalOperation(
1990 I, DemandedElts, Q, Depth, KnownBitsFunc: KnownBits::sadd_sat);
1991 break;
1992 }
1993 case Intrinsic::x86_ssse3_phsub_d_128:
1994 case Intrinsic::x86_ssse3_phsub_w_128:
1995 case Intrinsic::x86_avx2_phsub_d:
1996 case Intrinsic::x86_avx2_phsub_w: {
1997 Known = computeKnownBitsForHorizontalOperation(
1998 I, DemandedElts, Q, Depth,
1999 KnownBitsFunc: [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
2000 return KnownBits::sub(LHS: KnownLHS, RHS: KnownRHS);
2001 });
2002 break;
2003 }
2004 case Intrinsic::x86_ssse3_phsub_sw_128:
2005 case Intrinsic::x86_avx2_phsub_sw: {
2006 Known = computeKnownBitsForHorizontalOperation(
2007 I, DemandedElts, Q, Depth, KnownBitsFunc: KnownBits::ssub_sat);
2008 break;
2009 }
2010 case Intrinsic::riscv_vsetvli:
2011 case Intrinsic::riscv_vsetvlimax: {
2012 bool HasAVL = II->getIntrinsicID() == Intrinsic::riscv_vsetvli;
2013 const ConstantRange Range = getVScaleRange(F: II->getFunction(), BitWidth);
2014 uint64_t SEW = RISCVVType::decodeVSEW(
2015 VSEW: cast<ConstantInt>(Val: II->getArgOperand(i: HasAVL))->getZExtValue());
2016 RISCVVType::VLMUL VLMUL = static_cast<RISCVVType::VLMUL>(
2017 cast<ConstantInt>(Val: II->getArgOperand(i: 1 + HasAVL))->getZExtValue());
2018 uint64_t MaxVLEN =
2019 Range.getUnsignedMax().getZExtValue() * RISCV::RVVBitsPerBlock;
2020 uint64_t MaxVL = MaxVLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul: VLMUL);
2021
        // The result of vsetvli must not be larger than AVL.
2023 if (HasAVL)
2024 if (auto *CI = dyn_cast<ConstantInt>(Val: II->getArgOperand(i: 0)))
2025 MaxVL = std::min(a: MaxVL, b: CI->getZExtValue());
2026
2027 unsigned KnownZeroFirstBit = Log2_32(Value: MaxVL) + 1;
2028 if (BitWidth > KnownZeroFirstBit)
2029 Known.Zero.setBitsFrom(KnownZeroFirstBit);
2030 break;
2031 }
2032 case Intrinsic::vscale: {
2033 if (!II->getParent() || !II->getFunction())
2034 break;
2035
2036 Known = getVScaleRange(F: II->getFunction(), BitWidth).toKnownBits();
2037 break;
2038 }
2039 }
2040 }
2041 break;
2042 }
2043 case Instruction::ShuffleVector: {
2044 auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: I);
2045 // FIXME: Do we need to handle ConstantExpr involving shufflevectors?
2046 if (!Shuf) {
2047 Known.resetAll();
2048 return;
2049 }
2050 // For undef elements, we don't know anything about the common state of
2051 // the shuffle result.
2052 APInt DemandedLHS, DemandedRHS;
2053 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) {
2054 Known.resetAll();
2055 return;
2056 }
2057 Known.One.setAllBits();
2058 Known.Zero.setAllBits();
2059 if (!!DemandedLHS) {
2060 const Value *LHS = Shuf->getOperand(i_nocapture: 0);
2061 computeKnownBits(V: LHS, DemandedElts: DemandedLHS, Known, Q, Depth: Depth + 1);
2062 // If we don't know any bits, early out.
2063 if (Known.isUnknown())
2064 break;
2065 }
2066 if (!!DemandedRHS) {
2067 const Value *RHS = Shuf->getOperand(i_nocapture: 1);
2068 computeKnownBits(V: RHS, DemandedElts: DemandedRHS, Known&: Known2, Q, Depth: Depth + 1);
2069 Known = Known.intersectWith(RHS: Known2);
2070 }
2071 break;
2072 }
2073 case Instruction::InsertElement: {
2074 if (isa<ScalableVectorType>(Val: I->getType())) {
2075 Known.resetAll();
2076 return;
2077 }
2078 const Value *Vec = I->getOperand(i: 0);
2079 const Value *Elt = I->getOperand(i: 1);
2080 auto *CIdx = dyn_cast<ConstantInt>(Val: I->getOperand(i: 2));
2081 unsigned NumElts = DemandedElts.getBitWidth();
2082 APInt DemandedVecElts = DemandedElts;
2083 bool NeedsElt = true;
    // If we know the index we are inserting to, clear it from the Vec check.
2085 if (CIdx && CIdx->getValue().ult(RHS: NumElts)) {
2086 DemandedVecElts.clearBit(BitPosition: CIdx->getZExtValue());
2087 NeedsElt = DemandedElts[CIdx->getZExtValue()];
2088 }
2089
2090 Known.One.setAllBits();
2091 Known.Zero.setAllBits();
2092 if (NeedsElt) {
2093 computeKnownBits(V: Elt, Known, Q, Depth: Depth + 1);
2094 // If we don't know any bits, early out.
2095 if (Known.isUnknown())
2096 break;
2097 }
2098
2099 if (!DemandedVecElts.isZero()) {
2100 computeKnownBits(V: Vec, DemandedElts: DemandedVecElts, Known&: Known2, Q, Depth: Depth + 1);
2101 Known = Known.intersectWith(RHS: Known2);
2102 }
2103 break;
2104 }
2105 case Instruction::ExtractElement: {
2106 // Look through extract element. If the index is non-constant or
2107 // out-of-range demand all elements, otherwise just the extracted element.
2108 const Value *Vec = I->getOperand(i: 0);
2109 const Value *Idx = I->getOperand(i: 1);
2110 auto *CIdx = dyn_cast<ConstantInt>(Val: Idx);
2111 if (isa<ScalableVectorType>(Val: Vec->getType())) {
2112 // FIXME: there's probably *something* we can do with scalable vectors
2113 Known.resetAll();
2114 break;
2115 }
2116 unsigned NumElts = cast<FixedVectorType>(Val: Vec->getType())->getNumElements();
2117 APInt DemandedVecElts = APInt::getAllOnes(numBits: NumElts);
2118 if (CIdx && CIdx->getValue().ult(RHS: NumElts))
2119 DemandedVecElts = APInt::getOneBitSet(numBits: NumElts, BitNo: CIdx->getZExtValue());
2120 computeKnownBits(V: Vec, DemandedElts: DemandedVecElts, Known, Q, Depth: Depth + 1);
2121 break;
2122 }
2123 case Instruction::ExtractValue:
2124 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I->getOperand(i: 0))) {
2125 const ExtractValueInst *EVI = cast<ExtractValueInst>(Val: I);
2126 if (EVI->getNumIndices() != 1) break;
2127 if (EVI->getIndices()[0] == 0) {
2128 switch (II->getIntrinsicID()) {
2129 default: break;
2130 case Intrinsic::uadd_with_overflow:
2131 case Intrinsic::sadd_with_overflow:
2132 computeKnownBitsAddSub(
2133 Add: true, Op0: II->getArgOperand(i: 0), Op1: II->getArgOperand(i: 1), /*NSW=*/false,
2134 /* NUW=*/false, DemandedElts, KnownOut&: Known, Known2, Q, Depth);
2135 break;
2136 case Intrinsic::usub_with_overflow:
2137 case Intrinsic::ssub_with_overflow:
2138 computeKnownBitsAddSub(
2139 Add: false, Op0: II->getArgOperand(i: 0), Op1: II->getArgOperand(i: 1), /*NSW=*/false,
2140 /* NUW=*/false, DemandedElts, KnownOut&: Known, Known2, Q, Depth);
2141 break;
2142 case Intrinsic::umul_with_overflow:
2143 case Intrinsic::smul_with_overflow:
2144 computeKnownBitsMul(Op0: II->getArgOperand(i: 0), Op1: II->getArgOperand(i: 1), NSW: false,
2145 NUW: false, DemandedElts, Known, Known2, Q, Depth);
2146 break;
2147 }
2148 }
2149 }
2150 break;
2151 case Instruction::Freeze:
2152 if (isGuaranteedNotToBePoison(V: I->getOperand(i: 0), AC: Q.AC, CtxI: Q.CxtI, DT: Q.DT,
2153 Depth: Depth + 1))
2154 computeKnownBits(V: I->getOperand(i: 0), Known, Q, Depth: Depth + 1);
2155 break;
2156 }
2157}
2158
2159/// Determine which bits of V are known to be either zero or one and return
2160/// them.
2161KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
2162 const SimplifyQuery &Q, unsigned Depth) {
2163 KnownBits Known(getBitWidth(Ty: V->getType(), DL: Q.DL));
2164 ::computeKnownBits(V, DemandedElts, Known, Q, Depth);
2165 return Known;
2166}
2167
2168/// Determine which bits of V are known to be either zero or one and return
2169/// them.
2170KnownBits llvm::computeKnownBits(const Value *V, const SimplifyQuery &Q,
2171 unsigned Depth) {
2172 KnownBits Known(getBitWidth(Ty: V->getType(), DL: Q.DL));
2173 computeKnownBits(V, Known, Q, Depth);
2174 return Known;
2175}
2176
2177/// Determine which bits of V are known to be either zero or one and return
2178/// them in the Known bit set.
2179///
2180/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
2181/// we cannot optimize based on the assumption that it is zero without changing
/// it to be an explicit zero. If we don't change it to zero, other code could
/// be optimized based on the contradictory assumption that it is non-zero.
2184/// Because instcombine aggressively folds operations with undef args anyway,
2185/// this won't lose us code quality.
2186///
2187/// This function is defined on values with integer type, values with pointer
2188/// type, and vectors of integers. In the case
2189/// where V is a vector, known zero, and known one values are the
2190/// same width as the vector element, and the bit is set only if it is true
2191/// for all of the demanded elements in the vector specified by DemandedElts.
2192void computeKnownBits(const Value *V, const APInt &DemandedElts,
2193 KnownBits &Known, const SimplifyQuery &Q,
2194 unsigned Depth) {
2195 if (!DemandedElts) {
2196 // No demanded elts, better to assume we don't know anything.
2197 Known.resetAll();
2198 return;
2199 }
2200
2201 assert(V && "No Value?");
2202 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
2203
2204#ifndef NDEBUG
2205 Type *Ty = V->getType();
2206 unsigned BitWidth = Known.getBitWidth();
2207
2208 assert((Ty->isIntOrIntVectorTy(BitWidth) || Ty->isPtrOrPtrVectorTy()) &&
2209 "Not integer or pointer type!");
2210
2211 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
2212 assert(
2213 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
2214 "DemandedElt width should equal the fixed vector number of elements");
2215 } else {
2216 assert(DemandedElts == APInt(1, 1) &&
2217 "DemandedElt width should be 1 for scalars or scalable vectors");
2218 }
2219
2220 Type *ScalarTy = Ty->getScalarType();
2221 if (ScalarTy->isPointerTy()) {
2222 assert(BitWidth == Q.DL.getPointerTypeSizeInBits(ScalarTy) &&
2223 "V and Known should have same BitWidth");
2224 } else {
2225 assert(BitWidth == Q.DL.getTypeSizeInBits(ScalarTy) &&
2226 "V and Known should have same BitWidth");
2227 }
2228#endif
2229
2230 const APInt *C;
2231 if (match(V, P: m_APInt(Res&: C))) {
2232 // We know all of the bits for a scalar constant or a splat vector constant!
2233 Known = KnownBits::makeConstant(C: *C);
2234 return;
2235 }
2236 // Null and aggregate-zero are all-zeros.
2237 if (isa<ConstantPointerNull>(Val: V) || isa<ConstantAggregateZero>(Val: V)) {
2238 Known.setAllZero();
2239 return;
2240 }
2241 // Handle a constant vector by taking the intersection of the known bits of
2242 // each element.
2243 if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(Val: V)) {
2244 assert(!isa<ScalableVectorType>(V->getType()));
2245 // We know that CDV must be a vector of integers. Take the intersection of
2246 // each element.
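    // For example, for demanded elements i8 5 (0b0101) and i8 7 (0b0111),
    // bits 0 and 2 are known one, bits 3..7 are known zero, and bit 1 is
    // unknown.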
2247 Known.Zero.setAllBits(); Known.One.setAllBits();
2248 for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) {
2249 if (!DemandedElts[i])
2250 continue;
2251 APInt Elt = CDV->getElementAsAPInt(i);
2252 Known.Zero &= ~Elt;
2253 Known.One &= Elt;
2254 }
2255 if (Known.hasConflict())
2256 Known.resetAll();
2257 return;
2258 }
2259
2260 if (const auto *CV = dyn_cast<ConstantVector>(Val: V)) {
2261 assert(!isa<ScalableVectorType>(V->getType()));
2262 // We know that CV must be a vector of integers. Take the intersection of
2263 // each element.
2264 Known.Zero.setAllBits(); Known.One.setAllBits();
2265 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
2266 if (!DemandedElts[i])
2267 continue;
2268 Constant *Element = CV->getAggregateElement(Elt: i);
2269 if (isa<PoisonValue>(Val: Element))
2270 continue;
2271 auto *ElementCI = dyn_cast_or_null<ConstantInt>(Val: Element);
2272 if (!ElementCI) {
2273 Known.resetAll();
2274 return;
2275 }
2276 const APInt &Elt = ElementCI->getValue();
2277 Known.Zero &= ~Elt;
2278 Known.One &= Elt;
2279 }
2280 if (Known.hasConflict())
2281 Known.resetAll();
2282 return;
2283 }
2284
2285 // Start out not knowing anything.
2286 Known.resetAll();
2287
2288 // We can't imply anything about undefs.
2289 if (isa<UndefValue>(Val: V))
2290 return;
2291
2292 // There's no point in looking through other users of ConstantData for
2293 // assumptions. Confirm that we've handled them all.
2294 assert(!isa<ConstantData>(V) && "Unhandled constant data!");
2295
2296 if (const auto *A = dyn_cast<Argument>(Val: V))
2297 if (std::optional<ConstantRange> Range = A->getRange())
2298 Known = Range->toKnownBits();
2299
2300 // All recursive calls that increase depth must come after this.
2301 if (Depth == MaxAnalysisRecursionDepth)
2302 return;
2303
2304 // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
2305 // the bits of its aliasee.
2306 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Val: V)) {
2307 if (!GA->isInterposable())
2308 computeKnownBits(V: GA->getAliasee(), Known, Q, Depth: Depth + 1);
2309 return;
2310 }
2311
2312 if (const Operator *I = dyn_cast<Operator>(Val: V))
2313 computeKnownBitsFromOperator(I, DemandedElts, Known, Q, Depth);
2314 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: V)) {
2315 if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange())
2316 Known = CR->toKnownBits();
2317 }
2318
2319 // Aligned pointers have trailing zeros - refine Known.Zero set
2320 if (isa<PointerType>(Val: V->getType())) {
2321 Align Alignment = V->getPointerAlignment(DL: Q.DL);
2322 Known.Zero.setLowBits(Log2(A: Alignment));
2323 }
2324
2325 // computeKnownBitsFromContext strictly refines Known.
2326 // Therefore, we run them after computeKnownBitsFromOperator.
2327
2328 // Check whether we can determine known bits from context such as assumes.
2329 computeKnownBitsFromContext(V, Known, Q, Depth);
2330}
2331
/// Try to detect a recurrence in which the value of the induction variable
/// is always a power of two (or zero).
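/// A typical (hypothetical) example of such a recurrence:
///   %p = phi i32 [ 8, %entry ], [ %p.next, %loop ]
///   %p.next = shl nuw i32 %p, 1
/// Here %p starts at a power of two and remains one on every iteration.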
2334static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero,
2335 SimplifyQuery &Q, unsigned Depth) {
2336 BinaryOperator *BO = nullptr;
2337 Value *Start = nullptr, *Step = nullptr;
2338 if (!matchSimpleRecurrence(P: PN, BO, Start, Step))
2339 return false;
2340
2341 // Initial value must be a power of two.
2342 for (const Use &U : PN->operands()) {
2343 if (U.get() == Start) {
2344 // Initial value comes from a different BB, need to adjust context
2345 // instruction for analysis.
2346 Q.CxtI = PN->getIncomingBlock(U)->getTerminator();
2347 if (!isKnownToBeAPowerOfTwo(V: Start, OrZero, Q, Depth))
2348 return false;
2349 }
2350 }
2351
2352 // Except for Mul, the induction variable must be on the left side of the
2353 // increment expression, otherwise its value can be arbitrary.
2354 if (BO->getOpcode() != Instruction::Mul && BO->getOperand(i_nocapture: 1) != Step)
2355 return false;
2356
2357 Q.CxtI = BO->getParent()->getTerminator();
2358 switch (BO->getOpcode()) {
2359 case Instruction::Mul:
2360 // Power of two is closed under multiplication.
2361 return (OrZero || Q.IIQ.hasNoUnsignedWrap(Op: BO) ||
2362 Q.IIQ.hasNoSignedWrap(Op: BO)) &&
2363 isKnownToBeAPowerOfTwo(V: Step, OrZero, Q, Depth);
2364 case Instruction::SDiv:
2365 // Start value must not be signmask for signed division, so simply being a
2366 // power of two is not sufficient, and it has to be a constant.
2367 if (!match(V: Start, P: m_Power2()) || match(V: Start, P: m_SignMask()))
2368 return false;
2369 [[fallthrough]];
2370 case Instruction::UDiv:
2371 // Divisor must be a power of two.
    // If OrZero is false, we cannot guarantee the induction variable is
    // non-zero after the division (same for Shr), unless it is an exact
    // division.
2374 return (OrZero || Q.IIQ.isExact(Op: BO)) &&
2375 isKnownToBeAPowerOfTwo(V: Step, OrZero: false, Q, Depth);
2376 case Instruction::Shl:
2377 return OrZero || Q.IIQ.hasNoUnsignedWrap(Op: BO) || Q.IIQ.hasNoSignedWrap(Op: BO);
2378 case Instruction::AShr:
2379 if (!match(V: Start, P: m_Power2()) || match(V: Start, P: m_SignMask()))
2380 return false;
2381 [[fallthrough]];
2382 case Instruction::LShr:
2383 return OrZero || Q.IIQ.isExact(Op: BO);
2384 default:
2385 return false;
2386 }
2387}
2388
2389/// Return true if we can infer that \p V is known to be a power of 2 from
2390/// dominating condition \p Cond (e.g., ctpop(V) == 1).
2391static bool isImpliedToBeAPowerOfTwoFromCond(const Value *V, bool OrZero,
2392 const Value *Cond,
2393 bool CondIsTrue) {
2394 CmpPredicate Pred;
2395 const APInt *RHSC;
2396 if (!match(V: Cond, P: m_ICmp(Pred, L: m_Intrinsic<Intrinsic::ctpop>(Op0: m_Specific(V)),
2397 R: m_APInt(Res&: RHSC))))
2398 return false;
2399 if (!CondIsTrue)
2400 Pred = ICmpInst::getInversePredicate(pred: Pred);
2401 // ctpop(V) u< 2
2402 if (OrZero && Pred == ICmpInst::ICMP_ULT && *RHSC == 2)
2403 return true;
2404 // ctpop(V) == 1
2405 return Pred == ICmpInst::ICMP_EQ && *RHSC == 1;
2406}
2407
2408/// Return true if the given value is known to have exactly one
2409/// bit set when defined. For vectors return true if every element is known to
2410/// be a power of two when defined. Supports values with integer or pointer
2411/// types and vectors of integers.
2412bool llvm::isKnownToBeAPowerOfTwo(const Value *V, bool OrZero,
2413 const SimplifyQuery &Q, unsigned Depth) {
2414 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
2415
2416 if (isa<Constant>(Val: V))
2417 return OrZero ? match(V, P: m_Power2OrZero()) : match(V, P: m_Power2());
2418
2419 // i1 is by definition a power of 2 or zero.
2420 if (OrZero && V->getType()->getScalarSizeInBits() == 1)
2421 return true;
2422
2423 // Try to infer from assumptions.
2424 if (Q.AC && Q.CxtI) {
2425 for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
2426 if (!AssumeVH)
2427 continue;
2428 CallInst *I = cast<CallInst>(Val&: AssumeVH);
2429 if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, Cond: I->getArgOperand(i: 0),
2430 /*CondIsTrue=*/true) &&
2431 isValidAssumeForContext(Inv: I, CxtI: Q.CxtI, DT: Q.DT))
2432 return true;
2433 }
2434 }
2435
2436 // Handle dominating conditions.
2437 if (Q.DC && Q.CxtI && Q.DT) {
2438 for (BranchInst *BI : Q.DC->conditionsFor(V)) {
2439 Value *Cond = BI->getCondition();
2440
2441 BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(i: 0));
2442 if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, Cond,
2443 /*CondIsTrue=*/true) &&
2444 Q.DT->dominates(BBE: Edge0, BB: Q.CxtI->getParent()))
2445 return true;
2446
2447 BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(i: 1));
2448 if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, Cond,
2449 /*CondIsTrue=*/false) &&
2450 Q.DT->dominates(BBE: Edge1, BB: Q.CxtI->getParent()))
2451 return true;
2452 }
2453 }
2454
2455 auto *I = dyn_cast<Instruction>(Val: V);
2456 if (!I)
2457 return false;
2458
2459 if (Q.CxtI && match(V, P: m_VScale())) {
2460 const Function *F = Q.CxtI->getFunction();
2461 // The vscale_range indicates vscale is a power-of-two.
2462 return F->hasFnAttribute(Kind: Attribute::VScaleRange);
2463 }
2464
2465 // 1 << X is clearly a power of two if the one is not shifted off the end. If
2466 // it is shifted off the end then the result is undefined.
2467 if (match(V: I, P: m_Shl(L: m_One(), R: m_Value())))
2468 return true;
2469
2470 // (signmask) >>l X is clearly a power of two if the one is not shifted off
2471 // the bottom. If it is shifted off the bottom then the result is undefined.
2472 if (match(V: I, P: m_LShr(L: m_SignMask(), R: m_Value())))
2473 return true;
2474
2475 // The remaining tests are all recursive, so bail out if we hit the limit.
2476 if (Depth++ == MaxAnalysisRecursionDepth)
2477 return false;
2478
2479 switch (I->getOpcode()) {
2480 case Instruction::ZExt:
2481 return isKnownToBeAPowerOfTwo(V: I->getOperand(i: 0), OrZero, Q, Depth);
2482 case Instruction::Trunc:
2483 return OrZero && isKnownToBeAPowerOfTwo(V: I->getOperand(i: 0), OrZero, Q, Depth);
2484 case Instruction::Shl:
2485 if (OrZero || Q.IIQ.hasNoUnsignedWrap(Op: I) || Q.IIQ.hasNoSignedWrap(Op: I))
2486 return isKnownToBeAPowerOfTwo(V: I->getOperand(i: 0), OrZero, Q, Depth);
2487 return false;
2488 case Instruction::LShr:
2489 if (OrZero || Q.IIQ.isExact(Op: cast<BinaryOperator>(Val: I)))
2490 return isKnownToBeAPowerOfTwo(V: I->getOperand(i: 0), OrZero, Q, Depth);
2491 return false;
2492 case Instruction::UDiv:
2493 if (Q.IIQ.isExact(Op: cast<BinaryOperator>(Val: I)))
2494 return isKnownToBeAPowerOfTwo(V: I->getOperand(i: 0), OrZero, Q, Depth);
2495 return false;
2496 case Instruction::Mul:
2497 return isKnownToBeAPowerOfTwo(V: I->getOperand(i: 1), OrZero, Q, Depth) &&
2498 isKnownToBeAPowerOfTwo(V: I->getOperand(i: 0), OrZero, Q, Depth) &&
2499 (OrZero || isKnownNonZero(V: I, Q, Depth));
2500 case Instruction::And:
2501 // A power of two and'd with anything is a power of two or zero.
2502 if (OrZero &&
2503 (isKnownToBeAPowerOfTwo(V: I->getOperand(i: 1), /*OrZero*/ true, Q, Depth) ||
2504 isKnownToBeAPowerOfTwo(V: I->getOperand(i: 0), /*OrZero*/ true, Q, Depth)))
2505 return true;
2506 // X & (-X) is always a power of two or zero.
2507 if (match(V: I->getOperand(i: 0), P: m_Neg(V: m_Specific(V: I->getOperand(i: 1)))) ||
2508 match(V: I->getOperand(i: 1), P: m_Neg(V: m_Specific(V: I->getOperand(i: 0)))))
2509 return OrZero || isKnownNonZero(V: I->getOperand(i: 0), Q, Depth);
2510 return false;
2511 case Instruction::Add: {
2512 // Adding a power-of-two or zero to the same power-of-two or zero yields
2513 // either the original power-of-two, a larger power-of-two or zero.
2514 const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(Val: V);
2515 if (OrZero || Q.IIQ.hasNoUnsignedWrap(Op: VOBO) ||
2516 Q.IIQ.hasNoSignedWrap(Op: VOBO)) {
2517 if (match(V: I->getOperand(i: 0),
2518 P: m_c_And(L: m_Specific(V: I->getOperand(i: 1)), R: m_Value())) &&
2519 isKnownToBeAPowerOfTwo(V: I->getOperand(i: 1), OrZero, Q, Depth))
2520 return true;
2521 if (match(V: I->getOperand(i: 1),
2522 P: m_c_And(L: m_Specific(V: I->getOperand(i: 0)), R: m_Value())) &&
2523 isKnownToBeAPowerOfTwo(V: I->getOperand(i: 0), OrZero, Q, Depth))
2524 return true;
2525
2526 unsigned BitWidth = V->getType()->getScalarSizeInBits();
2527 KnownBits LHSBits(BitWidth);
2528 computeKnownBits(V: I->getOperand(i: 0), Known&: LHSBits, Q, Depth);
2529
2530 KnownBits RHSBits(BitWidth);
2531 computeKnownBits(V: I->getOperand(i: 1), Known&: RHSBits, Q, Depth);
2532 // If i8 V is a power of two or zero:
2533 // ZeroBits: 1 1 1 0 1 1 1 1
2534 // ~ZeroBits: 0 0 0 1 0 0 0 0
2535 if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2())
2536 // If OrZero isn't set, we cannot give back a zero result.
2537 // Make sure either the LHS or RHS has a bit set.
2538 if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue())
2539 return true;
2540 }
2541
2542 // LShr(UINT_MAX, Y) + 1 is a power of two (if add is nuw) or zero.
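    // For example, for i8: lshr(0xFF, 3) == 0b00011111, and adding 1 yields
    // 0b00100000, a power of two; if the add wraps (shift amount 0 without
    // nuw), the result is zero.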
2543 if (OrZero || Q.IIQ.hasNoUnsignedWrap(Op: VOBO))
2544 if (match(V: I, P: m_Add(L: m_LShr(L: m_AllOnes(), R: m_Value()), R: m_One())))
2545 return true;
2546 return false;
2547 }
2548 case Instruction::Select:
2549 return isKnownToBeAPowerOfTwo(V: I->getOperand(i: 1), OrZero, Q, Depth) &&
2550 isKnownToBeAPowerOfTwo(V: I->getOperand(i: 2), OrZero, Q, Depth);
2551 case Instruction::PHI: {
    // A PHI node is a power of two if all incoming values are powers of two,
    // or if it is an induction variable whose value is a power of two at each
    // step.
2554 // two.
2555 auto *PN = cast<PHINode>(Val: I);
2556 SimplifyQuery RecQ = Q.getWithoutCondContext();
2557
2558 // Check if it is an induction variable and always power of two.
2559 if (isPowerOfTwoRecurrence(PN, OrZero, Q&: RecQ, Depth))
2560 return true;
2561
2562 // Recursively check all incoming values. Limit recursion to 2 levels, so
2563 // that search complexity is limited to number of operands^2.
2564 unsigned NewDepth = std::max(a: Depth, b: MaxAnalysisRecursionDepth - 1);
2565 return llvm::all_of(Range: PN->operands(), P: [&](const Use &U) {
      // The value is a power of 2 if it comes from the PHI node itself by
      // induction.
2567 if (U.get() == PN)
2568 return true;
2569
2570 // Change the context instruction to the incoming block where it is
2571 // evaluated.
2572 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
2573 return isKnownToBeAPowerOfTwo(V: U.get(), OrZero, Q: RecQ, Depth: NewDepth);
2574 });
2575 }
2576 case Instruction::Invoke:
2577 case Instruction::Call: {
2578 if (auto *II = dyn_cast<IntrinsicInst>(Val: I)) {
2579 switch (II->getIntrinsicID()) {
2580 case Intrinsic::umax:
2581 case Intrinsic::smax:
2582 case Intrinsic::umin:
2583 case Intrinsic::smin:
2584 return isKnownToBeAPowerOfTwo(V: II->getArgOperand(i: 1), OrZero, Q, Depth) &&
2585 isKnownToBeAPowerOfTwo(V: II->getArgOperand(i: 0), OrZero, Q, Depth);
2586 // bswap/bitreverse just move around bits, but don't change any 1s/0s
      // thus don't change pow2/non-pow2 status.
2588 case Intrinsic::bitreverse:
2589 case Intrinsic::bswap:
2590 return isKnownToBeAPowerOfTwo(V: II->getArgOperand(i: 0), OrZero, Q, Depth);
2591 case Intrinsic::fshr:
2592 case Intrinsic::fshl:
2593 // If Op0 == Op1, this is a rotate. is_pow2(rotate(x, y)) == is_pow2(x)
2594 if (II->getArgOperand(i: 0) == II->getArgOperand(i: 1))
2595 return isKnownToBeAPowerOfTwo(V: II->getArgOperand(i: 0), OrZero, Q, Depth);
2596 break;
2597 default:
2598 break;
2599 }
2600 }
2601 return false;
2602 }
2603 default:
2604 return false;
2605 }
2606}
2607
2608/// Test whether a GEP's result is known to be non-null.
2609///
2610/// Uses properties inherent in a GEP to try to determine whether it is known
2611/// to be non-null.
2612///
2613/// Currently this routine does not support vector GEPs.
2614static bool isGEPKnownNonNull(const GEPOperator *GEP, const SimplifyQuery &Q,
2615 unsigned Depth) {
2616 const Function *F = nullptr;
2617 if (const Instruction *I = dyn_cast<Instruction>(Val: GEP))
2618 F = I->getFunction();
2619
  // If the gep is nuw, or is inbounds in an address space where null is not a
  // valid address, then the GEP can only be null if the base pointer is null
  // and the offset is zero.
2622 if (!GEP->hasNoUnsignedWrap() &&
2623 !(GEP->isInBounds() &&
2624 !NullPointerIsDefined(F, AS: GEP->getPointerAddressSpace())))
2625 return false;
2626
2627 // FIXME: Support vector-GEPs.
2628 assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");
2629
2630 // If the base pointer is non-null, we cannot walk to a null address with an
2631 // inbounds GEP in address space zero.
2632 if (isKnownNonZero(V: GEP->getPointerOperand(), Q, Depth))
2633 return true;
2634
2635 // Walk the GEP operands and see if any operand introduces a non-zero offset.
2636 // If so, then the GEP cannot produce a null pointer, as doing so would
2637 // inherently violate the inbounds contract within address space zero.
2638 for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
2639 GTI != GTE; ++GTI) {
2640 // Struct types are easy -- they must always be indexed by a constant.
2641 if (StructType *STy = GTI.getStructTypeOrNull()) {
2642 ConstantInt *OpC = cast<ConstantInt>(Val: GTI.getOperand());
2643 unsigned ElementIdx = OpC->getZExtValue();
2644 const StructLayout *SL = Q.DL.getStructLayout(Ty: STy);
2645 uint64_t ElementOffset = SL->getElementOffset(Idx: ElementIdx);
2646 if (ElementOffset > 0)
2647 return true;
2648 continue;
2649 }
2650
2651 // If we have a zero-sized type, the index doesn't matter. Keep looping.
2652 if (GTI.getSequentialElementStride(DL: Q.DL).isZero())
2653 continue;
2654
2655 // Fast path the constant operand case both for efficiency and so we don't
2656 // increment Depth when just zipping down an all-constant GEP.
2657 if (ConstantInt *OpC = dyn_cast<ConstantInt>(Val: GTI.getOperand())) {
2658 if (!OpC->isZero())
2659 return true;
2660 continue;
2661 }
2662
2663 // We post-increment Depth here because while isKnownNonZero increments it
2664 // as well, when we pop back up that increment won't persist. We don't want
2665 // to recurse 10k times just because we have 10k GEP operands. We don't
2666 // bail completely out because we want to handle constant GEPs regardless
2667 // of depth.
2668 if (Depth++ >= MaxAnalysisRecursionDepth)
2669 continue;
2670
2671 if (isKnownNonZero(V: GTI.getOperand(), Q, Depth))
2672 return true;
2673 }
2674
2675 return false;
2676}
2677
2678static bool isKnownNonNullFromDominatingCondition(const Value *V,
2679 const Instruction *CtxI,
2680 const DominatorTree *DT) {
2681 assert(!isa<Constant>(V) && "Called for constant?");
2682
2683 if (!CtxI || !DT)
2684 return false;
2685
2686 unsigned NumUsesExplored = 0;
2687 for (auto &U : V->uses()) {
2688 // Avoid massive lists
2689 if (NumUsesExplored >= DomConditionsMaxUses)
2690 break;
2691 NumUsesExplored++;
2692
2693 const Instruction *UI = cast<Instruction>(Val: U.getUser());
2694 // If the value is used as an argument to a call or invoke, then argument
2695 // attributes may provide an answer about null-ness.
2696 if (V->getType()->isPointerTy()) {
2697 if (const auto *CB = dyn_cast<CallBase>(Val: UI)) {
2698 if (CB->isArgOperand(U: &U) &&
2699 CB->paramHasNonNullAttr(ArgNo: CB->getArgOperandNo(U: &U),
2700 /*AllowUndefOrPoison=*/false) &&
2701 DT->dominates(Def: CB, User: CtxI))
2702 return true;
2703 }
2704 }
2705
    // If the value is used as the pointer operand of a load/store, then it
    // must be non-null.
2707 if (V == getLoadStorePointerOperand(V: UI)) {
2708 if (!NullPointerIsDefined(F: UI->getFunction(),
2709 AS: V->getType()->getPointerAddressSpace()) &&
2710 DT->dominates(Def: UI, User: CtxI))
2711 return true;
2712 }
2713
2714 if ((match(V: UI, P: m_IDiv(L: m_Value(), R: m_Specific(V))) ||
2715 match(V: UI, P: m_IRem(L: m_Value(), R: m_Specific(V)))) &&
2716 isValidAssumeForContext(Inv: UI, CxtI: CtxI, DT))
2717 return true;
2718
2719 // Consider only compare instructions uniquely controlling a branch
2720 Value *RHS;
2721 CmpPredicate Pred;
2722 if (!match(V: UI, P: m_c_ICmp(Pred, L: m_Specific(V), R: m_Value(V&: RHS))))
2723 continue;
2724
2725 bool NonNullIfTrue;
2726 if (cmpExcludesZero(Pred, RHS))
2727 NonNullIfTrue = true;
2728 else if (cmpExcludesZero(Pred: CmpInst::getInversePredicate(pred: Pred), RHS))
2729 NonNullIfTrue = false;
2730 else
2731 continue;
2732
2733 SmallVector<const User *, 4> WorkList;
2734 SmallPtrSet<const User *, 4> Visited;
2735 for (const auto *CmpU : UI->users()) {
2736 assert(WorkList.empty() && "Should be!");
2737 if (Visited.insert(Ptr: CmpU).second)
2738 WorkList.push_back(Elt: CmpU);
2739
2740 while (!WorkList.empty()) {
2741 auto *Curr = WorkList.pop_back_val();
2742
2743 // If a user is an AND, add all its users to the work list. We only
2744 // propagate "pred != null" condition through AND because it is only
2745 // correct to assume that all conditions of AND are met in true branch.
2746 // TODO: Support similar logic of OR and EQ predicate?
2747 if (NonNullIfTrue)
2748 if (match(V: Curr, P: m_LogicalAnd(L: m_Value(), R: m_Value()))) {
2749 for (const auto *CurrU : Curr->users())
2750 if (Visited.insert(Ptr: CurrU).second)
2751 WorkList.push_back(Elt: CurrU);
2752 continue;
2753 }
2754
2755 if (const BranchInst *BI = dyn_cast<BranchInst>(Val: Curr)) {
2756 assert(BI->isConditional() && "uses a comparison!");
2757
2758 BasicBlock *NonNullSuccessor =
2759 BI->getSuccessor(i: NonNullIfTrue ? 0 : 1);
2760 BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
2761 if (Edge.isSingleEdge() && DT->dominates(BBE: Edge, BB: CtxI->getParent()))
2762 return true;
2763 } else if (NonNullIfTrue && isGuard(U: Curr) &&
2764 DT->dominates(Def: cast<Instruction>(Val: Curr), User: CtxI)) {
2765 return true;
2766 }
2767 }
2768 }
2769 }
2770
2771 return false;
2772}
2773
/// Does the 'Range' metadata (which must be a valid MD_range operand list)
/// ensure that the value it's attached to is never equal to \p Value?
2777static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) {
2778 const unsigned NumRanges = Ranges->getNumOperands() / 2;
2779 assert(NumRanges >= 1);
2780 for (unsigned i = 0; i < NumRanges; ++i) {
2781 ConstantInt *Lower =
2782 mdconst::extract<ConstantInt>(MD: Ranges->getOperand(I: 2 * i + 0));
2783 ConstantInt *Upper =
2784 mdconst::extract<ConstantInt>(MD: Ranges->getOperand(I: 2 * i + 1));
2785 ConstantRange Range(Lower->getValue(), Upper->getValue());
2786 if (Range.contains(Val: Value))
2787 return false;
2788 }
2789 return true;
2790}
2791
2792/// Try to detect a recurrence that monotonically increases/decreases from a
2793/// non-zero starting value. These are common as induction variables.
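/// A typical (hypothetical) example:
///   %i = phi i64 [ 1, %entry ], [ %i.next, %loop ]
///   %i.next = add nuw i64 %i, 1
/// Starting at a non-zero value and stepping with nuw, %i can never wrap back
/// to zero.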
2794static bool isNonZeroRecurrence(const PHINode *PN) {
2795 BinaryOperator *BO = nullptr;
2796 Value *Start = nullptr, *Step = nullptr;
2797 const APInt *StartC, *StepC;
2798 if (!matchSimpleRecurrence(P: PN, BO, Start, Step) ||
2799 !match(V: Start, P: m_APInt(Res&: StartC)) || StartC->isZero())
2800 return false;
2801
2802 switch (BO->getOpcode()) {
2803 case Instruction::Add:
2804 // Starting from non-zero and stepping away from zero can never wrap back
2805 // to zero.
2806 return BO->hasNoUnsignedWrap() ||
2807 (BO->hasNoSignedWrap() && match(V: Step, P: m_APInt(Res&: StepC)) &&
2808 StartC->isNegative() == StepC->isNegative());
2809 case Instruction::Mul:
2810 return (BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap()) &&
2811 match(V: Step, P: m_APInt(Res&: StepC)) && !StepC->isZero();
2812 case Instruction::Shl:
2813 return BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap();
2814 case Instruction::AShr:
2815 case Instruction::LShr:
2816 return BO->isExact();
2817 default:
2818 return false;
2819 }
2820}
2821
2822static bool matchOpWithOpEqZero(Value *Op0, Value *Op1) {
2823 return match(V: Op0, P: m_ZExtOrSExt(Op: m_SpecificICmp(MatchPred: ICmpInst::ICMP_EQ,
2824 L: m_Specific(V: Op1), R: m_Zero()))) ||
2825 match(V: Op1, P: m_ZExtOrSExt(Op: m_SpecificICmp(MatchPred: ICmpInst::ICMP_EQ,
2826 L: m_Specific(V: Op0), R: m_Zero())));
2827}
2828
2829static bool isNonZeroAdd(const APInt &DemandedElts, const SimplifyQuery &Q,
2830 unsigned BitWidth, Value *X, Value *Y, bool NSW,
2831 bool NUW, unsigned Depth) {
2832 // (X + (X == 0)) is non zero
2833 if (matchOpWithOpEqZero(Op0: X, Op1: Y))
2834 return true;
2835
2836 if (NUW)
2837 return isKnownNonZero(V: Y, DemandedElts, Q, Depth) ||
2838 isKnownNonZero(V: X, DemandedElts, Q, Depth);
2839
2840 KnownBits XKnown = computeKnownBits(V: X, DemandedElts, Q, Depth);
2841 KnownBits YKnown = computeKnownBits(V: Y, DemandedElts, Q, Depth);
2842
2843 // If X and Y are both non-negative (as signed values) then their sum is not
2844 // zero unless both X and Y are zero.
2845 if (XKnown.isNonNegative() && YKnown.isNonNegative())
2846 if (isKnownNonZero(V: Y, DemandedElts, Q, Depth) ||
2847 isKnownNonZero(V: X, DemandedElts, Q, Depth))
2848 return true;
2849
2850 // If X and Y are both negative (as signed values) then their sum is not
2851 // zero unless both X and Y equal INT_MIN.
2852 if (XKnown.isNegative() && YKnown.isNegative()) {
2853 APInt Mask = APInt::getSignedMaxValue(numBits: BitWidth);
2854 // The sign bit of X is set. If some other bit is set then X is not equal
2855 // to INT_MIN.
2856 if (XKnown.One.intersects(RHS: Mask))
2857 return true;
2858 // The sign bit of Y is set. If some other bit is set then Y is not equal
2859 // to INT_MIN.
2860 if (YKnown.One.intersects(RHS: Mask))
2861 return true;
2862 }
2863
2864 // The sum of a non-negative number and a power of two is not zero.
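 // (If X + P were zero for a power of two P, then X would equal -P, whose sign
 // bit is set for every power of two P, contradicting X being non-negative.)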
2865 if (XKnown.isNonNegative() &&
2866 isKnownToBeAPowerOfTwo(V: Y, /*OrZero*/ false, Q, Depth))
2867 return true;
2868 if (YKnown.isNonNegative() &&
2869 isKnownToBeAPowerOfTwo(V: X, /*OrZero*/ false, Q, Depth))
2870 return true;
2871
2872 return KnownBits::add(LHS: XKnown, RHS: YKnown, NSW, NUW).isNonZero();
2873}
2874
2875static bool isNonZeroSub(const APInt &DemandedElts, const SimplifyQuery &Q,
2876 unsigned BitWidth, Value *X, Value *Y,
2877 unsigned Depth) {
2878 // (X - (X == 0)) is non zero
2879 // ((X == 0) - X) is non zero
2880 if (matchOpWithOpEqZero(Op0: X, Op1: Y))
2881 return true;
2882
2883 // TODO: Move this case into isKnownNonEqual().
2884 if (auto *C = dyn_cast<Constant>(Val: X))
2885 if (C->isNullValue() && isKnownNonZero(V: Y, DemandedElts, Q, Depth))
2886 return true;
2887
2888 return ::isKnownNonEqual(V1: X, V2: Y, DemandedElts, Q, Depth);
2889}
2890
2891static bool isNonZeroMul(const APInt &DemandedElts, const SimplifyQuery &Q,
2892 unsigned BitWidth, Value *X, Value *Y, bool NSW,
2893 bool NUW, unsigned Depth) {
2894 // If X and Y are non-zero then so is X * Y as long as the multiplication
2895 // does not overflow.
2896 if (NSW || NUW)
2897 return isKnownNonZero(V: X, DemandedElts, Q, Depth) &&
2898 isKnownNonZero(V: Y, DemandedElts, Q, Depth);
2899
2900 // If either X or Y is odd, then if the other is non-zero the result can't
2901 // be zero.
2902 KnownBits XKnown = computeKnownBits(V: X, DemandedElts, Q, Depth);
2903 if (XKnown.One[0])
2904 return isKnownNonZero(V: Y, DemandedElts, Q, Depth);
2905
2906 KnownBits YKnown = computeKnownBits(V: Y, DemandedElts, Q, Depth);
2907 if (YKnown.One[0])
2908 return XKnown.isNonZero() || isKnownNonZero(V: X, DemandedElts, Q, Depth);
2909
2910 // If there exists any subset of X (sX) and subset of Y (sY) s.t. sX * sY is
2911 // non-zero, then X * Y is non-zero. We can find sX and sY by just taking
2912 // the lowest known One of X and Y. If they are non-zero, the result
2913 // must be non-zero. We can check if LSB(X) * LSB(Y) != 0 by checking that
2914 // X.countMaxTrailingZeros() + Y.countMaxTrailingZeros() < BitWidth.
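 // For example, for i8 operands: if X has a known-one bit at position <= 2 and
 // Y at position <= 3, then countMaxTrailingZeros(X) + countMaxTrailingZeros(Y)
 // <= 2 + 3 < 8, so the lowest set bit of X * Y stays inside the i8 and the
 // product cannot be zero.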
2915 return (XKnown.countMaxTrailingZeros() + YKnown.countMaxTrailingZeros()) <
2916 BitWidth;
2917}
2918
2919static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts,
2920 const SimplifyQuery &Q, const KnownBits &KnownVal,
2921 unsigned Depth) {
2922 auto ShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
2923 switch (I->getOpcode()) {
2924 case Instruction::Shl:
2925 return Lhs.shl(ShiftAmt: Rhs);
2926 case Instruction::LShr:
2927 return Lhs.lshr(ShiftAmt: Rhs);
2928 case Instruction::AShr:
2929 return Lhs.ashr(ShiftAmt: Rhs);
2930 default:
2931 llvm_unreachable("Unknown Shift Opcode");
2932 }
2933 };
2934
2935 auto InvShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
2936 switch (I->getOpcode()) {
2937 case Instruction::Shl:
2938 return Lhs.lshr(ShiftAmt: Rhs);
2939 case Instruction::LShr:
2940 case Instruction::AShr:
2941 return Lhs.shl(ShiftAmt: Rhs);
2942 default:
2943 llvm_unreachable("Unknown Shift Opcode");
2944 }
2945 };
2946
2947 if (KnownVal.isUnknown())
2948 return false;
2949
2950 KnownBits KnownCnt =
2951 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Q, Depth);
2952 APInt MaxShift = KnownCnt.getMaxValue();
2953 unsigned NumBits = KnownVal.getBitWidth();
2954 if (MaxShift.uge(RHS: NumBits))
2955 return false;
2956
2957 if (!ShiftOp(KnownVal.One, MaxShift).isZero())
2958 return true;
2959
2960 // If all of the bits shifted out are known to be zero, and Val is known
2961 // non-zero then at least one non-zero bit must remain.
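 // For example, for `lshr i8 %x, %c` with %c known to be at most 4 and the low
 // 4 bits of %x known to be zero: any set bit of %x is at position >= 4 and
 // cannot be shifted out, so the result is non-zero whenever %x is.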
2962 if (InvShiftOp(KnownVal.Zero, NumBits - MaxShift)
2963 .eq(RHS: InvShiftOp(APInt::getAllOnes(numBits: NumBits), NumBits - MaxShift)) &&
2964 isKnownNonZero(V: I->getOperand(i: 0), DemandedElts, Q, Depth))
2965 return true;
2966
2967 return false;
2968}
2969
2970static bool isKnownNonZeroFromOperator(const Operator *I,
2971 const APInt &DemandedElts,
2972 const SimplifyQuery &Q, unsigned Depth) {
2973 unsigned BitWidth = getBitWidth(Ty: I->getType()->getScalarType(), DL: Q.DL);
2974 switch (I->getOpcode()) {
2975 case Instruction::Alloca:
2976 // Alloca never returns null, malloc might.
2977 return I->getType()->getPointerAddressSpace() == 0;
2978 case Instruction::GetElementPtr:
2979 if (I->getType()->isPointerTy())
2980 return isGEPKnownNonNull(GEP: cast<GEPOperator>(Val: I), Q, Depth);
2981 break;
2982 case Instruction::BitCast: {
2983 // We need to be a bit careful here. We can only peek through the bitcast
2984 // if the scalar size of the operand's elements is no larger than, and
2985 // divides, the scalar size of the elements being cast to. Take three cases:
2986 //
2987 // 1) Unsafe:
2988 // bitcast <2 x i16> %NonZero to <4 x i8>
2989 //
2990 // %NonZero can have 2 non-zero i16 elements, but isKnownNonZero on a
2991 // <4 x i8> requires that all 4 i8 elements be non-zero which isn't
2992 // guaranteed (imagine just the sign bit set in the 2 i16 elements).
2993 //
2994 // 2) Unsafe:
2995 // bitcast <4 x i3> %NonZero to <3 x i4>
2996 //
2997 // Even though the scalar size of the src (`i3`) is smaller than the
2998 // scalar size of the dst (`i4`), because `i4` is not a multiple of `i3`
2999 // it's possible for the `3 x i4` elements to be zero because there are
3000 // some elements in the destination that don't contain any full src
3001 // element.
3002 //
3003 // 3) Safe:
3004 // bitcast <4 x i8> %NonZero to <2 x i16>
3005 //
3006 // This is always safe as non-zero in the 4 i8 elements implies
3007 // non-zero in the combination of any two adjacent ones. Since i16 is a
3008 // multiple of i8, each i16 is guaranteed to contain 2 full i8 elements.
3009 // This all implies the 2 i16 elements are non-zero.
3010 Type *FromTy = I->getOperand(i: 0)->getType();
3011 if ((FromTy->isIntOrIntVectorTy() || FromTy->isPtrOrPtrVectorTy()) &&
3012 (BitWidth % getBitWidth(Ty: FromTy->getScalarType(), DL: Q.DL)) == 0)
3013 return isKnownNonZero(V: I->getOperand(i: 0), Q, Depth);
3014 } break;
3015 case Instruction::IntToPtr:
3016 // Note that we have to take special care to avoid looking through
3017 // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well
3018 // as casts that can alter the value, e.g., AddrSpaceCasts.
3019 if (!isa<ScalableVectorType>(Val: I->getType()) &&
3020 Q.DL.getTypeSizeInBits(Ty: I->getOperand(i: 0)->getType()).getFixedValue() <=
3021 Q.DL.getTypeSizeInBits(Ty: I->getType()).getFixedValue())
3022 return isKnownNonZero(V: I->getOperand(i: 0), DemandedElts, Q, Depth);
3023 break;
3024 case Instruction::PtrToInt:
3025 // Similar to int2ptr above, we can look through ptr2int here if the cast
3026 // is a no-op or an extend and not a truncate.
3027 if (!isa<ScalableVectorType>(Val: I->getType()) &&
3028 Q.DL.getTypeSizeInBits(Ty: I->getOperand(i: 0)->getType()).getFixedValue() <=
3029 Q.DL.getTypeSizeInBits(Ty: I->getType()).getFixedValue())
3030 return isKnownNonZero(V: I->getOperand(i: 0), DemandedElts, Q, Depth);
3031 break;
3032 case Instruction::Trunc:
3033 // nuw/nsw trunc preserves zero/non-zero status of input.
3034 if (auto *TI = dyn_cast<TruncInst>(Val: I))
3035 if (TI->hasNoSignedWrap() || TI->hasNoUnsignedWrap())
3036 return isKnownNonZero(V: TI->getOperand(i_nocapture: 0), DemandedElts, Q, Depth);
3037 break;
3038
3039 case Instruction::Sub:
3040 return isNonZeroSub(DemandedElts, Q, BitWidth, X: I->getOperand(i: 0),
3041 Y: I->getOperand(i: 1), Depth);
3042 case Instruction::Xor:
3043 // (X ^ (X == 0)) is non zero
3044 if (matchOpWithOpEqZero(Op0: I->getOperand(i: 0), Op1: I->getOperand(i: 1)))
3045 return true;
3046 break;
3047 case Instruction::Or:
3048 // (X | (X == 0)) is non zero
3049 if (matchOpWithOpEqZero(Op0: I->getOperand(i: 0), Op1: I->getOperand(i: 1)))
3050 return true;
3051 // X | Y != 0 if X != Y.
3052 if (isKnownNonEqual(V1: I->getOperand(i: 0), V2: I->getOperand(i: 1), DemandedElts, Q,
3053 Depth))
3054 return true;
3055 // X | Y != 0 if X != 0 or Y != 0.
3056 return isKnownNonZero(V: I->getOperand(i: 1), DemandedElts, Q, Depth) ||
3057 isKnownNonZero(V: I->getOperand(i: 0), DemandedElts, Q, Depth);
3058 case Instruction::SExt:
3059 case Instruction::ZExt:
3060 // ext X != 0 if X != 0.
3061 return isKnownNonZero(V: I->getOperand(i: 0), DemandedElts, Q, Depth);
3062
3063 case Instruction::Shl: {
3064 // shl nsw/nuw can't remove any non-zero bits.
3065 const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(Val: I);
3066 if (Q.IIQ.hasNoUnsignedWrap(Op: BO) || Q.IIQ.hasNoSignedWrap(Op: BO))
3067 return isKnownNonZero(V: I->getOperand(i: 0), DemandedElts, Q, Depth);
3068
3069 // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
3070 // if the lowest bit is shifted off the end.
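 // For example, `shl i8 1, %y` is known non-zero: bit %y of the result is set
 // for any in-range shift amount (out-of-range amounts produce poison, which
 // the analysis may assume does not occur).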
3071 KnownBits Known(BitWidth);
3072 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Known, Q, Depth);
3073 if (Known.One[0])
3074 return true;
3075
3076 return isNonZeroShift(I, DemandedElts, Q, KnownVal: Known, Depth);
3077 }
3078 case Instruction::LShr:
3079 case Instruction::AShr: {
3080 // shr exact can only shift out zero bits.
3081 const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(Val: I);
3082 if (BO->isExact())
3083 return isKnownNonZero(V: I->getOperand(i: 0), DemandedElts, Q, Depth);
3084
3085 // shr X, Y != 0 if X is negative. Note that the value of the shift is not
3086 // defined if the sign bit is shifted off the end.
3087 KnownBits Known =
3088 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Q, Depth);
3089 if (Known.isNegative())
3090 return true;
3091
3092 return isNonZeroShift(I, DemandedElts, Q, KnownVal: Known, Depth);
3093 }
3094 case Instruction::UDiv:
3095 case Instruction::SDiv: {
3096 // X / Y
3097 // div exact can only produce a zero if the dividend is zero.
3098 if (cast<PossiblyExactOperator>(Val: I)->isExact())
3099 return isKnownNonZero(V: I->getOperand(i: 0), DemandedElts, Q, Depth);
3100
3101 KnownBits XKnown =
3102 computeKnownBits(V: I->getOperand(i: 0), DemandedElts, Q, Depth);
3103 // If X is fully unknown we won't be able to figure anything out, so don't
3104 // bother computing known bits for Y.
3105 if (XKnown.isUnknown())
3106 return false;
3107
3108 KnownBits YKnown =
3109 computeKnownBits(V: I->getOperand(i: 1), DemandedElts, Q, Depth);
3110 if (I->getOpcode() == Instruction::SDiv) {
3111 // For signed division need to compare abs value of the operands.
3112 XKnown = XKnown.abs(/*IntMinIsPoison*/ false);
3113 YKnown = YKnown.abs(/*IntMinIsPoison*/ false);
3114 }
3115 // If X u>= Y then div is non zero (0/0 is UB).
3116 std::optional<bool> XUgeY = KnownBits::uge(LHS: XKnown, RHS: YKnown);
3117 // If X is totally unknown or X u< Y, we won't be able to prove non-zero
3118 // with computeKnownBits, so just return early.
3119 return XUgeY && *XUgeY;
3120 }
3121 case Instruction::Add: {
3122 // X + Y.
3123
3124 // If the add has the nuw flag, then the result is non-zero if either X or Y
3125 // is non-zero.
3126 auto *BO = cast<OverflowingBinaryOperator>(Val: I);
3127 return isNonZeroAdd(DemandedElts, Q, BitWidth, X: I->getOperand(i: 0),
3128 Y: I->getOperand(i: 1), NSW: Q.IIQ.hasNoSignedWrap(Op: BO),
3129 NUW: Q.IIQ.hasNoUnsignedWrap(Op: BO), Depth);
3130 }
3131 case Instruction::Mul: {
3132 const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(Val: I);
3133 return isNonZeroMul(DemandedElts, Q, BitWidth, X: I->getOperand(i: 0),
3134 Y: I->getOperand(i: 1), NSW: Q.IIQ.hasNoSignedWrap(Op: BO),
3135 NUW: Q.IIQ.hasNoUnsignedWrap(Op: BO), Depth);
3136 }
3137 case Instruction::Select: {
3138 // (C ? X : Y) != 0 if X != 0 and Y != 0.
3139
3140 // First check if the arm is non-zero using `isKnownNonZero`. If that fails,
3141 // then see if the select condition implies the arm is non-zero. For example
3142 // (X != 0 ? X : Y), we know the true arm is non-zero as the `X` "return" is
3143 // dominated by `X != 0`.
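 // For example:
 //   %c = icmp ne i32 %x, 0
 //   %s = select i1 %c, i32 %x, i32 1
 // The false arm is trivially non-zero and the true arm %x is non-zero because
 // it is only selected when %c (i.e. %x != 0) holds, so %s is non-zero.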
3144 auto SelectArmIsNonZero = [&](bool IsTrueArm) {
3145 Value *Op = IsTrueArm ? I->getOperand(i: 1) : I->getOperand(i: 2);
3147 // Op is trivially non-zero.
3148 if (isKnownNonZero(V: Op, DemandedElts, Q, Depth))
3149 return true;
3150
3151 // The condition of the select dominates the true/false arm. Check if the
3152 // condition implies that a given arm is non-zero.
3153 Value *X;
3154 CmpPredicate Pred;
3155 if (!match(V: I->getOperand(i: 0), P: m_c_ICmp(Pred, L: m_Specific(V: Op), R: m_Value(V&: X))))
3156 return false;
3157
3158 if (!IsTrueArm)
3159 Pred = ICmpInst::getInversePredicate(pred: Pred);
3160
3161 return cmpExcludesZero(Pred, RHS: X);
3162 };
3163
3164 if (SelectArmIsNonZero(/* IsTrueArm */ true) &&
3165 SelectArmIsNonZero(/* IsTrueArm */ false))
3166 return true;
3167 break;
3168 }
3169 case Instruction::PHI: {
3170 auto *PN = cast<PHINode>(Val: I);
3171 if (Q.IIQ.UseInstrInfo && isNonZeroRecurrence(PN))
3172 return true;
3173
3174 // Check if all incoming values are non-zero using recursion.
3175 SimplifyQuery RecQ = Q.getWithoutCondContext();
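 // Bump the depth to MaxAnalysisRecursionDepth - 1 so that each incoming
 // value gets at most one more level of analysis below.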
3176 unsigned NewDepth = std::max(a: Depth, b: MaxAnalysisRecursionDepth - 1);
3177 return llvm::all_of(Range: PN->operands(), P: [&](const Use &U) {
3178 if (U.get() == PN)
3179 return true;
3180 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
3181 // Check if the branch on the phi excludes zero.
3182 CmpPredicate Pred;
3183 Value *X;
3184 BasicBlock *TrueSucc, *FalseSucc;
3185 if (match(V: RecQ.CxtI,
3186 P: m_Br(C: m_c_ICmp(Pred, L: m_Specific(V: U.get()), R: m_Value(V&: X)),
3187 T: m_BasicBlock(V&: TrueSucc), F: m_BasicBlock(V&: FalseSucc)))) {
3188 // Check for cases of duplicate successors.
3189 if ((TrueSucc == PN->getParent()) != (FalseSucc == PN->getParent())) {
3190 // If we're using the false successor, invert the predicate.
3191 if (FalseSucc == PN->getParent())
3192 Pred = CmpInst::getInversePredicate(pred: Pred);
3193 if (cmpExcludesZero(Pred, RHS: X))
3194 return true;
3195 }
3196 }
3197 // Finally recurse on the edge and check it directly.
3198 return isKnownNonZero(V: U.get(), DemandedElts, Q: RecQ, Depth: NewDepth);
3199 });
3200 }
3201 case Instruction::InsertElement: {
3202 if (isa<ScalableVectorType>(Val: I->getType()))
3203 break;
3204
3205 const Value *Vec = I->getOperand(i: 0);
3206 const Value *Elt = I->getOperand(i: 1);
3207 auto *CIdx = dyn_cast<ConstantInt>(Val: I->getOperand(i: 2));
3208
3209 unsigned NumElts = DemandedElts.getBitWidth();
3210 APInt DemandedVecElts = DemandedElts;
3211 bool SkipElt = false;
3212 // If we know the index we are inserting to, clear it from the Vec check.
3213 if (CIdx && CIdx->getValue().ult(RHS: NumElts)) {
3214 DemandedVecElts.clearBit(BitPosition: CIdx->getZExtValue());
3215 SkipElt = !DemandedElts[CIdx->getZExtValue()];
3216 }
3217
3218 // The result is non-zero if Elt is non-zero and the rest of the demanded
3219 // elts in Vec are non-zero.
3220 return (SkipElt || isKnownNonZero(V: Elt, Q, Depth)) &&
3221 (DemandedVecElts.isZero() ||
3222 isKnownNonZero(V: Vec, DemandedElts: DemandedVecElts, Q, Depth));
3223 }
3224 case Instruction::ExtractElement:
3225 if (const auto *EEI = dyn_cast<ExtractElementInst>(Val: I)) {
3226 const Value *Vec = EEI->getVectorOperand();
3227 const Value *Idx = EEI->getIndexOperand();
3228 auto *CIdx = dyn_cast<ConstantInt>(Val: Idx);
3229 if (auto *VecTy = dyn_cast<FixedVectorType>(Val: Vec->getType())) {
3230 unsigned NumElts = VecTy->getNumElements();
3231 APInt DemandedVecElts = APInt::getAllOnes(numBits: NumElts);
3232 if (CIdx && CIdx->getValue().ult(RHS: NumElts))
3233 DemandedVecElts = APInt::getOneBitSet(numBits: NumElts, BitNo: CIdx->getZExtValue());
3234 return isKnownNonZero(V: Vec, DemandedElts: DemandedVecElts, Q, Depth);
3235 }
3236 }
3237 break;
3238 case Instruction::ShuffleVector: {
3239 auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: I);
3240 if (!Shuf)
3241 break;
3242 APInt DemandedLHS, DemandedRHS;
3243 // For undef elements, we don't know anything about the common state of
3244 // the shuffle result.
3245 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
3246 break;
3247 // If demanded elements for both vecs are non-zero, the shuffle is non-zero.
3248 return (DemandedRHS.isZero() ||
3249 isKnownNonZero(V: Shuf->getOperand(i_nocapture: 1), DemandedElts: DemandedRHS, Q, Depth)) &&
3250 (DemandedLHS.isZero() ||
3251 isKnownNonZero(V: Shuf->getOperand(i_nocapture: 0), DemandedElts: DemandedLHS, Q, Depth));
3252 }
3253 case Instruction::Freeze:
3254 return isKnownNonZero(V: I->getOperand(i: 0), Q, Depth) &&
3255 isGuaranteedNotToBePoison(V: I->getOperand(i: 0), AC: Q.AC, CtxI: Q.CxtI, DT: Q.DT,
3256 Depth);
3257 case Instruction::Load: {
3258 auto *LI = cast<LoadInst>(Val: I);
3259 // A Load tagged with nonnull or dereferenceable with null pointer undefined
3260 // is never null.
3261 if (auto *PtrT = dyn_cast<PointerType>(Val: I->getType())) {
3262 if (Q.IIQ.getMetadata(I: LI, KindID: LLVMContext::MD_nonnull) ||
3263 (Q.IIQ.getMetadata(I: LI, KindID: LLVMContext::MD_dereferenceable) &&
3264 !NullPointerIsDefined(F: LI->getFunction(), AS: PtrT->getAddressSpace())))
3265 return true;
3266 } else if (MDNode *Ranges = Q.IIQ.getMetadata(I: LI, KindID: LLVMContext::MD_range)) {
3267 return rangeMetadataExcludesValue(Ranges, Value: APInt::getZero(numBits: BitWidth));
3268 }
3269
3270 // No need to fall through to computeKnownBits as range metadata is already
3271 // handled in isKnownNonZero.
3272 return false;
3273 }
3274 case Instruction::ExtractValue: {
3275 const WithOverflowInst *WO;
3276 if (match(V: I, P: m_ExtractValue<0>(V: m_WithOverflowInst(I&: WO)))) {
3277 switch (WO->getBinaryOp()) {
3278 default:
3279 break;
3280 case Instruction::Add:
3281 return isNonZeroAdd(DemandedElts, Q, BitWidth, X: WO->getArgOperand(i: 0),
3282 Y: WO->getArgOperand(i: 1),
3283 /*NSW=*/false,
3284 /*NUW=*/false, Depth);
3285 case Instruction::Sub:
3286 return isNonZeroSub(DemandedElts, Q, BitWidth, X: WO->getArgOperand(i: 0),
3287 Y: WO->getArgOperand(i: 1), Depth);
3288 case Instruction::Mul:
3289 return isNonZeroMul(DemandedElts, Q, BitWidth, X: WO->getArgOperand(i: 0),
3290 Y: WO->getArgOperand(i: 1),
3291 /*NSW=*/false, /*NUW=*/false, Depth);
3293 }
3294 }
3295 break;
3296 }
3297 case Instruction::Call:
3298 case Instruction::Invoke: {
3299 const auto *Call = cast<CallBase>(Val: I);
3300 if (I->getType()->isPointerTy()) {
3301 if (Call->isReturnNonNull())
3302 return true;
3303 if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, MustPreserveNullness: true))
3304 return isKnownNonZero(V: RP, Q, Depth);
3305 } else {
3306 if (MDNode *Ranges = Q.IIQ.getMetadata(I: Call, KindID: LLVMContext::MD_range))
3307 return rangeMetadataExcludesValue(Ranges, Value: APInt::getZero(numBits: BitWidth));
3308 if (std::optional<ConstantRange> Range = Call->getRange()) {
3309 const APInt ZeroValue(Range->getBitWidth(), 0);
3310 if (!Range->contains(Val: ZeroValue))
3311 return true;
3312 }
3313 if (const Value *RV = Call->getReturnedArgOperand())
3314 if (RV->getType() == I->getType() && isKnownNonZero(V: RV, Q, Depth))
3315 return true;
3316 }
3317
3318 if (auto *II = dyn_cast<IntrinsicInst>(Val: I)) {
3319 switch (II->getIntrinsicID()) {
3320 case Intrinsic::sshl_sat:
3321 case Intrinsic::ushl_sat:
3322 case Intrinsic::abs:
3323 case Intrinsic::bitreverse:
3324 case Intrinsic::bswap:
3325 case Intrinsic::ctpop:
3326 return isKnownNonZero(V: II->getArgOperand(i: 0), DemandedElts, Q, Depth);
3327 // NB: We don't handle usub_sat here because in any case where we can prove
3328 // it non-zero, InstCombine will fold it to `sub nuw`.
3329 case Intrinsic::ssub_sat:
3330 return isNonZeroSub(DemandedElts, Q, BitWidth, X: II->getArgOperand(i: 0),
3331 Y: II->getArgOperand(i: 1), Depth);
3332 case Intrinsic::sadd_sat:
3333 return isNonZeroAdd(DemandedElts, Q, BitWidth, X: II->getArgOperand(i: 0),
3334 Y: II->getArgOperand(i: 1),
3335 /*NSW=*/true, /* NUW=*/false, Depth);
3336 // Vec reverse preserves zero/non-zero status from input vec.
3337 case Intrinsic::vector_reverse:
3338 return isKnownNonZero(V: II->getArgOperand(i: 0), DemandedElts: DemandedElts.reverseBits(),
3339 Q, Depth);
3340 // A umax/umin/smax/smin/or reduction of all non-zero elements is always non-zero.
3341 case Intrinsic::vector_reduce_or:
3342 case Intrinsic::vector_reduce_umax:
3343 case Intrinsic::vector_reduce_umin:
3344 case Intrinsic::vector_reduce_smax:
3345 case Intrinsic::vector_reduce_smin:
3346 return isKnownNonZero(V: II->getArgOperand(i: 0), Q, Depth);
3347 case Intrinsic::umax:
3348 case Intrinsic::uadd_sat:
3349 // umax(X, (X == 0)) is non zero
3350 // X +usat (X == 0) is non zero
3351 if (matchOpWithOpEqZero(Op0: II->getArgOperand(i: 0), Op1: II->getArgOperand(i: 1)))
3352 return true;
3353
3354 return isKnownNonZero(V: II->getArgOperand(i: 1), DemandedElts, Q, Depth) ||
3355 isKnownNonZero(V: II->getArgOperand(i: 0), DemandedElts, Q, Depth);
3356 case Intrinsic::smax: {
3357 // If either arg is strictly positive the result is non-zero. Otherwise
3358 // the result is non-zero if both ops are non-zero.
3359 auto IsNonZero = [&](Value *Op, std::optional<bool> &OpNonZero,
3360 const KnownBits &OpKnown) {
3361 if (!OpNonZero.has_value())
3362 OpNonZero = OpKnown.isNonZero() ||
3363 isKnownNonZero(V: Op, DemandedElts, Q, Depth);
3364 return *OpNonZero;
3365 };
3366 // Avoid re-computing isKnownNonZero.
3367 std::optional<bool> Op0NonZero, Op1NonZero;
3368 KnownBits Op1Known =
3369 computeKnownBits(V: II->getArgOperand(i: 1), DemandedElts, Q, Depth);
3370 if (Op1Known.isNonNegative() &&
3371 IsNonZero(II->getArgOperand(i: 1), Op1NonZero, Op1Known))
3372 return true;
3373 KnownBits Op0Known =
3374 computeKnownBits(V: II->getArgOperand(i: 0), DemandedElts, Q, Depth);
3375 if (Op0Known.isNonNegative() &&
3376 IsNonZero(II->getArgOperand(i: 0), Op0NonZero, Op0Known))
3377 return true;
3378 return IsNonZero(II->getArgOperand(i: 1), Op1NonZero, Op1Known) &&
3379 IsNonZero(II->getArgOperand(i: 0), Op0NonZero, Op0Known);
3380 }
3381 case Intrinsic::smin: {
3382 // If either arg is negative the result is non-zero. Otherwise
3383 // the result is non-zero if both ops are non-zero.
3384 KnownBits Op1Known =
3385 computeKnownBits(V: II->getArgOperand(i: 1), DemandedElts, Q, Depth);
3386 if (Op1Known.isNegative())
3387 return true;
3388 KnownBits Op0Known =
3389 computeKnownBits(V: II->getArgOperand(i: 0), DemandedElts, Q, Depth);
3390 if (Op0Known.isNegative())
3391 return true;
3392
3393 if (Op1Known.isNonZero() && Op0Known.isNonZero())
3394 return true;
3395 }
3396 [[fallthrough]];
3397 case Intrinsic::umin:
3398 return isKnownNonZero(V: II->getArgOperand(i: 0), DemandedElts, Q, Depth) &&
3399 isKnownNonZero(V: II->getArgOperand(i: 1), DemandedElts, Q, Depth);
3400 case Intrinsic::cttz:
3401 return computeKnownBits(V: II->getArgOperand(i: 0), DemandedElts, Q, Depth)
3402 .Zero[0];
3403 case Intrinsic::ctlz:
3404 return computeKnownBits(V: II->getArgOperand(i: 0), DemandedElts, Q, Depth)
3405 .isNonNegative();
3406 case Intrinsic::fshr:
3407 case Intrinsic::fshl:
3408 // If Op0 == Op1, this is a rotate. rotate(x, y) != 0 iff x != 0.
3409 if (II->getArgOperand(i: 0) == II->getArgOperand(i: 1))
3410 return isKnownNonZero(V: II->getArgOperand(i: 0), DemandedElts, Q, Depth);
3411 break;
3412 case Intrinsic::vscale:
3413 return true;
3414 case Intrinsic::experimental_get_vector_length:
3415 return isKnownNonZero(V: I->getOperand(i: 0), Q, Depth);
3416 default:
3417 break;
3418 }
3419 break;
3420 }
3421
3422 return false;
3423 }
3424 }
3425
3426 KnownBits Known(BitWidth);
3427 computeKnownBits(V: I, DemandedElts, Known, Q, Depth);
3428 return Known.One != 0;
3429}
3430
3431/// Return true if the given value is known to be non-zero when defined. For
3432/// vectors, return true if every demanded element is known to be non-zero when
3433/// defined. For pointers, if the context instruction and dominator tree are
3434/// specified, perform context-sensitive analysis and return true if the
3435/// pointer couldn't possibly be null at the specified instruction.
3436/// Supports values with integer or pointer type and vectors of integers.
3437bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
3438 const SimplifyQuery &Q, unsigned Depth) {
3439 Type *Ty = V->getType();
3440
3441#ifndef NDEBUG
3442 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
3443
3444 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
3445 assert(
3446 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
3447 "DemandedElt width should equal the fixed vector number of elements");
3448 } else {
3449 assert(DemandedElts == APInt(1, 1) &&
3450 "DemandedElt width should be 1 for scalars");
3451 }
3452#endif
3453
3454 if (auto *C = dyn_cast<Constant>(Val: V)) {
3455 if (C->isNullValue())
3456 return false;
3457 if (isa<ConstantInt>(Val: C))
3458 // Must be non-zero due to null test above.
3459 return true;
3460
3461 // For constant vectors, check that all elements are poison or known
3462 // non-zero to determine that the whole vector is known non-zero.
3463 if (auto *VecTy = dyn_cast<FixedVectorType>(Val: Ty)) {
3464 for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) {
3465 if (!DemandedElts[i])
3466 continue;
3467 Constant *Elt = C->getAggregateElement(Elt: i);
3468 if (!Elt || Elt->isNullValue())
3469 return false;
3470 if (!isa<PoisonValue>(Val: Elt) && !isa<ConstantInt>(Val: Elt))
3471 return false;
3472 }
3473 return true;
3474 }
3475
3476 // A constant ptrauth can be null iff its base pointer can be.
3477 if (auto *CPA = dyn_cast<ConstantPtrAuth>(Val: V))
3478 return isKnownNonZero(V: CPA->getPointer(), DemandedElts, Q, Depth);
3479
3480 // A global variable in address space 0 is non null unless extern weak
3481 // or an absolute symbol reference. Other address spaces may have null as a
3482 // valid address for a global, so we can't assume anything.
3483 if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: V)) {
3484 if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
3485 GV->getType()->getAddressSpace() == 0)
3486 return true;
3487 }
3488
3489 // For constant expressions, fall through to the Operator code below.
3490 if (!isa<ConstantExpr>(Val: V))
3491 return false;
3492 }
3493
3494 if (const auto *A = dyn_cast<Argument>(Val: V))
3495 if (std::optional<ConstantRange> Range = A->getRange()) {
3496 const APInt ZeroValue(Range->getBitWidth(), 0);
3497 if (!Range->contains(Val: ZeroValue))
3498 return true;
3499 }
3500
3501 if (!isa<Constant>(Val: V) && isKnownNonZeroFromAssume(V, Q))
3502 return true;
3503
3504 // Some of the tests below are recursive, so bail out if we hit the limit.
3505 if (Depth++ >= MaxAnalysisRecursionDepth)
3506 return false;
3507
3508 // Check for pointer simplifications.
3509
3510 if (PointerType *PtrTy = dyn_cast<PointerType>(Val: Ty)) {
3511 // A byval or inalloca argument is never null, unless null is a defined
3512 // address in its address space. A nonnull argument is assumed never 0.
3513 if (const Argument *A = dyn_cast<Argument>(Val: V)) {
3514 if (((A->hasPassPointeeByValueCopyAttr() &&
3515 !NullPointerIsDefined(F: A->getParent(), AS: PtrTy->getAddressSpace())) ||
3516 A->hasNonNullAttr()))
3517 return true;
3518 }
3519 }
3520
3521 if (const auto *I = dyn_cast<Operator>(Val: V))
3522 if (isKnownNonZeroFromOperator(I, DemandedElts, Q, Depth))
3523 return true;
3524
3525 if (!isa<Constant>(Val: V) &&
3526 isKnownNonNullFromDominatingCondition(V, CtxI: Q.CxtI, DT: Q.DT))
3527 return true;
3528
3529 if (const Value *Stripped = stripNullTest(V))
3530 return isKnownNonZero(V: Stripped, DemandedElts, Q, Depth);
3531
3532 return false;
3533}
3534
3535bool llvm::isKnownNonZero(const Value *V, const SimplifyQuery &Q,
3536 unsigned Depth) {
3537 auto *FVTy = dyn_cast<FixedVectorType>(Val: V->getType());
3538 APInt DemandedElts =
3539 FVTy ? APInt::getAllOnes(numBits: FVTy->getNumElements()) : APInt(1, 1);
3540 return ::isKnownNonZero(V, DemandedElts, Q, Depth);
3541}
3542
3543/// If the pair of operators are the same invertible function, return the
3544/// operands of the function corresponding to each input. Otherwise,
3545/// return std::nullopt. An invertible function is one that is 1-to-1 and maps
3546/// every input value to exactly one output value. This is equivalent to
3547/// saying that Op1 and Op2 are equal exactly when the specified pair of
3548/// operands are equal (except that Op1 and Op2 may be poison more often).
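/// For example, `add i32 %x, %a` and `add i32 %x, %b` are equal exactly when
/// %a == %b (addition of a common operand is invertible modulo 2^N), so this
/// returns the pair (%a, %b) for them.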
3549static std::optional<std::pair<Value*, Value*>>
3550getInvertibleOperands(const Operator *Op1,
3551 const Operator *Op2) {
3552 if (Op1->getOpcode() != Op2->getOpcode())
3553 return std::nullopt;
3554
3555 auto getOperands = [&](unsigned OpNum) -> auto {
3556 return std::make_pair(x: Op1->getOperand(i: OpNum), y: Op2->getOperand(i: OpNum));
3557 };
3558
3559 switch (Op1->getOpcode()) {
3560 default:
3561 break;
3562 case Instruction::Or:
3563 if (!cast<PossiblyDisjointInst>(Val: Op1)->isDisjoint() ||
3564 !cast<PossiblyDisjointInst>(Val: Op2)->isDisjoint())
3565 break;
3566 [[fallthrough]];
3567 case Instruction::Xor:
3568 case Instruction::Add: {
3569 Value *Other;
3570 if (match(V: Op2, P: m_c_BinOp(L: m_Specific(V: Op1->getOperand(i: 0)), R: m_Value(V&: Other))))
3571 return std::make_pair(x: Op1->getOperand(i: 1), y&: Other);
3572 if (match(V: Op2, P: m_c_BinOp(L: m_Specific(V: Op1->getOperand(i: 1)), R: m_Value(V&: Other))))
3573 return std::make_pair(x: Op1->getOperand(i: 0), y&: Other);
3574 break;
3575 }
3576 case Instruction::Sub:
3577 if (Op1->getOperand(i: 0) == Op2->getOperand(i: 0))
3578 return getOperands(1);
3579 if (Op1->getOperand(i: 1) == Op2->getOperand(i: 1))
3580 return getOperands(0);
3581 break;
3582 case Instruction::Mul: {
3583 // invertible if A * B == (A * B) mod 2^N where A and B are integers
3584 // and N is the bitwidth. The nsw case is non-obvious, but proven by
3585 // alive2: https://alive2.llvm.org/ce/z/Z6D5qK
3586 auto *OBO1 = cast<OverflowingBinaryOperator>(Val: Op1);
3587 auto *OBO2 = cast<OverflowingBinaryOperator>(Val: Op2);
3588 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
3589 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
3590 break;
3591
3592 // Assume operand order has been canonicalized
3593 if (Op1->getOperand(i: 1) == Op2->getOperand(i: 1) &&
3594 isa<ConstantInt>(Val: Op1->getOperand(i: 1)) &&
3595 !cast<ConstantInt>(Val: Op1->getOperand(i: 1))->isZero())
3596 return getOperands(0);
3597 break;
3598 }
3599 case Instruction::Shl: {
3600 // Same as multiplies, with the difference that we don't need to check
3601 // for a non-zero multiply. Shifts always multiply by non-zero.
3602 auto *OBO1 = cast<OverflowingBinaryOperator>(Val: Op1);
3603 auto *OBO2 = cast<OverflowingBinaryOperator>(Val: Op2);
3604 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
3605 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
3606 break;
3607
3608 if (Op1->getOperand(i: 1) == Op2->getOperand(i: 1))
3609 return getOperands(0);
3610 break;
3611 }
3612 case Instruction::AShr:
3613 case Instruction::LShr: {
3614 auto *PEO1 = cast<PossiblyExactOperator>(Val: Op1);
3615 auto *PEO2 = cast<PossiblyExactOperator>(Val: Op2);
3616 if (!PEO1->isExact() || !PEO2->isExact())
3617 break;
3618
3619 if (Op1->getOperand(i: 1) == Op2->getOperand(i: 1))
3620 return getOperands(0);
3621 break;
3622 }
3623 case Instruction::SExt:
3624 case Instruction::ZExt:
3625 if (Op1->getOperand(i: 0)->getType() == Op2->getOperand(i: 0)->getType())
3626 return getOperands(0);
3627 break;
3628 case Instruction::PHI: {
3629 const PHINode *PN1 = cast<PHINode>(Val: Op1);
3630 const PHINode *PN2 = cast<PHINode>(Val: Op2);
3631
3632 // If PN1 and PN2 are both recurrences, can we prove the entire recurrences
3633 // are a single invertible function of the start values? Note that repeated
3634 // application of an invertible function is also invertible
3635 BinaryOperator *BO1 = nullptr;
3636 Value *Start1 = nullptr, *Step1 = nullptr;
3637 BinaryOperator *BO2 = nullptr;
3638 Value *Start2 = nullptr, *Step2 = nullptr;
3639 if (PN1->getParent() != PN2->getParent() ||
3640 !matchSimpleRecurrence(P: PN1, BO&: BO1, Start&: Start1, Step&: Step1) ||
3641 !matchSimpleRecurrence(P: PN2, BO&: BO2, Start&: Start2, Step&: Step2))
3642 break;
3643
3644 auto Values = getInvertibleOperands(Op1: cast<Operator>(Val: BO1),
3645 Op2: cast<Operator>(Val: BO2));
3646 if (!Values)
3647 break;
3648
3649 // We have to be careful of mutually defined recurrences here. Ex:
3650 // * X_i = X_(i-1) OP Y_(i-1), and Y_i = X_(i-1) OP V
3651 // * X_i = Y_i = X_(i-1) OP Y_(i-1)
3652 // The invertibility of these is complicated, and not worth reasoning
3653 // about (yet?).
3654 if (Values->first != PN1 || Values->second != PN2)
3655 break;
3656
3657 return std::make_pair(x&: Start1, y&: Start2);
3658 }
3659 }
3660 return std::nullopt;
3661}
3662
3663/// Return true if V1 == (binop V2, X), where X is known non-zero.
3664/// Only handle a small subset of binops where (binop V2, X) with non-zero X
3665/// implies V2 != V1.
3666static bool isModifyingBinopOfNonZero(const Value *V1, const Value *V2,
3667 const APInt &DemandedElts,
3668 const SimplifyQuery &Q, unsigned Depth) {
3669 const BinaryOperator *BO = dyn_cast<BinaryOperator>(Val: V1);
3670 if (!BO)
3671 return false;
3672 switch (BO->getOpcode()) {
3673 default:
3674 break;
3675 case Instruction::Or:
3676 if (!cast<PossiblyDisjointInst>(Val: V1)->isDisjoint())
3677 break;
3678 [[fallthrough]];
3679 case Instruction::Xor:
3680 case Instruction::Add:
3681 Value *Op = nullptr;
3682 if (V2 == BO->getOperand(i_nocapture: 0))
3683 Op = BO->getOperand(i_nocapture: 1);
3684 else if (V2 == BO->getOperand(i_nocapture: 1))
3685 Op = BO->getOperand(i_nocapture: 0);
3686 else
3687 return false;
3688 return isKnownNonZero(V: Op, DemandedElts, Q, Depth: Depth + 1);
3689 }
3690 return false;
3691}
3692
3693/// Return true if V2 == V1 * C, where V1 is known non-zero, C is not 0/1 and
3694/// the multiplication is nuw or nsw.
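/// For example, if %v is known non-zero, then `%w = mul nuw i32 %v, 3`
/// satisfies %w == 3 * %v >= 2 * %v > %v without wrapping, so %w != %v.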
3695static bool isNonEqualMul(const Value *V1, const Value *V2,
3696 const APInt &DemandedElts, const SimplifyQuery &Q,
3697 unsigned Depth) {
3698 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: V2)) {
3699 const APInt *C;
3700 return match(V: OBO, P: m_Mul(L: m_Specific(V: V1), R: m_APInt(Res&: C))) &&
3701 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
3702 !C->isZero() && !C->isOne() &&
3703 isKnownNonZero(V: V1, DemandedElts, Q, Depth: Depth + 1);
3704 }
3705 return false;
3706}
3707
3708/// Return true if V2 == V1 << C, where V1 is known non-zero, C is not 0 and
3709/// the shift is nuw or nsw.
3710static bool isNonEqualShl(const Value *V1, const Value *V2,
3711 const APInt &DemandedElts, const SimplifyQuery &Q,
3712 unsigned Depth) {
3713 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: V2)) {
3714 const APInt *C;
3715 return match(V: OBO, P: m_Shl(L: m_Specific(V: V1), R: m_APInt(Res&: C))) &&
3716 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
3717 !C->isZero() && isKnownNonZero(V: V1, DemandedElts, Q, Depth: Depth + 1);
3718 }
3719 return false;
3720}
3721
3722static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2,
3723 const APInt &DemandedElts, const SimplifyQuery &Q,
3724 unsigned Depth) {
3725 // Check that the two PHIs are in the same block.
3726 if (PN1->getParent() != PN2->getParent())
3727 return false;
3728
3729 SmallPtrSet<const BasicBlock *, 8> VisitedBBs;
3730 bool UsedFullRecursion = false;
3731 for (const BasicBlock *IncomBB : PN1->blocks()) {
3732 if (!VisitedBBs.insert(Ptr: IncomBB).second)
3733 continue; // Don't reprocess blocks that we have dealt with already.
3734 const Value *IV1 = PN1->getIncomingValueForBlock(BB: IncomBB);
3735 const Value *IV2 = PN2->getIncomingValueForBlock(BB: IncomBB);
3736 const APInt *C1, *C2;
3737 if (match(V: IV1, P: m_APInt(Res&: C1)) && match(V: IV2, P: m_APInt(Res&: C2)) && *C1 != *C2)
3738 continue;
3739
3740 // Only one pair of phi operands is allowed for full recursion.
3741 if (UsedFullRecursion)
3742 return false;
3743
3744 SimplifyQuery RecQ = Q.getWithoutCondContext();
3745 RecQ.CxtI = IncomBB->getTerminator();
3746 if (!isKnownNonEqual(V1: IV1, V2: IV2, DemandedElts, Q: RecQ, Depth: Depth + 1))
3747 return false;
3748 UsedFullRecursion = true;
3749 }
3750 return true;
3751}
3752
3753static bool isNonEqualSelect(const Value *V1, const Value *V2,
3754 const APInt &DemandedElts, const SimplifyQuery &Q,
3755 unsigned Depth) {
3756 const SelectInst *SI1 = dyn_cast<SelectInst>(Val: V1);
3757 if (!SI1)
3758 return false;
3759
3760 if (const SelectInst *SI2 = dyn_cast<SelectInst>(Val: V2)) {
3761 const Value *Cond1 = SI1->getCondition();
3762 const Value *Cond2 = SI2->getCondition();
3763 if (Cond1 == Cond2)
3764 return isKnownNonEqual(V1: SI1->getTrueValue(), V2: SI2->getTrueValue(),
3765 DemandedElts, Q, Depth: Depth + 1) &&
3766 isKnownNonEqual(V1: SI1->getFalseValue(), V2: SI2->getFalseValue(),
3767 DemandedElts, Q, Depth: Depth + 1);
3768 }
3769 return isKnownNonEqual(V1: SI1->getTrueValue(), V2, DemandedElts, Q, Depth: Depth + 1) &&
3770 isKnownNonEqual(V1: SI1->getFalseValue(), V2, DemandedElts, Q, Depth: Depth + 1);
3771}
3772
3773// Check whether A is a GEP whose pointer operand is a PHI in a loop and B is
3774// either a plain pointer or another GEP. If the PHI has 2 incoming values, one
3775// of them being the recursive GEP A and the other a pointer with the same base
3776// as B, then A and B cannot be equal when the start offset is >= B's offset
3777// and the per-iteration step is strictly positive (or <= and negative).
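// For example, in a loop such as:
//   %p = phi ptr [ %base, %entry ], [ %p.next, %loop ]
//   %p.next = getelementptr inbounds i8, ptr %p, i64 4
// %p.next starts at %base + 4 and only ever moves to strictly higher offsets,
// so it can never equal %base (an illustrative case of A = %p.next, B = %base).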
3778static bool isNonEqualPointersWithRecursiveGEP(const Value *A, const Value *B,
3779 const SimplifyQuery &Q) {
3780 if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy())
3781 return false;
3782
3783 auto *GEPA = dyn_cast<GEPOperator>(Val: A);
3784 if (!GEPA || GEPA->getNumIndices() != 1 || !isa<Constant>(Val: GEPA->idx_begin()))
3785 return false;
3786
3787 // Handle 2 incoming PHI values with one being a recursive GEP.
3788 auto *PN = dyn_cast<PHINode>(Val: GEPA->getPointerOperand());
3789 if (!PN || PN->getNumIncomingValues() != 2)
3790 return false;
3791
3792 // Search for the recursive GEP as an incoming operand, and record that as
3793 // Step.
3794 Value *Start = nullptr;
3795 Value *Step = const_cast<Value *>(A);
3796 if (PN->getIncomingValue(i: 0) == Step)
3797 Start = PN->getIncomingValue(i: 1);
3798 else if (PN->getIncomingValue(i: 1) == Step)
3799 Start = PN->getIncomingValue(i: 0);
3800 else
3801 return false;
3802
3803 // The other incoming value's base should match B's base. The pointers are
3804 // known non-equal if either:
3805 //   StartOffset >= OffsetB && StepOffset > 0, or
3806 //   StartOffset <= OffsetB && StepOffset < 0.
3807 // We use stripAndAccumulateInBoundsConstantOffsets to restrict the
3808 // optimisation to inbounds GEPs only.
3809 unsigned IndexWidth = Q.DL.getIndexTypeSizeInBits(Ty: Start->getType());
3810 APInt StartOffset(IndexWidth, 0);
3811 Start = Start->stripAndAccumulateInBoundsConstantOffsets(DL: Q.DL, Offset&: StartOffset);
3812 APInt StepOffset(IndexWidth, 0);
3813 Step = Step->stripAndAccumulateInBoundsConstantOffsets(DL: Q.DL, Offset&: StepOffset);
3814
3815 // Check if Base Pointer of Step matches the PHI.
3816 if (Step != PN)
3817 return false;
3818 APInt OffsetB(IndexWidth, 0);
3819 B = B->stripAndAccumulateInBoundsConstantOffsets(DL: Q.DL, Offset&: OffsetB);
3820 return Start == B &&
3821 ((StartOffset.sge(RHS: OffsetB) && StepOffset.isStrictlyPositive()) ||
3822 (StartOffset.sle(RHS: OffsetB) && StepOffset.isNegative()));
3823}
3824
3825static bool isKnownNonEqualFromContext(const Value *V1, const Value *V2,
3826 const SimplifyQuery &Q, unsigned Depth) {
3827 if (!Q.CxtI)
3828 return false;
3829
3830 // Try to infer NonEqual based on information from dominating conditions.
3831 if (Q.DC && Q.DT) {
3832 auto IsKnownNonEqualFromDominatingCondition = [&](const Value *V) {
3833 for (BranchInst *BI : Q.DC->conditionsFor(V)) {
3834 Value *Cond = BI->getCondition();
3835 BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(i: 0));
3836 if (Q.DT->dominates(BBE: Edge0, BB: Q.CxtI->getParent()) &&
3837 isImpliedCondition(LHS: Cond, RHSPred: ICmpInst::ICMP_NE, RHSOp0: V1, RHSOp1: V2, DL: Q.DL,
3838 /*LHSIsTrue=*/true, Depth)
3839 .value_or(u: false))
3840 return true;
3841
3842 BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(i: 1));
3843 if (Q.DT->dominates(BBE: Edge1, BB: Q.CxtI->getParent()) &&
3844 isImpliedCondition(LHS: Cond, RHSPred: ICmpInst::ICMP_NE, RHSOp0: V1, RHSOp1: V2, DL: Q.DL,
3845 /*LHSIsTrue=*/false, Depth)
3846 .value_or(u: false))
3847 return true;
3848 }
3849
3850 return false;
3851 };
3852
3853 if (IsKnownNonEqualFromDominatingCondition(V1) ||
3854 IsKnownNonEqualFromDominatingCondition(V2))
3855 return true;
3856 }
3857
3858 if (!Q.AC)
3859 return false;
3860
3861 // Try to infer NonEqual based on information from assumptions.
3862 for (auto &AssumeVH : Q.AC->assumptionsFor(V: V1)) {
3863 if (!AssumeVH)
3864 continue;
3865 CallInst *I = cast<CallInst>(Val&: AssumeVH);
3866
3867 assert(I->getFunction() == Q.CxtI->getFunction() &&
3868 "Got assumption for the wrong function!");
3869 assert(I->getIntrinsicID() == Intrinsic::assume &&
3870 "must be an assume intrinsic");
3871
3872 if (isImpliedCondition(LHS: I->getArgOperand(i: 0), RHSPred: ICmpInst::ICMP_NE, RHSOp0: V1, RHSOp1: V2, DL: Q.DL,
3873 /*LHSIsTrue=*/true, Depth)
3874 .value_or(u: false) &&
3875 isValidAssumeForContext(Inv: I, CxtI: Q.CxtI, DT: Q.DT))
3876 return true;
3877 }
3878
3879 return false;
3880}
3881
3882/// Return true if it is known that V1 != V2.
3883static bool isKnownNonEqual(const Value *V1, const Value *V2,
3884 const APInt &DemandedElts, const SimplifyQuery &Q,
3885 unsigned Depth) {
3886 if (V1 == V2)
3887 return false;
3888 if (V1->getType() != V2->getType())
3889 // We can't look through casts yet.
3890 return false;
3891
3892 if (Depth >= MaxAnalysisRecursionDepth)
3893 return false;
3894
3895 // See if we can recurse through (exactly one of) our operands. This
3896 // requires our operation be 1-to-1 and map every input value to exactly
3897 // one output value. Such an operation is invertible.
3898 auto *O1 = dyn_cast<Operator>(Val: V1);
3899 auto *O2 = dyn_cast<Operator>(Val: V2);
3900 if (O1 && O2 && O1->getOpcode() == O2->getOpcode()) {
3901 if (auto Values = getInvertibleOperands(Op1: O1, Op2: O2))
3902 return isKnownNonEqual(V1: Values->first, V2: Values->second, DemandedElts, Q,
3903 Depth: Depth + 1);
3904
3905 if (const PHINode *PN1 = dyn_cast<PHINode>(Val: V1)) {
3906 const PHINode *PN2 = cast<PHINode>(Val: V2);
3907 // FIXME: This is missing a generalization to handle the case where one is
3908 // a PHI and another one isn't.
3909 if (isNonEqualPHIs(PN1, PN2, DemandedElts, Q, Depth))
3910 return true;
3911 }
3912 }
3913
3914 if (isModifyingBinopOfNonZero(V1, V2, DemandedElts, Q, Depth) ||
3915 isModifyingBinopOfNonZero(V1: V2, V2: V1, DemandedElts, Q, Depth))
3916 return true;
3917
3918 if (isNonEqualMul(V1, V2, DemandedElts, Q, Depth) ||
3919 isNonEqualMul(V1: V2, V2: V1, DemandedElts, Q, Depth))
3920 return true;
3921
3922 if (isNonEqualShl(V1, V2, DemandedElts, Q, Depth) ||
3923 isNonEqualShl(V1: V2, V2: V1, DemandedElts, Q, Depth))
3924 return true;
3925
3926 if (V1->getType()->isIntOrIntVectorTy()) {
3927 // Are any known bits in V1 contradictory to known bits in V2? If V1
3928 // has a known zero where V2 has a known one, they must not be equal.
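 // For example, `or i8 %x, 1` always has bit 0 set while `shl i8 %y, 1`
 // always has bit 0 clear, so the two values can never be equal.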
3929 KnownBits Known1 = computeKnownBits(V: V1, DemandedElts, Q, Depth);
3930 if (!Known1.isUnknown()) {
3931 KnownBits Known2 = computeKnownBits(V: V2, DemandedElts, Q, Depth);
3932 if (Known1.Zero.intersects(RHS: Known2.One) ||
3933 Known2.Zero.intersects(RHS: Known1.One))
3934 return true;
3935 }
3936 }
3937
3938 if (isNonEqualSelect(V1, V2, DemandedElts, Q, Depth) ||
3939 isNonEqualSelect(V1: V2, V2: V1, DemandedElts, Q, Depth))
3940 return true;
3941
3942 if (isNonEqualPointersWithRecursiveGEP(A: V1, B: V2, Q) ||
3943 isNonEqualPointersWithRecursiveGEP(A: V2, B: V1, Q))
3944 return true;
3945
3946 Value *A, *B;
3947 // PtrToInts are NonEqual if their Ptrs are NonEqual.
3948 // Check PtrToInt type matches the pointer size.
3949 if (match(V: V1, P: m_PtrToIntSameSize(DL: Q.DL, Op: m_Value(V&: A))) &&
3950 match(V: V2, P: m_PtrToIntSameSize(DL: Q.DL, Op: m_Value(V&: B))))
3951 return isKnownNonEqual(V1: A, V2: B, DemandedElts, Q, Depth: Depth + 1);
3952
3953 if (isKnownNonEqualFromContext(V1, V2, Q, Depth))
3954 return true;
3955
3956 return false;
3957}
3958
3959/// For vector constants, loop over the elements and find the constant with the
3960/// minimum number of sign bits. Return 0 if the value is not a vector constant
3961/// or if any element was not analyzed; otherwise, return the count for the
3962/// element with the minimum number of sign bits.
3963static unsigned computeNumSignBitsVectorConstant(const Value *V,
3964 const APInt &DemandedElts,
3965 unsigned TyBits) {
3966 const auto *CV = dyn_cast<Constant>(Val: V);
3967 if (!CV || !isa<FixedVectorType>(Val: CV->getType()))
3968 return 0;
3969
3970 unsigned MinSignBits = TyBits;
3971 unsigned NumElts = cast<FixedVectorType>(Val: CV->getType())->getNumElements();
3972 for (unsigned i = 0; i != NumElts; ++i) {
3973 if (!DemandedElts[i])
3974 continue;
3975 // If we find a non-ConstantInt, bail out.
3976 auto *Elt = dyn_cast_or_null<ConstantInt>(Val: CV->getAggregateElement(Elt: i));
3977 if (!Elt)
3978 return 0;
3979
3980 MinSignBits = std::min(a: MinSignBits, b: Elt->getValue().getNumSignBits());
3981 }
3982
3983 return MinSignBits;
3984}
3985
3986static unsigned ComputeNumSignBitsImpl(const Value *V,
3987 const APInt &DemandedElts,
3988 const SimplifyQuery &Q, unsigned Depth);
3989
3990static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
3991 const SimplifyQuery &Q, unsigned Depth) {
3992 unsigned Result = ComputeNumSignBitsImpl(V, DemandedElts, Q, Depth);
3993 assert(Result > 0 && "At least one sign bit needs to be present!");
3994 return Result;
3995}
3996
3997/// Return the number of times the sign bit of the register is replicated into
3998/// the other bits. We know that at least 1 bit is always equal to the sign bit
3999/// (itself), but other cases can give us information. For example, immediately
4000/// after an "ashr X, 2", we know that the top 3 bits are all equal to each
4001/// other, so we return 3. For vectors, return the number of sign bits for the
4002/// vector element with the minimum number of known sign bits of the demanded
4003/// elements in the vector specified by DemandedElts.
4004static unsigned ComputeNumSignBitsImpl(const Value *V,
4005 const APInt &DemandedElts,
4006 const SimplifyQuery &Q, unsigned Depth) {
4007 Type *Ty = V->getType();
4008#ifndef NDEBUG
4009 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
4010
4011 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
4012 assert(
4013 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
4014 "DemandedElt width should equal the fixed vector number of elements");
4015 } else {
4016 assert(DemandedElts == APInt(1, 1) &&
4017 "DemandedElt width should be 1 for scalars");
4018 }
4019#endif
4020
4021 // We return the minimum number of sign bits that are guaranteed to be present
4022 // in V, so for undef we have to conservatively return 1. We don't have the
4023 // same behavior for poison though -- that's a FIXME today.
4024
4025 Type *ScalarTy = Ty->getScalarType();
4026 unsigned TyBits = ScalarTy->isPointerTy() ?
4027 Q.DL.getPointerTypeSizeInBits(ScalarTy) :
4028 Q.DL.getTypeSizeInBits(Ty: ScalarTy);
4029
4030 unsigned Tmp, Tmp2;
4031 unsigned FirstAnswer = 1;
4032
4033 // Note that ConstantInt is handled by the general computeKnownBits case
4034 // below.
4035
4036 if (Depth == MaxAnalysisRecursionDepth)
4037 return 1;
4038
4039 if (auto *U = dyn_cast<Operator>(Val: V)) {
4040 switch (Operator::getOpcode(V)) {
4041 default: break;
4042 case Instruction::BitCast: {
4043 Value *Src = U->getOperand(i: 0);
4044 Type *SrcTy = Src->getType();
4045
4046 // Skip if the source type is not an integer or integer vector type;
4047 // we only handle integer-like sources here.
4048 if (!SrcTy->isIntOrIntVectorTy())
4049 break;
4050
4051 unsigned SrcBits = SrcTy->getScalarSizeInBits();
4052
4053 // Bitcast 'large element' scalar/vector to 'small element' vector.
4054 if ((SrcBits % TyBits) != 0)
4055 break;
4056
4057 // Only proceed if the destination type is a fixed-size vector
4058 if (isa<FixedVectorType>(Val: Ty)) {
4059 // Fast case - sign splat can be simply split across the small elements.
4060 // This works for both vector and scalar sources
4061 Tmp = ComputeNumSignBits(V: Src, Q, Depth: Depth + 1);
4062 if (Tmp == SrcBits)
4063 return TyBits;
4064 }
4065 break;
4066 }
4067 case Instruction::SExt:
4068 Tmp = TyBits - U->getOperand(i: 0)->getType()->getScalarSizeInBits();
4069 return ComputeNumSignBits(V: U->getOperand(i: 0), DemandedElts, Q, Depth: Depth + 1) +
4070 Tmp;
4071
4072 case Instruction::SDiv: {
4073 const APInt *Denominator;
4074 // sdiv X, C -> adds log(C) sign bits.
4075 if (match(V: U->getOperand(i: 1), P: m_APInt(Res&: Denominator))) {
4076
4077 // Ignore non-positive denominator.
4078 if (!Denominator->isStrictlyPositive())
4079 break;
4080
4081 // Calculate the incoming numerator bits.
4082 unsigned NumBits =
4083 ComputeNumSignBits(V: U->getOperand(i: 0), DemandedElts, Q, Depth: Depth + 1);
4084
4085 // Add floor(log(C)) bits to the numerator bits.
4086 return std::min(a: TyBits, b: NumBits + Denominator->logBase2());
4087 }
4088 break;
4089 }
4090
4091 case Instruction::SRem: {
4092 Tmp = ComputeNumSignBits(V: U->getOperand(i: 0), DemandedElts, Q, Depth: Depth + 1);
4093
4094 const APInt *Denominator;
4095 // srem X, C -> we know that the result is within [-C+1,C) when C is a
4096 // positive constant. This lets us put a lower bound on the number of sign
4097 // bits.
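 // For example, `srem i32 %x, 16` produces a result in (-16, 16), which
 // always has at least 32 - ceilLogBase2(16) = 28 sign bits.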
4098 if (match(V: U->getOperand(i: 1), P: m_APInt(Res&: Denominator))) {
4099
4100 // Ignore non-positive denominator.
4101 if (Denominator->isStrictlyPositive()) {
4102 // Calculate the leading sign bit constraints by examining the
4103 // denominator. Given that the denominator is positive, there are two
4104 // cases:
4105 //
4106 // 1. The numerator is positive. The result range is [0,C) and
4107 // [0,C) u< (1 << ceilLogBase2(C)).
4108 //
4109 // 2. The numerator is negative. Then the result range is (-C,0] and
4110 // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
4111 //
4112 // Thus a lower bound on the number of sign bits is `TyBits -
4113 // ceilLogBase2(C)`.
4114
4115 unsigned ResBits = TyBits - Denominator->ceilLogBase2();
4116 Tmp = std::max(a: Tmp, b: ResBits);
4117 }
4118 }
4119 return Tmp;
4120 }
4121
4122 case Instruction::AShr: {
4123 Tmp = ComputeNumSignBits(V: U->getOperand(i: 0), DemandedElts, Q, Depth: Depth + 1);
4124 // ashr X, C -> adds C sign bits. Vectors too.
4125 const APInt *ShAmt;
4126 if (match(V: U->getOperand(i: 1), P: m_APInt(Res&: ShAmt))) {
4127 if (ShAmt->uge(RHS: TyBits))
4128 break; // Bad shift.
4129 unsigned ShAmtLimited = ShAmt->getZExtValue();
4130 Tmp += ShAmtLimited;
4131 if (Tmp > TyBits) Tmp = TyBits;
4132 }
4133 return Tmp;
4134 }
4135 case Instruction::Shl: {
4136 const APInt *ShAmt;
4137 Value *X = nullptr;
4138 if (match(V: U->getOperand(i: 1), P: m_APInt(Res&: ShAmt))) {
4139 // shl destroys sign bits.
4140 if (ShAmt->uge(RHS: TyBits))
4141 break; // Bad shift.
4142 // We can look through a zext (more or less treating it as a sext) if
4143 // all extended bits are shifted out.
4144 if (match(V: U->getOperand(i: 0), P: m_ZExt(Op: m_Value(V&: X))) &&
4145 ShAmt->uge(RHS: TyBits - X->getType()->getScalarSizeInBits())) {
4146 Tmp = ComputeNumSignBits(V: X, DemandedElts, Q, Depth: Depth + 1);
4147 Tmp += TyBits - X->getType()->getScalarSizeInBits();
4148 } else
4149 Tmp =
4150 ComputeNumSignBits(V: U->getOperand(i: 0), DemandedElts, Q, Depth: Depth + 1);
4151 if (ShAmt->uge(RHS: Tmp))
4152 break; // Shifted all sign bits out.
4153 Tmp2 = ShAmt->getZExtValue();
4154 return Tmp - Tmp2;
4155 }
4156 break;
4157 }
4158 case Instruction::And:
4159 case Instruction::Or:
4160 case Instruction::Xor: // NOT is handled here.
4161 // Logical binary ops preserve the number of sign bits at the worst.
4162 Tmp = ComputeNumSignBits(V: U->getOperand(i: 0), DemandedElts, Q, Depth: Depth + 1);
4163 if (Tmp != 1) {
4164 Tmp2 = ComputeNumSignBits(V: U->getOperand(i: 1), DemandedElts, Q, Depth: Depth + 1);
4165 FirstAnswer = std::min(a: Tmp, b: Tmp2);
4166 // We computed what we know about the sign bits as our first
4167 // answer. Now proceed to the generic code that uses
4168 // computeKnownBits, and pick whichever answer is better.
4169 }
4170 break;
4171
4172 case Instruction::Select: {
4173 // If we have a clamp pattern, we know that the number of sign bits will
4174 // be the minimum of the clamp min/max range.
4175 const Value *X;
4176 const APInt *CLow, *CHigh;
4177 if (isSignedMinMaxClamp(Select: U, In&: X, CLow, CHigh))
4178 return std::min(a: CLow->getNumSignBits(), b: CHigh->getNumSignBits());
4179
4180 Tmp = ComputeNumSignBits(V: U->getOperand(i: 1), DemandedElts, Q, Depth: Depth + 1);
4181 if (Tmp == 1)
4182 break;
4183 Tmp2 = ComputeNumSignBits(V: U->getOperand(i: 2), DemandedElts, Q, Depth: Depth + 1);
4184 return std::min(a: Tmp, b: Tmp2);
4185 }
4186
4187 case Instruction::Add:
4188 // Add can have at most one carry bit. Thus we know that the output
4189 // is, at worst, one more bit than the inputs.
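 // For example, if both i32 operands have at least 20 sign bits, the sum
 // still has at least 19.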
4190 Tmp = ComputeNumSignBits(V: U->getOperand(i: 0), Q, Depth: Depth + 1);
4191 if (Tmp == 1) break;
4192
4193 // Special case decrementing a value (ADD X, -1):
4194 if (const auto *CRHS = dyn_cast<Constant>(Val: U->getOperand(i: 1)))
4195 if (CRHS->isAllOnesValue()) {
4196 KnownBits Known(TyBits);
4197 computeKnownBits(V: U->getOperand(i: 0), DemandedElts, Known, Q, Depth: Depth + 1);
4198
4199 // If the input is known to be 0 or 1, the output is 0/-1, which is
4200 // all sign bits set.
4201 if ((Known.Zero | 1).isAllOnes())
4202 return TyBits;
4203
4204 // If we are subtracting one from a positive number, there is no carry
4205 // out of the result.
4206 if (Known.isNonNegative())
4207 return Tmp;
4208 }
4209
4210 Tmp2 = ComputeNumSignBits(V: U->getOperand(i: 1), DemandedElts, Q, Depth: Depth + 1);
4211 if (Tmp2 == 1)
4212 break;
4213 return std::min(a: Tmp, b: Tmp2) - 1;
4214
4215 case Instruction::Sub:
4216 Tmp2 = ComputeNumSignBits(V: U->getOperand(i: 1), DemandedElts, Q, Depth: Depth + 1);
4217 if (Tmp2 == 1)
4218 break;
4219
4220 // Handle NEG.
4221 if (const auto *CLHS = dyn_cast<Constant>(Val: U->getOperand(i: 0)))
4222 if (CLHS->isNullValue()) {
4223 KnownBits Known(TyBits);
4224 computeKnownBits(V: U->getOperand(i: 1), DemandedElts, Known, Q, Depth: Depth + 1);
4225 // If the input is known to be 0 or 1, the output is 0/-1, which is
4226 // all sign bits set.
4227 if ((Known.Zero | 1).isAllOnes())
4228 return TyBits;
4229
4230 // If the input is known to be positive (the sign bit is known clear),
4231 // the output of the NEG has the same number of sign bits as the
4232 // input.
4233 if (Known.isNonNegative())
4234 return Tmp2;
4235
4236 // Otherwise, we treat this like a SUB.
4237 }
4238
4239 // Sub can have at most one carry bit. Thus we know that the output
4240 // is, at worst, one more bit than the inputs.
4241 Tmp = ComputeNumSignBits(V: U->getOperand(i: 0), DemandedElts, Q, Depth: Depth + 1);
4242 if (Tmp == 1)
4243 break;
4244 return std::min(a: Tmp, b: Tmp2) - 1;
4245
4246 case Instruction::Mul: {
4247 // The output of the Mul can be at most twice the valid bits in the
4248 // inputs.
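    // (Illustrative i8 example: operands with 5 and 6 sign bits have 4 and 3
    //  "valid" bits respectively, so the product needs at most 4 + 3 = 7 valid
    //  bits and keeps at least 8 - 7 + 1 = 2 sign bits.)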
4249 unsigned SignBitsOp0 =
4250 ComputeNumSignBits(V: U->getOperand(i: 0), DemandedElts, Q, Depth: Depth + 1);
4251 if (SignBitsOp0 == 1)
4252 break;
4253 unsigned SignBitsOp1 =
4254 ComputeNumSignBits(V: U->getOperand(i: 1), DemandedElts, Q, Depth: Depth + 1);
4255 if (SignBitsOp1 == 1)
4256 break;
4257 unsigned OutValidBits =
4258 (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1);
4259 return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
4260 }
4261
4262 case Instruction::PHI: {
4263 const PHINode *PN = cast<PHINode>(Val: U);
4264 unsigned NumIncomingValues = PN->getNumIncomingValues();
4265 // Don't analyze large in-degree PHIs.
4266 if (NumIncomingValues > 4) break;
4267 // Unreachable blocks may have zero-operand PHI nodes.
4268 if (NumIncomingValues == 0) break;
4269
4270 // Take the minimum of all incoming values. This can't infinitely loop
4271 // because of our depth threshold.
4272 SimplifyQuery RecQ = Q.getWithoutCondContext();
4273 Tmp = TyBits;
4274 for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) {
4275 if (Tmp == 1) return Tmp;
4276 RecQ.CxtI = PN->getIncomingBlock(i)->getTerminator();
4277 Tmp = std::min(a: Tmp, b: ComputeNumSignBits(V: PN->getIncomingValue(i),
4278 DemandedElts, Q: RecQ, Depth: Depth + 1));
4279 }
4280 return Tmp;
4281 }
4282
4283 case Instruction::Trunc: {
4284 // If the input contained enough sign bits that some remain after the
4285 // truncation, then we can make use of that. Otherwise we don't know
4286 // anything.
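    // (Illustrative: truncating an i32 with 20 sign bits to i16 discards the 16
    //  high bits, leaving at least 20 - 16 = 4 sign bits.)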
4287 Tmp = ComputeNumSignBits(V: U->getOperand(i: 0), Q, Depth: Depth + 1);
4288 unsigned OperandTyBits = U->getOperand(i: 0)->getType()->getScalarSizeInBits();
4289 if (Tmp > (OperandTyBits - TyBits))
4290 return Tmp - (OperandTyBits - TyBits);
4291
4292 return 1;
4293 }
4294
4295 case Instruction::ExtractElement:
4296 // Look through extract element. At the moment we keep this simple and
4297 // skip tracking the specific element. But at least we might find
4298  // information valid for all elements of the vector (for example if the
4299  // vector is sign-extended, shifted, etc.).
4300 return ComputeNumSignBits(V: U->getOperand(i: 0), Q, Depth: Depth + 1);
4301
4302 case Instruction::ShuffleVector: {
4303 // Collect the minimum number of sign bits that are shared by every vector
4304 // element referenced by the shuffle.
4305 auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: U);
4306 if (!Shuf) {
4307 // FIXME: Add support for shufflevector constant expressions.
4308 return 1;
4309 }
4310 APInt DemandedLHS, DemandedRHS;
4311 // For undef elements, we don't know anything about the common state of
4312 // the shuffle result.
4313 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
4314 return 1;
4315 Tmp = std::numeric_limits<unsigned>::max();
4316 if (!!DemandedLHS) {
4317 const Value *LHS = Shuf->getOperand(i_nocapture: 0);
4318 Tmp = ComputeNumSignBits(V: LHS, DemandedElts: DemandedLHS, Q, Depth: Depth + 1);
4319 }
4320 // If we don't know anything, early out and try computeKnownBits
4321 // fall-back.
4322 if (Tmp == 1)
4323 break;
4324 if (!!DemandedRHS) {
4325 const Value *RHS = Shuf->getOperand(i_nocapture: 1);
4326 Tmp2 = ComputeNumSignBits(V: RHS, DemandedElts: DemandedRHS, Q, Depth: Depth + 1);
4327 Tmp = std::min(a: Tmp, b: Tmp2);
4328 }
4329 // If we don't know anything, early out and try computeKnownBits
4330 // fall-back.
4331 if (Tmp == 1)
4332 break;
4333 assert(Tmp <= TyBits && "Failed to determine minimum sign bits");
4334 return Tmp;
4335 }
4336 case Instruction::Call: {
4337 if (const auto *II = dyn_cast<IntrinsicInst>(Val: U)) {
4338 switch (II->getIntrinsicID()) {
4339 default:
4340 break;
4341 case Intrinsic::abs:
4342 Tmp =
4343 ComputeNumSignBits(V: U->getOperand(i: 0), DemandedElts, Q, Depth: Depth + 1);
4344 if (Tmp == 1)
4345 break;
4346
4347 // Absolute value reduces number of sign bits by at most 1.
4348 return Tmp - 1;
4349 case Intrinsic::smin:
4350 case Intrinsic::smax: {
4351 const APInt *CLow, *CHigh;
4352 if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh))
4353 return std::min(a: CLow->getNumSignBits(), b: CHigh->getNumSignBits());
4354 }
4355 }
4356 }
4357 }
4358 }
4359 }
4360
4361 // Finally, if we can prove that the top bits of the result are 0's or 1's,
4362 // use this information.
4363
4364 // If we can examine all elements of a vector constant successfully, we're
4365 // done (we can't do any better than that). If not, keep trying.
4366 if (unsigned VecSignBits =
4367 computeNumSignBitsVectorConstant(V, DemandedElts, TyBits))
4368 return VecSignBits;
4369
4370 KnownBits Known(TyBits);
4371 computeKnownBits(V, DemandedElts, Known, Q, Depth);
4372
4373 // If we know that the sign bit is either zero or one, determine the number of
4374 // identical bits in the top of the input value.
4375 return std::max(a: FirstAnswer, b: Known.countMinSignBits());
4376}
4377
4378Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
4379 const TargetLibraryInfo *TLI) {
4380 const Function *F = CB.getCalledFunction();
4381 if (!F)
4382 return Intrinsic::not_intrinsic;
4383
4384 if (F->isIntrinsic())
4385 return F->getIntrinsicID();
4386
4387 // We are going to infer semantics of a library function based on mapping it
4388 // to an LLVM intrinsic. Check that the library function is available from
4389  // this call site and in this environment.
4390 LibFunc Func;
4391 if (F->hasLocalLinkage() || !TLI || !TLI->getLibFunc(CB, F&: Func) ||
4392 !CB.onlyReadsMemory())
4393 return Intrinsic::not_intrinsic;
4394
4395 switch (Func) {
4396 default:
4397 break;
4398 case LibFunc_sin:
4399 case LibFunc_sinf:
4400 case LibFunc_sinl:
4401 return Intrinsic::sin;
4402 case LibFunc_cos:
4403 case LibFunc_cosf:
4404 case LibFunc_cosl:
4405 return Intrinsic::cos;
4406 case LibFunc_tan:
4407 case LibFunc_tanf:
4408 case LibFunc_tanl:
4409 return Intrinsic::tan;
4410 case LibFunc_asin:
4411 case LibFunc_asinf:
4412 case LibFunc_asinl:
4413 return Intrinsic::asin;
4414 case LibFunc_acos:
4415 case LibFunc_acosf:
4416 case LibFunc_acosl:
4417 return Intrinsic::acos;
4418 case LibFunc_atan:
4419 case LibFunc_atanf:
4420 case LibFunc_atanl:
4421 return Intrinsic::atan;
4422 case LibFunc_atan2:
4423 case LibFunc_atan2f:
4424 case LibFunc_atan2l:
4425 return Intrinsic::atan2;
4426 case LibFunc_sinh:
4427 case LibFunc_sinhf:
4428 case LibFunc_sinhl:
4429 return Intrinsic::sinh;
4430 case LibFunc_cosh:
4431 case LibFunc_coshf:
4432 case LibFunc_coshl:
4433 return Intrinsic::cosh;
4434 case LibFunc_tanh:
4435 case LibFunc_tanhf:
4436 case LibFunc_tanhl:
4437 return Intrinsic::tanh;
4438 case LibFunc_exp:
4439 case LibFunc_expf:
4440 case LibFunc_expl:
4441 return Intrinsic::exp;
4442 case LibFunc_exp2:
4443 case LibFunc_exp2f:
4444 case LibFunc_exp2l:
4445 return Intrinsic::exp2;
4446 case LibFunc_exp10:
4447 case LibFunc_exp10f:
4448 case LibFunc_exp10l:
4449 return Intrinsic::exp10;
4450 case LibFunc_log:
4451 case LibFunc_logf:
4452 case LibFunc_logl:
4453 return Intrinsic::log;
4454 case LibFunc_log10:
4455 case LibFunc_log10f:
4456 case LibFunc_log10l:
4457 return Intrinsic::log10;
4458 case LibFunc_log2:
4459 case LibFunc_log2f:
4460 case LibFunc_log2l:
4461 return Intrinsic::log2;
4462 case LibFunc_fabs:
4463 case LibFunc_fabsf:
4464 case LibFunc_fabsl:
4465 return Intrinsic::fabs;
4466 case LibFunc_fmin:
4467 case LibFunc_fminf:
4468 case LibFunc_fminl:
4469 return Intrinsic::minnum;
4470 case LibFunc_fmax:
4471 case LibFunc_fmaxf:
4472 case LibFunc_fmaxl:
4473 return Intrinsic::maxnum;
4474 case LibFunc_copysign:
4475 case LibFunc_copysignf:
4476 case LibFunc_copysignl:
4477 return Intrinsic::copysign;
4478 case LibFunc_floor:
4479 case LibFunc_floorf:
4480 case LibFunc_floorl:
4481 return Intrinsic::floor;
4482 case LibFunc_ceil:
4483 case LibFunc_ceilf:
4484 case LibFunc_ceill:
4485 return Intrinsic::ceil;
4486 case LibFunc_trunc:
4487 case LibFunc_truncf:
4488 case LibFunc_truncl:
4489 return Intrinsic::trunc;
4490 case LibFunc_rint:
4491 case LibFunc_rintf:
4492 case LibFunc_rintl:
4493 return Intrinsic::rint;
4494 case LibFunc_nearbyint:
4495 case LibFunc_nearbyintf:
4496 case LibFunc_nearbyintl:
4497 return Intrinsic::nearbyint;
4498 case LibFunc_round:
4499 case LibFunc_roundf:
4500 case LibFunc_roundl:
4501 return Intrinsic::round;
4502 case LibFunc_roundeven:
4503 case LibFunc_roundevenf:
4504 case LibFunc_roundevenl:
4505 return Intrinsic::roundeven;
4506 case LibFunc_pow:
4507 case LibFunc_powf:
4508 case LibFunc_powl:
4509 return Intrinsic::pow;
4510 case LibFunc_sqrt:
4511 case LibFunc_sqrtf:
4512 case LibFunc_sqrtl:
4513 return Intrinsic::sqrt;
4514 }
4515
4516 return Intrinsic::not_intrinsic;
4517}
4518
4519static bool outputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) {
4520 Ty = Ty->getScalarType();
4521 DenormalMode Mode = F.getDenormalMode(FPType: Ty->getFltSemantics());
4522 return Mode.Output == DenormalMode::IEEE ||
4523 Mode.Output == DenormalMode::PositiveZero;
4524}
4525/// Given an exploded icmp instruction, return true if the comparison only
4526/// checks the sign bit. If it only checks the sign bit, set TrueIfSigned to
4527/// true when the comparison returns true for inputs with the sign bit set.
4528bool llvm::isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS,
4529 bool &TrueIfSigned) {
4530 switch (Pred) {
4531 case ICmpInst::ICMP_SLT: // True if LHS s< 0
4532 TrueIfSigned = true;
4533 return RHS.isZero();
4534 case ICmpInst::ICMP_SLE: // True if LHS s<= -1
4535 TrueIfSigned = true;
4536 return RHS.isAllOnes();
4537 case ICmpInst::ICMP_SGT: // True if LHS s> -1
4538 TrueIfSigned = false;
4539 return RHS.isAllOnes();
4540 case ICmpInst::ICMP_SGE: // True if LHS s>= 0
4541 TrueIfSigned = false;
4542 return RHS.isZero();
4543 case ICmpInst::ICMP_UGT:
4544 // True if LHS u> RHS and RHS == sign-bit-mask - 1
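    // (e.g. for i8 this is "LHS u> 0x7F", which holds exactly when the sign bit
    //  of LHS is set.)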
4545 TrueIfSigned = true;
4546 return RHS.isMaxSignedValue();
4547 case ICmpInst::ICMP_UGE:
4548 // True if LHS u>= RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
4549 TrueIfSigned = true;
4550 return RHS.isMinSignedValue();
4551 case ICmpInst::ICMP_ULT:
4552 // True if LHS u< RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
4553 TrueIfSigned = false;
4554 return RHS.isMinSignedValue();
4555 case ICmpInst::ICMP_ULE:
4556 // True if LHS u<= RHS and RHS == sign-bit-mask - 1
4557 TrueIfSigned = false;
4558 return RHS.isMaxSignedValue();
4559 default:
4560 return false;
4561 }
4562}
4563
4564static void computeKnownFPClassFromCond(const Value *V, Value *Cond,
4565 bool CondIsTrue,
4566 const Instruction *CxtI,
4567 KnownFPClass &KnownFromContext,
4568 unsigned Depth = 0) {
4569 Value *A, *B;
4570 if (Depth < MaxAnalysisRecursionDepth &&
4571 (CondIsTrue ? match(V: Cond, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B)))
4572 : match(V: Cond, P: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B))))) {
4573 computeKnownFPClassFromCond(V, Cond: A, CondIsTrue, CxtI, KnownFromContext,
4574 Depth: Depth + 1);
4575 computeKnownFPClassFromCond(V, Cond: B, CondIsTrue, CxtI, KnownFromContext,
4576 Depth: Depth + 1);
4577 return;
4578 }
4579 if (Depth < MaxAnalysisRecursionDepth && match(V: Cond, P: m_Not(V: m_Value(V&: A)))) {
4580 computeKnownFPClassFromCond(V, Cond: A, CondIsTrue: !CondIsTrue, CxtI, KnownFromContext,
4581 Depth: Depth + 1);
4582 return;
4583 }
4584 CmpPredicate Pred;
4585 Value *LHS;
4586 uint64_t ClassVal = 0;
4587 const APFloat *CRHS;
4588 const APInt *RHS;
4589 if (match(V: Cond, P: m_FCmp(Pred, L: m_Value(V&: LHS), R: m_APFloat(Res&: CRHS)))) {
4590 auto [CmpVal, MaskIfTrue, MaskIfFalse] = fcmpImpliesClass(
4591 Pred, F: *CxtI->getParent()->getParent(), LHS, ConstRHS: *CRHS, LookThroughSrc: LHS != V);
4592 if (CmpVal == V)
4593 KnownFromContext.knownNot(RuleOut: ~(CondIsTrue ? MaskIfTrue : MaskIfFalse));
4594 } else if (match(V: Cond, P: m_Intrinsic<Intrinsic::is_fpclass>(
4595 Op0: m_Specific(V), Op1: m_ConstantInt(V&: ClassVal)))) {
4596 FPClassTest Mask = static_cast<FPClassTest>(ClassVal);
4597 KnownFromContext.knownNot(RuleOut: CondIsTrue ? ~Mask : Mask);
4598 } else if (match(V: Cond, P: m_ICmp(Pred, L: m_ElementWiseBitCast(Op: m_Specific(V)),
4599 R: m_APInt(Res&: RHS)))) {
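    // (Illustrative: a condition such as "(bitcast float %v to i32) s< 0" being
    //  true means the sign bit of %v is set; the condition being false means it
    //  is clear.)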
4600 bool TrueIfSigned;
4601 if (!isSignBitCheck(Pred, RHS: *RHS, TrueIfSigned))
4602 return;
4603 if (TrueIfSigned == CondIsTrue)
4604 KnownFromContext.signBitMustBeOne();
4605 else
4606 KnownFromContext.signBitMustBeZero();
4607 }
4608}
4609
4610static KnownFPClass computeKnownFPClassFromContext(const Value *V,
4611 const SimplifyQuery &Q) {
4612 KnownFPClass KnownFromContext;
4613
4614 if (Q.CC && Q.CC->AffectedValues.contains(Ptr: V))
4615 computeKnownFPClassFromCond(V, Cond: Q.CC->Cond, CondIsTrue: !Q.CC->Invert, CxtI: Q.CxtI,
4616 KnownFromContext);
4617
4618 if (!Q.CxtI)
4619 return KnownFromContext;
4620
4621 if (Q.DC && Q.DT) {
4622 // Handle dominating conditions.
4623 for (BranchInst *BI : Q.DC->conditionsFor(V)) {
4624 Value *Cond = BI->getCondition();
4625
4626 BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(i: 0));
4627 if (Q.DT->dominates(BBE: Edge0, BB: Q.CxtI->getParent()))
4628 computeKnownFPClassFromCond(V, Cond, /*CondIsTrue=*/true, CxtI: Q.CxtI,
4629 KnownFromContext);
4630
4631 BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(i: 1));
4632 if (Q.DT->dominates(BBE: Edge1, BB: Q.CxtI->getParent()))
4633 computeKnownFPClassFromCond(V, Cond, /*CondIsTrue=*/false, CxtI: Q.CxtI,
4634 KnownFromContext);
4635 }
4636 }
4637
4638 if (!Q.AC)
4639 return KnownFromContext;
4640
4641 // Try to restrict the floating-point classes based on information from
4642 // assumptions.
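  // (For example, a valid "llvm.assume(fcmp oge float %x, 0.0)" lets us rule
  //  out nan, -inf and the negative normal/subnormal classes for %x; note that
  //  -0.0 still satisfies the compare, so it is not ruled out.)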
4643 for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
4644 if (!AssumeVH)
4645 continue;
4646 CallInst *I = cast<CallInst>(Val&: AssumeVH);
4647
4648 assert(I->getFunction() == Q.CxtI->getParent()->getParent() &&
4649 "Got assumption for the wrong function!");
4650 assert(I->getIntrinsicID() == Intrinsic::assume &&
4651 "must be an assume intrinsic");
4652
4653 if (!isValidAssumeForContext(Inv: I, CxtI: Q.CxtI, DT: Q.DT))
4654 continue;
4655
4656 computeKnownFPClassFromCond(V, Cond: I->getArgOperand(i: 0),
4657 /*CondIsTrue=*/true, CxtI: Q.CxtI, KnownFromContext);
4658 }
4659
4660 return KnownFromContext;
4661}
4662
4663void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
4664 FPClassTest InterestedClasses, KnownFPClass &Known,
4665 const SimplifyQuery &Q, unsigned Depth);
4666
4667static void computeKnownFPClass(const Value *V, KnownFPClass &Known,
4668 FPClassTest InterestedClasses,
4669 const SimplifyQuery &Q, unsigned Depth) {
4670 auto *FVTy = dyn_cast<FixedVectorType>(Val: V->getType());
4671 APInt DemandedElts =
4672 FVTy ? APInt::getAllOnes(numBits: FVTy->getNumElements()) : APInt(1, 1);
4673 computeKnownFPClass(V, DemandedElts, InterestedClasses, Known, Q, Depth);
4674}
4675
4676static void computeKnownFPClassForFPTrunc(const Operator *Op,
4677 const APInt &DemandedElts,
4678 FPClassTest InterestedClasses,
4679 KnownFPClass &Known,
4680 const SimplifyQuery &Q,
4681 unsigned Depth) {
4682 if ((InterestedClasses &
4683 (KnownFPClass::OrderedLessThanZeroMask | fcNan)) == fcNone)
4684 return;
4685
4686 KnownFPClass KnownSrc;
4687 computeKnownFPClass(V: Op->getOperand(i: 0), DemandedElts, InterestedClasses,
4688 Known&: KnownSrc, Q, Depth: Depth + 1);
4689
4690 // Sign should be preserved
4691 // TODO: Handle cannot be ordered greater than zero
4692 if (KnownSrc.cannotBeOrderedLessThanZero())
4693 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
4694
4695 Known.propagateNaN(Src: KnownSrc, PreserveSign: true);
4696
4697 // Infinity needs a range check.
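  // (An fptrunc of a finite value that is too large for the destination format
  //  can overflow to +/-inf, so infinities cannot be ruled out from the source
  //  classes alone.)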
4698}
4699
4700void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
4701 FPClassTest InterestedClasses, KnownFPClass &Known,
4702 const SimplifyQuery &Q, unsigned Depth) {
4703 assert(Known.isUnknown() && "should not be called with known information");
4704
4705 if (!DemandedElts) {
4706 // No demanded elts, better to assume we don't know anything.
4707 Known.resetAll();
4708 return;
4709 }
4710
4711 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
4712
4713 if (auto *CFP = dyn_cast<ConstantFP>(Val: V)) {
4714 Known.KnownFPClasses = CFP->getValueAPF().classify();
4715 Known.SignBit = CFP->isNegative();
4716 return;
4717 }
4718
4719 if (isa<ConstantAggregateZero>(Val: V)) {
4720 Known.KnownFPClasses = fcPosZero;
4721 Known.SignBit = false;
4722 return;
4723 }
4724
4725 if (isa<PoisonValue>(Val: V)) {
4726 Known.KnownFPClasses = fcNone;
4727 Known.SignBit = false;
4728 return;
4729 }
4730
4731 // Try to handle fixed width vector constants
4732 auto *VFVTy = dyn_cast<FixedVectorType>(Val: V->getType());
4733 const Constant *CV = dyn_cast<Constant>(Val: V);
4734 if (VFVTy && CV) {
4735 Known.KnownFPClasses = fcNone;
4736 bool SignBitAllZero = true;
4737 bool SignBitAllOne = true;
4738
4739    // For vectors, accumulate the classes of the demanded elements and their common sign bit.
4740 unsigned NumElts = VFVTy->getNumElements();
4741 for (unsigned i = 0; i != NumElts; ++i) {
4742 if (!DemandedElts[i])
4743 continue;
4744
4745 Constant *Elt = CV->getAggregateElement(Elt: i);
4746 if (!Elt) {
4747 Known = KnownFPClass();
4748 return;
4749 }
4750 if (isa<PoisonValue>(Val: Elt))
4751 continue;
4752 auto *CElt = dyn_cast<ConstantFP>(Val: Elt);
4753 if (!CElt) {
4754 Known = KnownFPClass();
4755 return;
4756 }
4757
4758 const APFloat &C = CElt->getValueAPF();
4759 Known.KnownFPClasses |= C.classify();
4760 if (C.isNegative())
4761 SignBitAllZero = false;
4762 else
4763 SignBitAllOne = false;
4764 }
4765 if (SignBitAllOne != SignBitAllZero)
4766 Known.SignBit = SignBitAllOne;
4767 return;
4768 }
4769
4770 FPClassTest KnownNotFromFlags = fcNone;
4771 if (const auto *CB = dyn_cast<CallBase>(Val: V))
4772 KnownNotFromFlags |= CB->getRetNoFPClass();
4773 else if (const auto *Arg = dyn_cast<Argument>(Val: V))
4774 KnownNotFromFlags |= Arg->getNoFPClass();
4775
4776 const Operator *Op = dyn_cast<Operator>(Val: V);
4777 if (const FPMathOperator *FPOp = dyn_cast_or_null<FPMathOperator>(Val: Op)) {
4778 if (FPOp->hasNoNaNs())
4779 KnownNotFromFlags |= fcNan;
4780 if (FPOp->hasNoInfs())
4781 KnownNotFromFlags |= fcInf;
4782 }
4783
4784 KnownFPClass AssumedClasses = computeKnownFPClassFromContext(V, Q);
4785 KnownNotFromFlags |= ~AssumedClasses.KnownFPClasses;
4786
4787 // We no longer need to find out about these bits from inputs if we can
4788 // assume this from flags/attributes.
4789 InterestedClasses &= ~KnownNotFromFlags;
4790
4791 auto ClearClassesFromFlags = make_scope_exit(F: [=, &Known] {
4792 Known.knownNot(RuleOut: KnownNotFromFlags);
4793 if (!Known.SignBit && AssumedClasses.SignBit) {
4794 if (*AssumedClasses.SignBit)
4795 Known.signBitMustBeOne();
4796 else
4797 Known.signBitMustBeZero();
4798 }
4799 });
4800
4801 if (!Op)
4802 return;
4803
4804 // All recursive calls that increase depth must come after this.
4805 if (Depth == MaxAnalysisRecursionDepth)
4806 return;
4807
4808 const unsigned Opc = Op->getOpcode();
4809 switch (Opc) {
4810 case Instruction::FNeg: {
4811 computeKnownFPClass(V: Op->getOperand(i: 0), DemandedElts, InterestedClasses,
4812 Known, Q, Depth: Depth + 1);
4813 Known.fneg();
4814 break;
4815 }
4816 case Instruction::Select: {
4817 Value *Cond = Op->getOperand(i: 0);
4818 Value *LHS = Op->getOperand(i: 1);
4819 Value *RHS = Op->getOperand(i: 2);
4820
4821 FPClassTest FilterLHS = fcAllFlags;
4822 FPClassTest FilterRHS = fcAllFlags;
4823
4824 Value *TestedValue = nullptr;
4825 FPClassTest MaskIfTrue = fcAllFlags;
4826 FPClassTest MaskIfFalse = fcAllFlags;
4827 uint64_t ClassVal = 0;
4828 const Function *F = cast<Instruction>(Val: Op)->getFunction();
4829 CmpPredicate Pred;
4830 Value *CmpLHS, *CmpRHS;
4831 if (F && match(V: Cond, P: m_FCmp(Pred, L: m_Value(V&: CmpLHS), R: m_Value(V&: CmpRHS)))) {
4832 // If the select filters out a value based on the class, it no longer
4833 // participates in the class of the result
4834
4835 // TODO: In some degenerate cases we can infer something if we try again
4836 // without looking through sign operations.
4837 bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS;
4838 std::tie(args&: TestedValue, args&: MaskIfTrue, args&: MaskIfFalse) =
4839 fcmpImpliesClass(Pred, F: *F, LHS: CmpLHS, RHS: CmpRHS, LookThroughSrc: LookThroughFAbsFNeg);
4840 } else if (match(V: Cond,
4841 P: m_Intrinsic<Intrinsic::is_fpclass>(
4842 Op0: m_Value(V&: TestedValue), Op1: m_ConstantInt(V&: ClassVal)))) {
4843 FPClassTest TestedMask = static_cast<FPClassTest>(ClassVal);
4844 MaskIfTrue = TestedMask;
4845 MaskIfFalse = ~TestedMask;
4846 }
4847
4848 if (TestedValue == LHS) {
4849 // match !isnan(x) ? x : y
4850 FilterLHS = MaskIfTrue;
4851 } else if (TestedValue == RHS) { // && IsExactClass
4852 // match !isnan(x) ? y : x
4853 FilterRHS = MaskIfFalse;
4854 }
4855
4856 KnownFPClass Known2;
4857 computeKnownFPClass(V: LHS, DemandedElts, InterestedClasses: InterestedClasses & FilterLHS, Known,
4858 Q, Depth: Depth + 1);
4859 Known.KnownFPClasses &= FilterLHS;
4860
4861 computeKnownFPClass(V: RHS, DemandedElts, InterestedClasses: InterestedClasses & FilterRHS,
4862 Known&: Known2, Q, Depth: Depth + 1);
4863 Known2.KnownFPClasses &= FilterRHS;
4864
4865 Known |= Known2;
4866 break;
4867 }
4868 case Instruction::Call: {
4869 const CallInst *II = cast<CallInst>(Val: Op);
4870 const Intrinsic::ID IID = II->getIntrinsicID();
4871 switch (IID) {
4872 case Intrinsic::fabs: {
4873 if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) {
4874 // If we only care about the sign bit we don't need to inspect the
4875 // operand.
4876 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts,
4877 InterestedClasses, Known, Q, Depth: Depth + 1);
4878 }
4879
4880 Known.fabs();
4881 break;
4882 }
4883 case Intrinsic::copysign: {
4884 KnownFPClass KnownSign;
4885
4886 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts, InterestedClasses,
4887 Known, Q, Depth: Depth + 1);
4888 computeKnownFPClass(V: II->getArgOperand(i: 1), DemandedElts, InterestedClasses,
4889 Known&: KnownSign, Q, Depth: Depth + 1);
4890 Known.copysign(Sign: KnownSign);
4891 break;
4892 }
4893 case Intrinsic::fma:
4894 case Intrinsic::fmuladd: {
4895 if ((InterestedClasses & fcNegative) == fcNone)
4896 break;
4897
4898 if (II->getArgOperand(i: 0) != II->getArgOperand(i: 1))
4899 break;
4900
4901 // The multiply cannot be -0 and therefore the add can't be -0
4902 Known.knownNot(RuleOut: fcNegZero);
4903
4904 // x * x + y is non-negative if y is non-negative.
4905 KnownFPClass KnownAddend;
4906 computeKnownFPClass(V: II->getArgOperand(i: 2), DemandedElts, InterestedClasses,
4907 Known&: KnownAddend, Q, Depth: Depth + 1);
4908
4909 if (KnownAddend.cannotBeOrderedLessThanZero())
4910 Known.knownNot(RuleOut: fcNegative);
4911 break;
4912 }
4913 case Intrinsic::sqrt:
4914 case Intrinsic::experimental_constrained_sqrt: {
4915 KnownFPClass KnownSrc;
4916 FPClassTest InterestedSrcs = InterestedClasses;
4917 if (InterestedClasses & fcNan)
4918 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
4919
4920 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts, InterestedClasses: InterestedSrcs,
4921 Known&: KnownSrc, Q, Depth: Depth + 1);
4922
4923 if (KnownSrc.isKnownNeverPosInfinity())
4924 Known.knownNot(RuleOut: fcPosInf);
4925 if (KnownSrc.isKnownNever(Mask: fcSNan))
4926 Known.knownNot(RuleOut: fcSNan);
4927
4928 // Any negative value besides -0 returns a nan.
4929 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
4930 Known.knownNot(RuleOut: fcNan);
4931
4932 // The only negative value that can be returned is -0 for -0 inputs.
4933 Known.knownNot(RuleOut: fcNegInf | fcNegSubnormal | fcNegNormal);
4934
4935 // If the input denormal mode could be PreserveSign, a negative
4936 // subnormal input could produce a negative zero output.
4937 const Function *F = II->getFunction();
4938 const fltSemantics &FltSem =
4939 II->getType()->getScalarType()->getFltSemantics();
4940
4941 if (Q.IIQ.hasNoSignedZeros(Op: II) ||
4942 (F &&
4943 KnownSrc.isKnownNeverLogicalNegZero(Mode: F->getDenormalMode(FPType: FltSem))))
4944 Known.knownNot(RuleOut: fcNegZero);
4945
4946 break;
4947 }
4948 case Intrinsic::sin:
4949 case Intrinsic::cos: {
4950 // Return NaN on infinite inputs.
4951 KnownFPClass KnownSrc;
4952 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts, InterestedClasses,
4953 Known&: KnownSrc, Q, Depth: Depth + 1);
4954 Known.knownNot(RuleOut: fcInf);
4955 if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity())
4956 Known.knownNot(RuleOut: fcNan);
4957 break;
4958 }
4959 case Intrinsic::maxnum:
4960 case Intrinsic::minnum:
4961 case Intrinsic::minimum:
4962 case Intrinsic::maximum:
4963 case Intrinsic::minimumnum:
4964 case Intrinsic::maximumnum: {
4965 KnownFPClass KnownLHS, KnownRHS;
4966 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts, InterestedClasses,
4967 Known&: KnownLHS, Q, Depth: Depth + 1);
4968 computeKnownFPClass(V: II->getArgOperand(i: 1), DemandedElts, InterestedClasses,
4969 Known&: KnownRHS, Q, Depth: Depth + 1);
4970
4971 bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN();
4972 Known = KnownLHS | KnownRHS;
4973
4974 // If either operand is not NaN, the result is not NaN.
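      // (minnum/maxnum and minimumnum/maximumnum return the other operand when
      //  only one input is NaN, so a single never-NaN operand suffices.)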
4975 if (NeverNaN &&
4976 (IID == Intrinsic::minnum || IID == Intrinsic::maxnum ||
4977 IID == Intrinsic::minimumnum || IID == Intrinsic::maximumnum))
4978 Known.knownNot(RuleOut: fcNan);
4979
4980 if (IID == Intrinsic::maxnum || IID == Intrinsic::maximumnum) {
4981 // If at least one operand is known to be positive, the result must be
4982 // positive.
4983 if ((KnownLHS.cannotBeOrderedLessThanZero() &&
4984 KnownLHS.isKnownNeverNaN()) ||
4985 (KnownRHS.cannotBeOrderedLessThanZero() &&
4986 KnownRHS.isKnownNeverNaN()))
4987 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
4988 } else if (IID == Intrinsic::maximum) {
4989 // If at least one operand is known to be positive, the result must be
4990 // positive.
4991 if (KnownLHS.cannotBeOrderedLessThanZero() ||
4992 KnownRHS.cannotBeOrderedLessThanZero())
4993 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
4994 } else if (IID == Intrinsic::minnum || IID == Intrinsic::minimumnum) {
4995 // If at least one operand is known to be negative, the result must be
4996 // negative.
4997 if ((KnownLHS.cannotBeOrderedGreaterThanZero() &&
4998 KnownLHS.isKnownNeverNaN()) ||
4999 (KnownRHS.cannotBeOrderedGreaterThanZero() &&
5000 KnownRHS.isKnownNeverNaN()))
5001 Known.knownNot(RuleOut: KnownFPClass::OrderedGreaterThanZeroMask);
5002 } else if (IID == Intrinsic::minimum) {
5003 // If at least one operand is known to be negative, the result must be
5004 // negative.
5005 if (KnownLHS.cannotBeOrderedGreaterThanZero() ||
5006 KnownRHS.cannotBeOrderedGreaterThanZero())
5007 Known.knownNot(RuleOut: KnownFPClass::OrderedGreaterThanZeroMask);
5008 } else
5009 llvm_unreachable("unhandled intrinsic");
5010
5011 // Fixup zero handling if denormals could be returned as a zero.
5012 //
5013 // As there's no spec for denormal flushing, be conservative with the
5014 // treatment of denormals that could be flushed to zero. For older
5015 // subtargets on AMDGPU the min/max instructions would not flush the
5016 // output and return the original value.
5017 //
5018 if ((Known.KnownFPClasses & fcZero) != fcNone &&
5019 !Known.isKnownNeverSubnormal()) {
5020 const Function *Parent = II->getFunction();
5021 if (!Parent)
5022 break;
5023
5024 DenormalMode Mode = Parent->getDenormalMode(
5025 FPType: II->getType()->getScalarType()->getFltSemantics());
5026 if (Mode != DenormalMode::getIEEE())
5027 Known.KnownFPClasses |= fcZero;
5028 }
5029
5030 if (Known.isKnownNeverNaN()) {
5031 if (KnownLHS.SignBit && KnownRHS.SignBit &&
5032 *KnownLHS.SignBit == *KnownRHS.SignBit) {
5033 if (*KnownLHS.SignBit)
5034 Known.signBitMustBeOne();
5035 else
5036 Known.signBitMustBeZero();
5037 } else if ((IID == Intrinsic::maximum || IID == Intrinsic::minimum ||
5038 IID == Intrinsic::maximumnum ||
5039 IID == Intrinsic::minimumnum) ||
5040 // FIXME: Should be using logical zero versions
5041 ((KnownLHS.isKnownNeverNegZero() ||
5042 KnownRHS.isKnownNeverPosZero()) &&
5043 (KnownLHS.isKnownNeverPosZero() ||
5044 KnownRHS.isKnownNeverNegZero()))) {
5045 if ((IID == Intrinsic::maximum || IID == Intrinsic::maximumnum ||
5046 IID == Intrinsic::maxnum) &&
5047 (KnownLHS.SignBit == false || KnownRHS.SignBit == false))
5048 Known.signBitMustBeZero();
5049 else if ((IID == Intrinsic::minimum || IID == Intrinsic::minimumnum ||
5050 IID == Intrinsic::minnum) &&
5051 (KnownLHS.SignBit == true || KnownRHS.SignBit == true))
5052 Known.signBitMustBeOne();
5053 }
5054 }
5055 break;
5056 }
5057 case Intrinsic::canonicalize: {
5058 KnownFPClass KnownSrc;
5059 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts, InterestedClasses,
5060 Known&: KnownSrc, Q, Depth: Depth + 1);
5061
5062 // This is essentially a stronger form of
5063 // propagateCanonicalizingSrc. Other "canonicalizing" operations don't
5064 // actually have an IR canonicalization guarantee.
5065
5066 // Canonicalize may flush denormals to zero, so we have to consider the
5067 // denormal mode to preserve known-not-0 knowledge.
5068 Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan;
5069
5070 // Stronger version of propagateNaN
5071 // Canonicalize is guaranteed to quiet signaling nans.
5072 if (KnownSrc.isKnownNeverNaN())
5073 Known.knownNot(RuleOut: fcNan);
5074 else
5075 Known.knownNot(RuleOut: fcSNan);
5076
5077 const Function *F = II->getFunction();
5078 if (!F)
5079 break;
5080
5081 // If the parent function flushes denormals, the canonical output cannot
5082 // be a denormal.
5083 const fltSemantics &FPType =
5084 II->getType()->getScalarType()->getFltSemantics();
5085 DenormalMode DenormMode = F->getDenormalMode(FPType);
5086 if (DenormMode == DenormalMode::getIEEE()) {
5087 if (KnownSrc.isKnownNever(Mask: fcPosZero))
5088 Known.knownNot(RuleOut: fcPosZero);
5089 if (KnownSrc.isKnownNever(Mask: fcNegZero))
5090 Known.knownNot(RuleOut: fcNegZero);
5091 break;
5092 }
5093
5094 if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero())
5095 Known.knownNot(RuleOut: fcSubnormal);
5096
5097 if (DenormMode.Input == DenormalMode::PositiveZero ||
5098 (DenormMode.Output == DenormalMode::PositiveZero &&
5099 DenormMode.Input == DenormalMode::IEEE))
5100 Known.knownNot(RuleOut: fcNegZero);
5101
5102 break;
5103 }
5104 case Intrinsic::vector_reduce_fmax:
5105 case Intrinsic::vector_reduce_fmin:
5106 case Intrinsic::vector_reduce_fmaximum:
5107 case Intrinsic::vector_reduce_fminimum: {
5108 // reduce min/max will choose an element from one of the vector elements,
5109      // so we can infer any class information that is common to all elements.
5110 Known = computeKnownFPClass(V: II->getArgOperand(i: 0), FMF: II->getFastMathFlags(),
5111 InterestedClasses, SQ: Q, Depth: Depth + 1);
5112 // Can only propagate sign if output is never NaN.
5113 if (!Known.isKnownNeverNaN())
5114 Known.SignBit.reset();
5115 break;
5116 }
5117    // reverse preserves all characteristics of the input vector's elements.
5118 case Intrinsic::vector_reverse:
5119 Known = computeKnownFPClass(
5120 V: II->getArgOperand(i: 0), DemandedElts: DemandedElts.reverseBits(),
5121 FMF: II->getFastMathFlags(), InterestedClasses, SQ: Q, Depth: Depth + 1);
5122 break;
5123 case Intrinsic::trunc:
5124 case Intrinsic::floor:
5125 case Intrinsic::ceil:
5126 case Intrinsic::rint:
5127 case Intrinsic::nearbyint:
5128 case Intrinsic::round:
5129 case Intrinsic::roundeven: {
5130 KnownFPClass KnownSrc;
5131 FPClassTest InterestedSrcs = InterestedClasses;
5132 if (InterestedSrcs & fcPosFinite)
5133 InterestedSrcs |= fcPosFinite;
5134 if (InterestedSrcs & fcNegFinite)
5135 InterestedSrcs |= fcNegFinite;
5136 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts, InterestedClasses: InterestedSrcs,
5137 Known&: KnownSrc, Q, Depth: Depth + 1);
5138
5139 // Integer results cannot be subnormal.
5140 Known.knownNot(RuleOut: fcSubnormal);
5141
5142 Known.propagateNaN(Src: KnownSrc, PreserveSign: true);
5143
5144 // Pass through infinities, except PPC_FP128 is a special case for
5145 // intrinsics other than trunc.
5146 if (IID == Intrinsic::trunc || !V->getType()->isMultiUnitFPType()) {
5147 if (KnownSrc.isKnownNeverPosInfinity())
5148 Known.knownNot(RuleOut: fcPosInf);
5149 if (KnownSrc.isKnownNeverNegInfinity())
5150 Known.knownNot(RuleOut: fcNegInf);
5151 }
5152
5153      // Rounding a negative value up to 0 produces -0.
5154 if (KnownSrc.isKnownNever(Mask: fcPosFinite))
5155 Known.knownNot(RuleOut: fcPosFinite);
5156 if (KnownSrc.isKnownNever(Mask: fcNegFinite))
5157 Known.knownNot(RuleOut: fcNegFinite);
5158
5159 break;
5160 }
5161 case Intrinsic::exp:
5162 case Intrinsic::exp2:
5163 case Intrinsic::exp10: {
5164 Known.knownNot(RuleOut: fcNegative);
5165 if ((InterestedClasses & fcNan) == fcNone)
5166 break;
5167
5168 KnownFPClass KnownSrc;
5169 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts, InterestedClasses,
5170 Known&: KnownSrc, Q, Depth: Depth + 1);
5171 if (KnownSrc.isKnownNeverNaN()) {
5172 Known.knownNot(RuleOut: fcNan);
5173 Known.signBitMustBeZero();
5174 }
5175
5176 break;
5177 }
5178 case Intrinsic::fptrunc_round: {
5179 computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known,
5180 Q, Depth);
5181 break;
5182 }
5183 case Intrinsic::log:
5184 case Intrinsic::log10:
5185 case Intrinsic::log2:
5186 case Intrinsic::experimental_constrained_log:
5187 case Intrinsic::experimental_constrained_log10:
5188 case Intrinsic::experimental_constrained_log2: {
5189 // log(+inf) -> +inf
5190 // log([+-]0.0) -> -inf
5191 // log(-inf) -> nan
5192 // log(-x) -> nan
5193 if ((InterestedClasses & (fcNan | fcInf)) == fcNone)
5194 break;
5195
5196 FPClassTest InterestedSrcs = InterestedClasses;
5197 if ((InterestedClasses & fcNegInf) != fcNone)
5198 InterestedSrcs |= fcZero | fcSubnormal;
5199 if ((InterestedClasses & fcNan) != fcNone)
5200 InterestedSrcs |= fcNan | (fcNegative & ~fcNan);
5201
5202 KnownFPClass KnownSrc;
5203 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts, InterestedClasses: InterestedSrcs,
5204 Known&: KnownSrc, Q, Depth: Depth + 1);
5205
5206 if (KnownSrc.isKnownNeverPosInfinity())
5207 Known.knownNot(RuleOut: fcPosInf);
5208
5209 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
5210 Known.knownNot(RuleOut: fcNan);
5211
5212 const Function *F = II->getFunction();
5213
5214 if (!F)
5215 break;
5216
5217 const fltSemantics &FltSem =
5218 II->getType()->getScalarType()->getFltSemantics();
5219 DenormalMode Mode = F->getDenormalMode(FPType: FltSem);
5220
5221 if (KnownSrc.isKnownNeverLogicalZero(Mode))
5222 Known.knownNot(RuleOut: fcNegInf);
5223
5224 break;
5225 }
5226 case Intrinsic::powi: {
5227 if ((InterestedClasses & fcNegative) == fcNone)
5228 break;
5229
5230 const Value *Exp = II->getArgOperand(i: 1);
5231 Type *ExpTy = Exp->getType();
5232 unsigned BitWidth = ExpTy->getScalarType()->getIntegerBitWidth();
5233 KnownBits ExponentKnownBits(BitWidth);
5234 computeKnownBits(V: Exp, DemandedElts: isa<VectorType>(Val: ExpTy) ? DemandedElts : APInt(1, 1),
5235 Known&: ExponentKnownBits, Q, Depth: Depth + 1);
5236
5237 if (ExponentKnownBits.Zero[0]) { // Is even
5238 Known.knownNot(RuleOut: fcNegative);
5239 break;
5240 }
5241
5242 // Given that exp is an integer, here are the
5243 // ways that pow can return a negative value:
5244 //
5245 // pow(-x, exp) --> negative if exp is odd and x is negative.
5246 // pow(-0, exp) --> -inf if exp is negative odd.
5247 // pow(-0, exp) --> -0 if exp is positive odd.
5248 // pow(-inf, exp) --> -0 if exp is negative odd.
5249 // pow(-inf, exp) --> -inf if exp is positive odd.
5250 KnownFPClass KnownSrc;
5251 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts, InterestedClasses: fcNegative,
5252 Known&: KnownSrc, Q, Depth: Depth + 1);
5253 if (KnownSrc.isKnownNever(Mask: fcNegative))
5254 Known.knownNot(RuleOut: fcNegative);
5255 break;
5256 }
5257 case Intrinsic::ldexp: {
5258 KnownFPClass KnownSrc;
5259 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts, InterestedClasses,
5260 Known&: KnownSrc, Q, Depth: Depth + 1);
5261 Known.propagateNaN(Src: KnownSrc, /*PropagateSign=*/PreserveSign: true);
5262
5263 // Sign is preserved, but underflows may produce zeroes.
5264 if (KnownSrc.isKnownNever(Mask: fcNegative))
5265 Known.knownNot(RuleOut: fcNegative);
5266 else if (KnownSrc.cannotBeOrderedLessThanZero())
5267 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
5268
5269 if (KnownSrc.isKnownNever(Mask: fcPositive))
5270 Known.knownNot(RuleOut: fcPositive);
5271 else if (KnownSrc.cannotBeOrderedGreaterThanZero())
5272 Known.knownNot(RuleOut: KnownFPClass::OrderedGreaterThanZeroMask);
5273
5274 // Can refine inf/zero handling based on the exponent operand.
5275 const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf;
5276 if ((InterestedClasses & ExpInfoMask) == fcNone)
5277 break;
5278 if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone)
5279 break;
5280
5281 const fltSemantics &Flt =
5282 II->getType()->getScalarType()->getFltSemantics();
5283 unsigned Precision = APFloat::semanticsPrecision(Flt);
5284 const Value *ExpArg = II->getArgOperand(i: 1);
5285 ConstantRange ExpRange = computeConstantRange(
5286 V: ExpArg, ForSigned: true, UseInstrInfo: Q.IIQ.UseInstrInfo, AC: Q.AC, CtxI: Q.CxtI, DT: Q.DT, Depth: Depth + 1);
5287
5288 const int MantissaBits = Precision - 1;
5289 if (ExpRange.getSignedMin().sge(RHS: static_cast<int64_t>(MantissaBits)))
5290 Known.knownNot(RuleOut: fcSubnormal);
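      // (Illustrative: for IEEE single precision MantissaBits is 23, and
      //  scaling any nonzero value by 2^23 or more lifts even the smallest
      //  subnormal up to at least the smallest normal number.)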
5291
5292 const Function *F = II->getFunction();
5293 const APInt *ConstVal = ExpRange.getSingleElement();
5294 const fltSemantics &FltSem =
5295 II->getType()->getScalarType()->getFltSemantics();
5296 if (ConstVal && ConstVal->isZero()) {
5297 // ldexp(x, 0) -> x, so propagate everything.
5298 Known.propagateCanonicalizingSrc(Src: KnownSrc, Mode: F->getDenormalMode(FPType: FltSem));
5299 } else if (ExpRange.isAllNegative()) {
5300 // If we know the power is <= 0, can't introduce inf
5301 if (KnownSrc.isKnownNeverPosInfinity())
5302 Known.knownNot(RuleOut: fcPosInf);
5303 if (KnownSrc.isKnownNeverNegInfinity())
5304 Known.knownNot(RuleOut: fcNegInf);
5305 } else if (ExpRange.isAllNonNegative()) {
5306 // If we know the power is >= 0, can't introduce subnormal or zero
5307 if (KnownSrc.isKnownNeverPosSubnormal())
5308 Known.knownNot(RuleOut: fcPosSubnormal);
5309 if (KnownSrc.isKnownNeverNegSubnormal())
5310 Known.knownNot(RuleOut: fcNegSubnormal);
5311 if (F &&
5312 KnownSrc.isKnownNeverLogicalPosZero(Mode: F->getDenormalMode(FPType: FltSem)))
5313 Known.knownNot(RuleOut: fcPosZero);
5314 if (F &&
5315 KnownSrc.isKnownNeverLogicalNegZero(Mode: F->getDenormalMode(FPType: FltSem)))
5316 Known.knownNot(RuleOut: fcNegZero);
5317 }
5318
5319 break;
5320 }
5321 case Intrinsic::arithmetic_fence: {
5322 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts, InterestedClasses,
5323 Known, Q, Depth: Depth + 1);
5324 break;
5325 }
5326 case Intrinsic::experimental_constrained_sitofp:
5327 case Intrinsic::experimental_constrained_uitofp:
5328 // Cannot produce nan
5329 Known.knownNot(RuleOut: fcNan);
5330
5331 // sitofp and uitofp turn into +0.0 for zero.
5332 Known.knownNot(RuleOut: fcNegZero);
5333
5334 // Integers cannot be subnormal
5335 Known.knownNot(RuleOut: fcSubnormal);
5336
5337 if (IID == Intrinsic::experimental_constrained_uitofp)
5338 Known.signBitMustBeZero();
5339
5340 // TODO: Copy inf handling from instructions
5341 break;
5342 default:
5343 break;
5344 }
5345
5346 break;
5347 }
5348 case Instruction::FAdd:
5349 case Instruction::FSub: {
5350 KnownFPClass KnownLHS, KnownRHS;
5351 bool WantNegative =
5352 Op->getOpcode() == Instruction::FAdd &&
5353 (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone;
5354 bool WantNaN = (InterestedClasses & fcNan) != fcNone;
5355 bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone;
5356
5357 if (!WantNaN && !WantNegative && !WantNegZero)
5358 break;
5359
5360 FPClassTest InterestedSrcs = InterestedClasses;
5361 if (WantNegative)
5362 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
5363 if (InterestedClasses & fcNan)
5364 InterestedSrcs |= fcInf;
5365 computeKnownFPClass(V: Op->getOperand(i: 1), DemandedElts, InterestedClasses: InterestedSrcs,
5366 Known&: KnownRHS, Q, Depth: Depth + 1);
5367
5368 if ((WantNaN && KnownRHS.isKnownNeverNaN()) ||
5369 (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) ||
5370 WantNegZero || Opc == Instruction::FSub) {
5371
5372 // RHS is canonically cheaper to compute. Skip inspecting the LHS if
5373 // there's no point.
5374 computeKnownFPClass(V: Op->getOperand(i: 0), DemandedElts, InterestedClasses: InterestedSrcs,
5375 Known&: KnownLHS, Q, Depth: Depth + 1);
5376 // Adding positive and negative infinity produces NaN.
5377 // TODO: Check sign of infinities.
5378 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
5379 (KnownLHS.isKnownNeverInfinity() || KnownRHS.isKnownNeverInfinity()))
5380 Known.knownNot(RuleOut: fcNan);
5381
5382 // FIXME: Context function should always be passed in separately
5383 const Function *F = cast<Instruction>(Val: Op)->getFunction();
5384
5385 if (Op->getOpcode() == Instruction::FAdd) {
5386 if (KnownLHS.cannotBeOrderedLessThanZero() &&
5387 KnownRHS.cannotBeOrderedLessThanZero())
5388 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
5389 if (!F)
5390 break;
5391
5392 const fltSemantics &FltSem =
5393 Op->getType()->getScalarType()->getFltSemantics();
5394 DenormalMode Mode = F->getDenormalMode(FPType: FltSem);
5395
5396 // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
5397 if ((KnownLHS.isKnownNeverLogicalNegZero(Mode) ||
5398 KnownRHS.isKnownNeverLogicalNegZero(Mode)) &&
5399 // Make sure output negative denormal can't flush to -0
5400 outputDenormalIsIEEEOrPosZero(F: *F, Ty: Op->getType()))
5401 Known.knownNot(RuleOut: fcNegZero);
5402 } else {
5403 if (!F)
5404 break;
5405
5406 const fltSemantics &FltSem =
5407 Op->getType()->getScalarType()->getFltSemantics();
5408 DenormalMode Mode = F->getDenormalMode(FPType: FltSem);
5409
5410 // Only fsub -0, +0 can return -0
5411 if ((KnownLHS.isKnownNeverLogicalNegZero(Mode) ||
5412 KnownRHS.isKnownNeverLogicalPosZero(Mode)) &&
5413 // Make sure output negative denormal can't flush to -0
5414 outputDenormalIsIEEEOrPosZero(F: *F, Ty: Op->getType()))
5415 Known.knownNot(RuleOut: fcNegZero);
5416 }
5417 }
5418
5419 break;
5420 }
5421 case Instruction::FMul: {
5422 // X * X is always non-negative or a NaN.
5423 if (Op->getOperand(i: 0) == Op->getOperand(i: 1))
5424 Known.knownNot(RuleOut: fcNegative);
5425
5426 if ((InterestedClasses & fcNan) != fcNan)
5427 break;
5428
5429 // fcSubnormal is only needed in case of DAZ.
5430 const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal;
5431
5432 KnownFPClass KnownLHS, KnownRHS;
5433 computeKnownFPClass(V: Op->getOperand(i: 1), DemandedElts, InterestedClasses: NeedForNan, Known&: KnownRHS,
5434 Q, Depth: Depth + 1);
5435 if (!KnownRHS.isKnownNeverNaN())
5436 break;
5437
5438 computeKnownFPClass(V: Op->getOperand(i: 0), DemandedElts, InterestedClasses: NeedForNan, Known&: KnownLHS,
5439 Q, Depth: Depth + 1);
5440 if (!KnownLHS.isKnownNeverNaN())
5441 break;
5442
5443 if (KnownLHS.SignBit && KnownRHS.SignBit) {
5444 if (*KnownLHS.SignBit == *KnownRHS.SignBit)
5445 Known.signBitMustBeZero();
5446 else
5447 Known.signBitMustBeOne();
5448 }
5449
5450    // Only 0 * +/-inf produces NaN (NaN operands were already excluded above).
5451 if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) {
5452 Known.knownNot(RuleOut: fcNan);
5453 break;
5454 }
5455
5456 const Function *F = cast<Instruction>(Val: Op)->getFunction();
5457 if (!F)
5458 break;
5459
5460 Type *OpTy = Op->getType()->getScalarType();
5461 const fltSemantics &FltSem = OpTy->getFltSemantics();
5462 DenormalMode Mode = F->getDenormalMode(FPType: FltSem);
5463
5464 if ((KnownRHS.isKnownNeverInfinity() ||
5465 KnownLHS.isKnownNeverLogicalZero(Mode)) &&
5466 (KnownLHS.isKnownNeverInfinity() ||
5467 KnownRHS.isKnownNeverLogicalZero(Mode)))
5468 Known.knownNot(RuleOut: fcNan);
5469
5470 break;
5471 }
5472 case Instruction::FDiv:
5473 case Instruction::FRem: {
5474 if (Op->getOperand(i: 0) == Op->getOperand(i: 1)) {
5475 // TODO: Could filter out snan if we inspect the operand
5476 if (Op->getOpcode() == Instruction::FDiv) {
5477 // X / X is always exactly 1.0 or a NaN.
5478 Known.KnownFPClasses = fcNan | fcPosNormal;
5479 } else {
5480 // X % X is always exactly [+-]0.0 or a NaN.
5481 Known.KnownFPClasses = fcNan | fcZero;
5482 }
5483
5484 break;
5485 }
5486
5487 const bool WantNan = (InterestedClasses & fcNan) != fcNone;
5488 const bool WantNegative = (InterestedClasses & fcNegative) != fcNone;
5489 const bool WantPositive =
5490 Opc == Instruction::FRem && (InterestedClasses & fcPositive) != fcNone;
5491 if (!WantNan && !WantNegative && !WantPositive)
5492 break;
5493
5494 KnownFPClass KnownLHS, KnownRHS;
5495
5496 computeKnownFPClass(V: Op->getOperand(i: 1), DemandedElts,
5497 InterestedClasses: fcNan | fcInf | fcZero | fcNegative, Known&: KnownRHS, Q,
5498 Depth: Depth + 1);
5499
5500 bool KnowSomethingUseful =
5501 KnownRHS.isKnownNeverNaN() || KnownRHS.isKnownNever(Mask: fcNegative);
5502
5503 if (KnowSomethingUseful || WantPositive) {
5504 const FPClassTest InterestedLHS =
5505 WantPositive ? fcAllFlags
5506 : fcNan | fcInf | fcZero | fcSubnormal | fcNegative;
5507
5508 computeKnownFPClass(V: Op->getOperand(i: 0), DemandedElts,
5509 InterestedClasses: InterestedClasses & InterestedLHS, Known&: KnownLHS, Q,
5510 Depth: Depth + 1);
5511 }
5512
5513 const Function *F = cast<Instruction>(Val: Op)->getFunction();
5514 const fltSemantics &FltSem =
5515 Op->getType()->getScalarType()->getFltSemantics();
5516
5517 if (Op->getOpcode() == Instruction::FDiv) {
5518 // Only 0/0, Inf/Inf produce NaN.
5519 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
5520 (KnownLHS.isKnownNeverInfinity() ||
5521 KnownRHS.isKnownNeverInfinity()) &&
5522 ((F &&
5523 KnownLHS.isKnownNeverLogicalZero(Mode: F->getDenormalMode(FPType: FltSem))) ||
5524 (F &&
5525 KnownRHS.isKnownNeverLogicalZero(Mode: F->getDenormalMode(FPType: FltSem))))) {
5526 Known.knownNot(RuleOut: fcNan);
5527 }
5528
5529 // X / -0.0 is -Inf (or NaN).
5530 // +X / +X is +X
5531 if (KnownLHS.isKnownNever(Mask: fcNegative) && KnownRHS.isKnownNever(Mask: fcNegative))
5532 Known.knownNot(RuleOut: fcNegative);
5533 } else {
5534 // Inf REM x and x REM 0 produce NaN.
5535 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
5536 KnownLHS.isKnownNeverInfinity() && F &&
5537 KnownRHS.isKnownNeverLogicalZero(Mode: F->getDenormalMode(FPType: FltSem))) {
5538 Known.knownNot(RuleOut: fcNan);
5539 }
5540
5541 // The sign for frem is the same as the first operand.
5542 if (KnownLHS.cannotBeOrderedLessThanZero())
5543 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
5544 if (KnownLHS.cannotBeOrderedGreaterThanZero())
5545 Known.knownNot(RuleOut: KnownFPClass::OrderedGreaterThanZeroMask);
5546
5547 // See if we can be more aggressive about the sign of 0.
5548 if (KnownLHS.isKnownNever(Mask: fcNegative))
5549 Known.knownNot(RuleOut: fcNegative);
5550 if (KnownLHS.isKnownNever(Mask: fcPositive))
5551 Known.knownNot(RuleOut: fcPositive);
5552 }
5553
5554 break;
5555 }
5556 case Instruction::FPExt: {
5557 // Infinity, nan and zero propagate from source.
5558 computeKnownFPClass(V: Op->getOperand(i: 0), DemandedElts, InterestedClasses,
5559 Known, Q, Depth: Depth + 1);
5560
5561 const fltSemantics &DstTy =
5562 Op->getType()->getScalarType()->getFltSemantics();
5563 const fltSemantics &SrcTy =
5564 Op->getOperand(i: 0)->getType()->getScalarType()->getFltSemantics();
5565
5566 // All subnormal inputs should be in the normal range in the result type.
5567 if (APFloat::isRepresentableAsNormalIn(Src: SrcTy, Dst: DstTy)) {
5568 if (Known.KnownFPClasses & fcPosSubnormal)
5569 Known.KnownFPClasses |= fcPosNormal;
5570 if (Known.KnownFPClasses & fcNegSubnormal)
5571 Known.KnownFPClasses |= fcNegNormal;
5572 Known.knownNot(RuleOut: fcSubnormal);
5573 }
5574
5575 // Sign bit of a nan isn't guaranteed.
5576 if (!Known.isKnownNeverNaN())
5577 Known.SignBit = std::nullopt;
5578 break;
5579 }
5580 case Instruction::FPTrunc: {
5581 computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known, Q,
5582 Depth);
5583 break;
5584 }
5585 case Instruction::SIToFP:
5586 case Instruction::UIToFP: {
5587 // Cannot produce nan
5588 Known.knownNot(RuleOut: fcNan);
5589
5590 // Integers cannot be subnormal
5591 Known.knownNot(RuleOut: fcSubnormal);
5592
5593 // sitofp and uitofp turn into +0.0 for zero.
5594 Known.knownNot(RuleOut: fcNegZero);
5595 if (Op->getOpcode() == Instruction::UIToFP)
5596 Known.signBitMustBeZero();
5597
5598 if (InterestedClasses & fcInf) {
5599 // Get width of largest magnitude integer (remove a bit if signed).
5600 // This still works for a signed minimum value because the largest FP
5601 // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx).
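      // (e.g. for i32 -> float: the largest finite float has exponent 127,
      //  which is >= 31, so no i32 value can convert to infinity.)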
5602 int IntSize = Op->getOperand(i: 0)->getType()->getScalarSizeInBits();
5603 if (Op->getOpcode() == Instruction::SIToFP)
5604 --IntSize;
5605
5606 // If the exponent of the largest finite FP value can hold the largest
5607 // integer, the result of the cast must be finite.
5608 Type *FPTy = Op->getType()->getScalarType();
5609 if (ilogb(Arg: APFloat::getLargest(Sem: FPTy->getFltSemantics())) >= IntSize)
5610 Known.knownNot(RuleOut: fcInf);
5611 }
5612
5613 break;
5614 }
5615 case Instruction::ExtractElement: {
5616 // Look through extract element. If the index is non-constant or
5617    // out-of-range, demand all elements; otherwise just the extracted element.
5618 const Value *Vec = Op->getOperand(i: 0);
5619
5620 APInt DemandedVecElts;
5621 if (auto *VecTy = dyn_cast<FixedVectorType>(Val: Vec->getType())) {
5622 unsigned NumElts = VecTy->getNumElements();
5623 DemandedVecElts = APInt::getAllOnes(numBits: NumElts);
5624 auto *CIdx = dyn_cast<ConstantInt>(Val: Op->getOperand(i: 1));
5625 if (CIdx && CIdx->getValue().ult(RHS: NumElts))
5626 DemandedVecElts = APInt::getOneBitSet(numBits: NumElts, BitNo: CIdx->getZExtValue());
5627 } else {
5628 DemandedVecElts = APInt(1, 1);
5629 }
5630
5631 return computeKnownFPClass(V: Vec, DemandedElts: DemandedVecElts, InterestedClasses, Known,
5632 Q, Depth: Depth + 1);
5633 }
5634 case Instruction::InsertElement: {
5635 if (isa<ScalableVectorType>(Val: Op->getType()))
5636 return;
5637
5638 const Value *Vec = Op->getOperand(i: 0);
5639 const Value *Elt = Op->getOperand(i: 1);
5640 auto *CIdx = dyn_cast<ConstantInt>(Val: Op->getOperand(i: 2));
5641 unsigned NumElts = DemandedElts.getBitWidth();
5642 APInt DemandedVecElts = DemandedElts;
5643 bool NeedsElt = true;
5644    // If we know the index we are inserting into, clear that bit from the Vec check.
5645 if (CIdx && CIdx->getValue().ult(RHS: NumElts)) {
5646 DemandedVecElts.clearBit(BitPosition: CIdx->getZExtValue());
5647 NeedsElt = DemandedElts[CIdx->getZExtValue()];
5648 }
5649
5650 // Do we demand the inserted element?
5651 if (NeedsElt) {
5652 computeKnownFPClass(V: Elt, Known, InterestedClasses, Q, Depth: Depth + 1);
5653 // If we don't know any bits, early out.
5654 if (Known.isUnknown())
5655 break;
5656 } else {
5657 Known.KnownFPClasses = fcNone;
5658 }
5659
5660    // Do we need any more elements from Vec?
5661 if (!DemandedVecElts.isZero()) {
5662 KnownFPClass Known2;
5663 computeKnownFPClass(V: Vec, DemandedElts: DemandedVecElts, InterestedClasses, Known&: Known2, Q,
5664 Depth: Depth + 1);
5665 Known |= Known2;
5666 }
5667
5668 break;
5669 }
5670 case Instruction::ShuffleVector: {
5671 // For undef elements, we don't know anything about the common state of
5672 // the shuffle result.
5673 APInt DemandedLHS, DemandedRHS;
5674 auto *Shuf = dyn_cast<ShuffleVectorInst>(Val: Op);
5675 if (!Shuf || !getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
5676 return;
5677
5678 if (!!DemandedLHS) {
5679 const Value *LHS = Shuf->getOperand(i_nocapture: 0);
5680 computeKnownFPClass(V: LHS, DemandedElts: DemandedLHS, InterestedClasses, Known, Q,
5681 Depth: Depth + 1);
5682
5683 // If we don't know any bits, early out.
5684 if (Known.isUnknown())
5685 break;
5686 } else {
5687 Known.KnownFPClasses = fcNone;
5688 }
5689
5690 if (!!DemandedRHS) {
5691 KnownFPClass Known2;
5692 const Value *RHS = Shuf->getOperand(i_nocapture: 1);
5693 computeKnownFPClass(V: RHS, DemandedElts: DemandedRHS, InterestedClasses, Known&: Known2, Q,
5694 Depth: Depth + 1);
5695 Known |= Known2;
5696 }
5697
5698 break;
5699 }
5700 case Instruction::ExtractValue: {
5701 const ExtractValueInst *Extract = cast<ExtractValueInst>(Val: Op);
5702 ArrayRef<unsigned> Indices = Extract->getIndices();
5703 const Value *Src = Extract->getAggregateOperand();
5704 if (isa<StructType>(Val: Src->getType()) && Indices.size() == 1 &&
5705 Indices[0] == 0) {
5706 if (const auto *II = dyn_cast<IntrinsicInst>(Val: Src)) {
5707 switch (II->getIntrinsicID()) {
5708 case Intrinsic::frexp: {
5709 Known.knownNot(RuleOut: fcSubnormal);
5710
5711 KnownFPClass KnownSrc;
5712 computeKnownFPClass(V: II->getArgOperand(i: 0), DemandedElts,
5713 InterestedClasses, Known&: KnownSrc, Q, Depth: Depth + 1);
5714
5715 const Function *F = cast<Instruction>(Val: Op)->getFunction();
5716 const fltSemantics &FltSem =
5717 Op->getType()->getScalarType()->getFltSemantics();
5718
5719 if (KnownSrc.isKnownNever(Mask: fcNegative))
5720 Known.knownNot(RuleOut: fcNegative);
5721 else {
5722 if (F &&
5723 KnownSrc.isKnownNeverLogicalNegZero(Mode: F->getDenormalMode(FPType: FltSem)))
5724 Known.knownNot(RuleOut: fcNegZero);
5725 if (KnownSrc.isKnownNever(Mask: fcNegInf))
5726 Known.knownNot(RuleOut: fcNegInf);
5727 }
5728
5729 if (KnownSrc.isKnownNever(Mask: fcPositive))
5730 Known.knownNot(RuleOut: fcPositive);
5731 else {
5732 if (F &&
5733 KnownSrc.isKnownNeverLogicalPosZero(Mode: F->getDenormalMode(FPType: FltSem)))
5734 Known.knownNot(RuleOut: fcPosZero);
5735 if (KnownSrc.isKnownNever(Mask: fcPosInf))
5736 Known.knownNot(RuleOut: fcPosInf);
5737 }
5738
5739 Known.propagateNaN(Src: KnownSrc);
5740 return;
5741 }
5742 default:
5743 break;
5744 }
5745 }
5746 }
5747
5748 computeKnownFPClass(V: Src, DemandedElts, InterestedClasses, Known, Q,
5749 Depth: Depth + 1);
5750 break;
5751 }
5752 case Instruction::PHI: {
5753 const PHINode *P = cast<PHINode>(Val: Op);
5754 // Unreachable blocks may have zero-operand PHI nodes.
5755 if (P->getNumIncomingValues() == 0)
5756 break;
5757
5758 // Otherwise take the unions of the known bit sets of the operands,
5759 // taking conservative care to avoid excessive recursion.
5760 const unsigned PhiRecursionLimit = MaxAnalysisRecursionDepth - 2;
5761
5762 if (Depth < PhiRecursionLimit) {
5763      // Skip if every incoming value refers back to the PHI itself.
5764 if (isa_and_nonnull<UndefValue>(Val: P->hasConstantValue()))
5765 break;
5766
5767 bool First = true;
5768
5769 for (const Use &U : P->operands()) {
5770 Value *IncValue;
5771 Instruction *CxtI;
5772 breakSelfRecursivePHI(U: &U, PHI: P, ValOut&: IncValue, CtxIOut&: CxtI);
5773 // Skip direct self references.
5774 if (IncValue == P)
5775 continue;
5776
5777 KnownFPClass KnownSrc;
5778 // Recurse, but cap the recursion to two levels, because we don't want
5779 // to waste time spinning around in loops. We need at least depth 2 to
5780 // detect known sign bits.
5781 computeKnownFPClass(V: IncValue, DemandedElts, InterestedClasses, Known&: KnownSrc,
5782 Q: Q.getWithoutCondContext().getWithInstruction(I: CxtI),
5783 Depth: PhiRecursionLimit);
5784
5785 if (First) {
5786 Known = KnownSrc;
5787 First = false;
5788 } else {
5789 Known |= KnownSrc;
5790 }
5791
5792 if (Known.KnownFPClasses == fcAllFlags)
5793 break;
5794 }
5795 }
5796
5797 break;
5798 }
5799 case Instruction::BitCast: {
5800 const Value *Src;
5801 if (!match(V: Op, P: m_ElementWiseBitCast(Op: m_Value(V&: Src))) ||
5802 !Src->getType()->isIntOrIntVectorTy())
5803 break;
5804
5805 const Type *Ty = Op->getType()->getScalarType();
5806 KnownBits Bits(Ty->getScalarSizeInBits());
5807 computeKnownBits(V: Src, DemandedElts, Known&: Bits, Q, Depth: Depth + 1);
5808
5809 // Transfer information from the sign bit.
5810 if (Bits.isNonNegative())
5811 Known.signBitMustBeZero();
5812 else if (Bits.isNegative())
5813 Known.signBitMustBeOne();
5814
5815 if (Ty->isIEEELikeFPTy()) {
5816 // IEEE floats are NaN when all bits of the exponent plus at least one of
5817 // the fraction bits are 1. This means:
5818 // - If we assume unknown bits are 0 and the value is NaN, it will
5819 // always be NaN
5820 // - If we assume unknown bits are 1 and the value is not NaN, it can
5821 // never be NaN
5822 // Note: These properties do not hold for the x86_fp80 format.
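// E.g. for IEEE single precision: if the known-one bits already cover
// all eight exponent bits plus at least one fraction bit (0x7F800001,
// say), the value is NaN no matter what the unknown bits are; if some
// exponent bit is known zero, or every fraction bit is known zero, no
// assignment of the unknown bits can produce a NaN.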
5823 if (APFloat(Ty->getFltSemantics(), Bits.One).isNaN())
5824 Known.KnownFPClasses = fcNan;
5825 else if (!APFloat(Ty->getFltSemantics(), ~Bits.Zero).isNaN())
5826 Known.knownNot(RuleOut: fcNan);
5827
5828 // Build KnownBits representing Inf and check if it must be equal or
5829 // unequal to this value.
5830 auto InfKB = KnownBits::makeConstant(
5831 C: APFloat::getInf(Sem: Ty->getFltSemantics()).bitcastToAPInt());
5832 InfKB.Zero.clearSignBit();
5833 if (const auto InfResult = KnownBits::eq(LHS: Bits, RHS: InfKB)) {
5834 assert(!InfResult.value());
5835 Known.knownNot(RuleOut: fcInf);
5836 } else if (Bits == InfKB) {
5837 Known.KnownFPClasses = fcInf;
5838 }
5839
5840 // Build KnownBits representing Zero and check if it must be equal or
5841 // unequal to this value.
5842 auto ZeroKB = KnownBits::makeConstant(
5843 C: APFloat::getZero(Sem: Ty->getFltSemantics()).bitcastToAPInt());
5844 ZeroKB.Zero.clearSignBit();
5845 if (const auto ZeroResult = KnownBits::eq(LHS: Bits, RHS: ZeroKB)) {
5846 assert(!ZeroResult.value());
5847 Known.knownNot(RuleOut: fcZero);
5848 } else if (Bits == ZeroKB) {
5849 Known.KnownFPClasses = fcZero;
5850 }
5851 }
5852
5853 break;
5854 }
5855 default:
5856 break;
5857 }
5858}
5859
5860KnownFPClass llvm::computeKnownFPClass(const Value *V,
5861 const APInt &DemandedElts,
5862 FPClassTest InterestedClasses,
5863 const SimplifyQuery &SQ,
5864 unsigned Depth) {
5865 KnownFPClass KnownClasses;
5866 ::computeKnownFPClass(V, DemandedElts, InterestedClasses, Known&: KnownClasses, Q: SQ,
5867 Depth);
5868 return KnownClasses;
5869}
5870
5871KnownFPClass llvm::computeKnownFPClass(const Value *V,
5872 FPClassTest InterestedClasses,
5873 const SimplifyQuery &SQ,
5874 unsigned Depth) {
5875 KnownFPClass Known;
5876 ::computeKnownFPClass(V, Known, InterestedClasses, Q: SQ, Depth);
5877 return Known;
5878}
5879
5880KnownFPClass llvm::computeKnownFPClass(
5881 const Value *V, const DataLayout &DL, FPClassTest InterestedClasses,
5882 const TargetLibraryInfo *TLI, AssumptionCache *AC, const Instruction *CxtI,
5883 const DominatorTree *DT, bool UseInstrInfo, unsigned Depth) {
5884 return computeKnownFPClass(V, InterestedClasses,
5885 SQ: SimplifyQuery(DL, TLI, DT, AC, CxtI, UseInstrInfo),
5886 Depth);
5887}
5888
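/// Variant that also takes fast-math flags into account: under nnan
/// (resp. ninf) the value may be assumed to never be NaN (resp. infinity),
/// so those classes are dropped from both the query and the result.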
5889KnownFPClass
5890llvm::computeKnownFPClass(const Value *V, const APInt &DemandedElts,
5891 FastMathFlags FMF, FPClassTest InterestedClasses,
5892 const SimplifyQuery &SQ, unsigned Depth) {
5893 if (FMF.noNaNs())
5894 InterestedClasses &= ~fcNan;
5895 if (FMF.noInfs())
5896 InterestedClasses &= ~fcInf;
5897
5898 KnownFPClass Result =
5899 computeKnownFPClass(V, DemandedElts, InterestedClasses, SQ, Depth);
5900
5901 if (FMF.noNaNs())
5902 Result.KnownFPClasses &= ~fcNan;
5903 if (FMF.noInfs())
5904 Result.KnownFPClasses &= ~fcInf;
5905 return Result;
5906}
5907
5908KnownFPClass llvm::computeKnownFPClass(const Value *V, FastMathFlags FMF,
5909 FPClassTest InterestedClasses,
5910 const SimplifyQuery &SQ,
5911 unsigned Depth) {
5912 auto *FVTy = dyn_cast<FixedVectorType>(Val: V->getType());
5913 APInt DemandedElts =
5914 FVTy ? APInt::getAllOnes(numBits: FVTy->getNumElements()) : APInt(1, 1);
5915 return computeKnownFPClass(V, DemandedElts, FMF, InterestedClasses, SQ,
5916 Depth);
5917}
5918
5919bool llvm::cannotBeNegativeZero(const Value *V, const SimplifyQuery &SQ,
5920 unsigned Depth) {
5921 KnownFPClass Known = computeKnownFPClass(V, InterestedClasses: fcNegZero, SQ, Depth);
5922 return Known.isKnownNeverNegZero();
5923}
5924
5925bool llvm::cannotBeOrderedLessThanZero(const Value *V, const SimplifyQuery &SQ,
5926 unsigned Depth) {
5927 KnownFPClass Known =
5928 computeKnownFPClass(V, InterestedClasses: KnownFPClass::OrderedLessThanZeroMask, SQ, Depth);
5929 return Known.cannotBeOrderedLessThanZero();
5930}
5931
5932bool llvm::isKnownNeverInfinity(const Value *V, const SimplifyQuery &SQ,
5933 unsigned Depth) {
5934 KnownFPClass Known = computeKnownFPClass(V, InterestedClasses: fcInf, SQ, Depth);
5935 return Known.isKnownNeverInfinity();
5936}
5937
5938/// Return true if the floating-point value can never contain a NaN or infinity.
5939bool llvm::isKnownNeverInfOrNaN(const Value *V, const SimplifyQuery &SQ,
5940 unsigned Depth) {
5941 KnownFPClass Known = computeKnownFPClass(V, InterestedClasses: fcInf | fcNan, SQ, Depth);
5942 return Known.isKnownNeverNaN() && Known.isKnownNeverInfinity();
5943}
5944
5945/// Return true if the floating-point scalar value is not a NaN or if the
5946/// floating-point vector value has no NaN elements. Return false if a value
5947/// could ever be NaN.
5948bool llvm::isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ,
5949 unsigned Depth) {
5950 KnownFPClass Known = computeKnownFPClass(V, InterestedClasses: fcNan, SQ, Depth);
5951 return Known.isKnownNeverNaN();
5952}
5953
5954/// Return false if we can prove that the specified FP value's sign bit is 0.
5955/// Return true if we can prove that the specified FP value's sign bit is 1.
5956/// Otherwise return std::nullopt.
5957std::optional<bool> llvm::computeKnownFPSignBit(const Value *V,
5958 const SimplifyQuery &SQ,
5959 unsigned Depth) {
5960 KnownFPClass Known = computeKnownFPClass(V, InterestedClasses: fcAllFlags, SQ, Depth);
5961 return Known.SignBit;
5962}
5963
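/// Return true if this use cannot observe the sign of a zero flowing into
/// it, e.g. because the user has nsz, is fabs or an FP-to-integer
/// conversion, or is an fcmp, which treats +0.0 and -0.0 as equal.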
5964bool llvm::canIgnoreSignBitOfZero(const Use &U) {
5965 auto *User = cast<Instruction>(Val: U.getUser());
5966 if (auto *FPOp = dyn_cast<FPMathOperator>(Val: User)) {
5967 if (FPOp->hasNoSignedZeros())
5968 return true;
5969 }
5970
5971 switch (User->getOpcode()) {
5972 case Instruction::FPToSI:
5973 case Instruction::FPToUI:
5974 return true;
5975 case Instruction::FCmp:
5976 // fcmp treats both positive and negative zero as equal.
5977 return true;
5978 case Instruction::Call:
5979 if (auto *II = dyn_cast<IntrinsicInst>(Val: User)) {
5980 switch (II->getIntrinsicID()) {
5981 case Intrinsic::fabs:
5982 return true;
5983 case Intrinsic::copysign:
5984 return U.getOperandNo() == 0;
5985 case Intrinsic::is_fpclass:
5986 case Intrinsic::vp_is_fpclass: {
5987 auto Test =
5988 static_cast<FPClassTest>(
5989 cast<ConstantInt>(Val: II->getArgOperand(i: 1))->getZExtValue()) &
5990 FPClassTest::fcZero;
5991 return Test == FPClassTest::fcZero || Test == FPClassTest::fcNone;
5992 }
5993 default:
5994 return false;
5995 }
5996 }
5997 return false;
5998 default:
5999 return false;
6000 }
6001}
6002
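/// Return true if this use cannot observe the sign of a NaN flowing into
/// it. Proper FP arithmetic, comparisons, conversions, and most FP math
/// intrinsics ignore the sign of a NaN input, whereas bitwise-style
/// operations such as fneg, select, phi, and copysign's sign operand
/// preserve or propagate it.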
6003bool llvm::canIgnoreSignBitOfNaN(const Use &U) {
6004 auto *User = cast<Instruction>(Val: U.getUser());
6005 if (auto *FPOp = dyn_cast<FPMathOperator>(Val: User)) {
6006 if (FPOp->hasNoNaNs())
6007 return true;
6008 }
6009
6010 switch (User->getOpcode()) {
6011 case Instruction::FPToSI:
6012 case Instruction::FPToUI:
6013 return true;
6014 // Proper FP math operations ignore the sign bit of NaN.
6015 case Instruction::FAdd:
6016 case Instruction::FSub:
6017 case Instruction::FMul:
6018 case Instruction::FDiv:
6019 case Instruction::FRem:
6020 case Instruction::FPTrunc:
6021 case Instruction::FPExt:
6022 case Instruction::FCmp:
6023 return true;
6024 // Bitwise FP operations should preserve the sign bit of NaN.
6025 case Instruction::FNeg:
6026 case Instruction::Select:
6027 case Instruction::PHI:
6028 return false;
6029 case Instruction::Ret:
6030 return User->getFunction()->getAttributes().getRetNoFPClass() &
6031 FPClassTest::fcNan;
6032 case Instruction::Call:
6033 case Instruction::Invoke: {
6034 if (auto *II = dyn_cast<IntrinsicInst>(Val: User)) {
6035 switch (II->getIntrinsicID()) {
6036 case Intrinsic::fabs:
6037 return true;
6038 case Intrinsic::copysign:
6039 return U.getOperandNo() == 0;
6040 // Other proper FP math intrinsics ignore the sign bit of NaN.
6041 case Intrinsic::maxnum:
6042 case Intrinsic::minnum:
6043 case Intrinsic::maximum:
6044 case Intrinsic::minimum:
6045 case Intrinsic::maximumnum:
6046 case Intrinsic::minimumnum:
6047 case Intrinsic::canonicalize:
6048 case Intrinsic::fma:
6049 case Intrinsic::fmuladd:
6050 case Intrinsic::sqrt:
6051 case Intrinsic::pow:
6052 case Intrinsic::powi:
6053 case Intrinsic::fptoui_sat:
6054 case Intrinsic::fptosi_sat:
6055 case Intrinsic::is_fpclass:
6056 case Intrinsic::vp_is_fpclass:
6057 return true;
6058 default:
6059 return false;
6060 }
6061 }
6062
6063 FPClassTest NoFPClass =
6064 cast<CallBase>(Val: User)->getParamNoFPClass(i: U.getOperandNo());
6065 return NoFPClass & FPClassTest::fcNan;
6066 }
6067 default:
6068 return false;
6069 }
6070}
6071
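/// If the value can be materialized by splatting a single byte over memory,
/// return that byte as an i8; otherwise return nullptr. For example
/// (illustrative): i32 0x01010101 yields i8 1, float +0.0 yields i8 0, and
/// i32 0x01020304 yields nullptr.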
6072Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) {
6073
6074 // All byte-wide stores are splatable, even of arbitrary variables.
6075 if (V->getType()->isIntegerTy(Bitwidth: 8))
6076 return V;
6077
6078 LLVMContext &Ctx = V->getContext();
6079
6080 // Undef values are don't-cares; map them to an undef i8.
6081 auto *UndefInt8 = UndefValue::get(T: Type::getInt8Ty(C&: Ctx));
6082 if (isa<UndefValue>(Val: V))
6083 return UndefInt8;
6084
6085 // Return poison for zero-sized types.
6086 if (DL.getTypeStoreSize(Ty: V->getType()).isZero())
6087 return PoisonValue::get(T: Type::getInt8Ty(C&: Ctx));
6088
6089 Constant *C = dyn_cast<Constant>(Val: V);
6090 if (!C) {
6091 // Conceptually, we could handle things like:
6092 // %a = zext i8 %X to i16
6093 // %b = shl i16 %a, 8
6094 // %c = or i16 %a, %b
6095 // but until there is an example that actually needs this, it doesn't seem
6096 // worth worrying about.
6097 return nullptr;
6098 }
6099
6100 // Handle 'null' ConstantAggregateZero etc.
6101 if (C->isNullValue())
6102 return Constant::getNullValue(Ty: Type::getInt8Ty(C&: Ctx));
6103
6104 // Constant floating-point values can be handled as integer values if the
6105 // corresponding integer value is "byteable". An important case is 0.0.
6106 if (ConstantFP *CFP = dyn_cast<ConstantFP>(Val: C)) {
6107 Type *Ty = nullptr;
6108 if (CFP->getType()->isHalfTy())
6109 Ty = Type::getInt16Ty(C&: Ctx);
6110 else if (CFP->getType()->isFloatTy())
6111 Ty = Type::getInt32Ty(C&: Ctx);
6112 else if (CFP->getType()->isDoubleTy())
6113 Ty = Type::getInt64Ty(C&: Ctx);
6114 // Don't handle long double formats, which have strange constraints.
6115 return Ty ? isBytewiseValue(V: ConstantExpr::getBitCast(C: CFP, Ty), DL)
6116 : nullptr;
6117 }
6118
6119 // We can handle constant integers whose width is a multiple of 8 bits.
6120 if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: C)) {
6121 if (CI->getBitWidth() % 8 == 0) {
6122 assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
6123 if (!CI->getValue().isSplat(SplatSizeInBits: 8))
6124 return nullptr;
6125 return ConstantInt::get(Context&: Ctx, V: CI->getValue().trunc(width: 8));
6126 }
6127 }
6128
6129 if (auto *CE = dyn_cast<ConstantExpr>(Val: C)) {
6130 if (CE->getOpcode() == Instruction::IntToPtr) {
6131 if (auto *PtrTy = dyn_cast<PointerType>(Val: CE->getType())) {
6132 unsigned BitWidth = DL.getPointerSizeInBits(AS: PtrTy->getAddressSpace());
6133 if (Constant *Op = ConstantFoldIntegerCast(
6134 C: CE->getOperand(i_nocapture: 0), DestTy: Type::getIntNTy(C&: Ctx, N: BitWidth), IsSigned: false, DL))
6135 return isBytewiseValue(V: Op, DL);
6136 }
6137 }
6138 }
6139
6140 auto Merge = [&](Value *LHS, Value *RHS) -> Value * {
6141 if (LHS == RHS)
6142 return LHS;
6143 if (!LHS || !RHS)
6144 return nullptr;
6145 if (LHS == UndefInt8)
6146 return RHS;
6147 if (RHS == UndefInt8)
6148 return LHS;
6149 return nullptr;
6150 };
6151
6152 if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(Val: C)) {
6153 Value *Val = UndefInt8;
6154 for (uint64_t I = 0, E = CA->getNumElements(); I != E; ++I)
6155 if (!(Val = Merge(Val, isBytewiseValue(V: CA->getElementAsConstant(i: I), DL))))
6156 return nullptr;
6157 return Val;
6158 }
6159
6160 if (isa<ConstantAggregate>(Val: C)) {
6161 Value *Val = UndefInt8;
6162 for (Value *Op : C->operands())
6163 if (!(Val = Merge(Val, isBytewiseValue(V: Op, DL))))
6164 return nullptr;
6165 return Val;
6166 }
6167
6168 // Don't try to handle the handful of other constants.
6169 return nullptr;
6170}
6171
6172// This is the recursive version of BuildSubAggregate. It takes a few different
6173// arguments. Idxs is the index within the nested struct From that we are
6174// looking at now (which is of type IndexedType). IdxSkip is the number of
6175// indices from Idxs that should be left out when inserting into the resulting
6176 // struct. To is the result struct built so far; new insertvalue instructions
6177 // build on it.
6178static Value *BuildSubAggregate(Value *From, Value *To, Type *IndexedType,
6179 SmallVectorImpl<unsigned> &Idxs,
6180 unsigned IdxSkip,
6181 BasicBlock::iterator InsertBefore) {
6182 StructType *STy = dyn_cast<StructType>(Val: IndexedType);
6183 if (STy) {
6184 // Save the original To argument so we can modify it
6185 Value *OrigTo = To;
6186 // General case, the type indexed by Idxs is a struct
6187 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
6188 // Process each struct element recursively
6189 Idxs.push_back(Elt: i);
6190 Value *PrevTo = To;
6191 To = BuildSubAggregate(From, To, IndexedType: STy->getElementType(N: i), Idxs, IdxSkip,
6192 InsertBefore);
6193 Idxs.pop_back();
6194 if (!To) {
6195 // Couldn't find any inserted value for this index? Cleanup
6196 while (PrevTo != OrigTo) {
6197 InsertValueInst* Del = cast<InsertValueInst>(Val: PrevTo);
6198 PrevTo = Del->getAggregateOperand();
6199 Del->eraseFromParent();
6200 }
6201 // Stop processing elements
6202 break;
6203 }
6204 }
6205 // If we successfully found a value for each of our subaggregates
6206 if (To)
6207 return To;
6208 }
6209 // Base case: the type indexed by Idxs is not a struct, or not all of
6210 // the struct's elements had a value that was inserted directly. In the latter
6211 // case, perhaps we can't determine each of the subelements individually, but
6212 // we might be able to find the complete struct somewhere.
6213
6214 // Find the value that is at that particular spot
6215 Value *V = FindInsertedValue(V: From, idx_range: Idxs);
6216
6217 if (!V)
6218 return nullptr;
6219
6220 // Insert the value in the new (sub) aggregate
6221 return InsertValueInst::Create(Agg: To, Val: V, Idxs: ArrayRef(Idxs).slice(N: IdxSkip), NameStr: "tmp",
6222 InsertBefore);
6223}
6224
6225// This helper takes a nested struct and extracts a part of it (which is again a
6226// struct) into a new value. For example, given the struct:
6227// { a, { b, { c, d }, e } }
6228// and the indices "1, 1" this returns
6229// { c, d }.
6230//
6231// It does this by inserting an insertvalue for each element in the resulting
6232// struct, as opposed to just inserting a single struct. This will only work if
6233 // each of the elements of the substruct is known (i.e., inserted into From
6234 // by an insertvalue instruction somewhere).
6235//
6236// All inserted insertvalue instructions are inserted before InsertBefore
6237static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
6238 BasicBlock::iterator InsertBefore) {
6239 Type *IndexedType = ExtractValueInst::getIndexedType(Agg: From->getType(),
6240 Idxs: idx_range);
6241 Value *To = PoisonValue::get(T: IndexedType);
6242 SmallVector<unsigned, 10> Idxs(idx_range);
6243 unsigned IdxSkip = Idxs.size();
6244
6245 return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
6246}
6247
6248/// Given an aggregate and a sequence of indices, see if the scalar value
6249/// indexed is already around as a register, for example if it was inserted
6250/// directly into the aggregate.
6251///
6252/// If InsertBefore is not null, this function will duplicate (modified)
6253/// insertvalues when a part of a nested struct is extracted.
6254Value *
6255llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
6256 std::optional<BasicBlock::iterator> InsertBefore) {
6257 // Nothing to index? Just return V then (this is useful at the end of our
6258 // recursion).
6259 if (idx_range.empty())
6260 return V;
6261 // We have indices, so V should have an indexable type.
6262 assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) &&
6263 "Not looking at a struct or array?");
6264 assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) &&
6265 "Invalid indices for type?");
6266
6267 if (Constant *C = dyn_cast<Constant>(Val: V)) {
6268 C = C->getAggregateElement(Elt: idx_range[0]);
6269 if (!C) return nullptr;
6270 return FindInsertedValue(V: C, idx_range: idx_range.slice(N: 1), InsertBefore);
6271 }
6272
6273 if (InsertValueInst *I = dyn_cast<InsertValueInst>(Val: V)) {
6274 // Loop the indices for the insertvalue instruction in parallel with the
6275 // requested indices
6276 const unsigned *req_idx = idx_range.begin();
6277 for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
6278 i != e; ++i, ++req_idx) {
6279 if (req_idx == idx_range.end()) {
6280 // We can't handle this without inserting insertvalues
6281 if (!InsertBefore)
6282 return nullptr;
6283
6284 // The requested index identifies a part of a nested aggregate. Handle
6285 // this specially. For example,
6286 // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
6287 // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
6288 // %C = extractvalue {i32, { i32, i32 } } %B, 1
6289 // This can be changed into
6290 // %A = insertvalue {i32, i32 } undef, i32 10, 0
6291 // %C = insertvalue {i32, i32 } %A, i32 11, 1
6292 // which allows the unused 0,0 element from the nested struct to be
6293 // removed.
6294 return BuildSubAggregate(From: V, idx_range: ArrayRef(idx_range.begin(), req_idx),
6295 InsertBefore: *InsertBefore);
6296 }
6297
6298 // This insertvalue inserts something other than what we are looking for.
6299 // See if the (aggregate) value inserted into has the value we are
6300 // looking for, then.
6301 if (*req_idx != *i)
6302 return FindInsertedValue(V: I->getAggregateOperand(), idx_range,
6303 InsertBefore);
6304 }
6305 // If we end up here, the indices of the insertvalue match with those
6306 // requested (though possibly only partially). Now we recursively look at
6307 // the inserted value, passing any remaining indices.
6308 return FindInsertedValue(V: I->getInsertedValueOperand(),
6309 idx_range: ArrayRef(req_idx, idx_range.end()), InsertBefore);
6310 }
6311
6312 if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(Val: V)) {
6313 // If we're extracting a value from an aggregate that was extracted from
6314 // something else, we can extract from that something else directly instead.
6315 // However, we will need to chain I's indices with the requested indices.
6316
6317 // Calculate the number of indices required
6318 unsigned size = I->getNumIndices() + idx_range.size();
6319 // Allocate some space to put the new indices in
6320 SmallVector<unsigned, 5> Idxs;
6321 Idxs.reserve(N: size);
6322 // Add indices from the extract value instruction
6323 Idxs.append(in_start: I->idx_begin(), in_end: I->idx_end());
6324
6325 // Add requested indices
6326 Idxs.append(in_start: idx_range.begin(), in_end: idx_range.end());
6327
6328 assert(Idxs.size() == size
6329 && "Number of indices added not correct?");
6330
6331 return FindInsertedValue(V: I->getAggregateOperand(), idx_range: Idxs, InsertBefore);
6332 }
6333 // Otherwise, we don't know (for example, when extracting from a function
6334 // return value or a load instruction).
6335 return nullptr;
6336}
6337
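/// Return true if \p GEP has the canonical shape of a pointer into an array
/// of \p CharSize-wide integers, e.g. (illustrative IR)
///   getelementptr [12 x i8], ptr @str, i64 0, i64 %idx
/// i.e. exactly two indices with a constant-zero leading index.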
6338bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
6339 unsigned CharSize) {
6340 // Make sure the GEP has exactly three arguments.
6341 if (GEP->getNumOperands() != 3)
6342 return false;
6343
6344 // Make sure the indexed object is a pointer to an array of \p CharSize
6345 // integers.
6346 ArrayType *AT = dyn_cast<ArrayType>(Val: GEP->getSourceElementType());
6347 if (!AT || !AT->getElementType()->isIntegerTy(Bitwidth: CharSize))
6348 return false;
6349
6350 // Check to make sure that the first operand of the GEP is an integer and
6351 // has value 0 so that we are sure we're indexing into the initializer.
6352 const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(Val: GEP->getOperand(i_nocapture: 1));
6353 if (!FirstIdx || !FirstIdx->isZero())
6354 return false;
6355
6356 return true;
6357}
6358
6359// If V refers to an initialized global constant, set Slice either to
6360// its initializer if the size of its elements equals ElementSize, or,
6361 // for ElementSize == 8, to its representation as an array of unsigned
6362// char. Return true on success.
6363 // Offset is measured in units of ElementSize-sized elements.
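// For example (illustrative): for @g = constant [5 x i8] c"abcd\00" and
// Offset == 1, Slice references @g's initializer with Slice.Offset == 1
// and Slice.Length == 4.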
6364bool llvm::getConstantDataArrayInfo(const Value *V,
6365 ConstantDataArraySlice &Slice,
6366 unsigned ElementSize, uint64_t Offset) {
6367 assert(V && "V should not be null.");
6368 assert((ElementSize % 8) == 0 &&
6369 "ElementSize expected to be a multiple of the size of a byte.");
6370 unsigned ElementSizeInBytes = ElementSize / 8;
6371
6372 // Drill down into the pointer expression V, ignoring any intervening
6373 // casts, and determine the identity of the object it references along
6374 // with the cumulative byte offset into it.
6375 const GlobalVariable *GV =
6376 dyn_cast<GlobalVariable>(Val: getUnderlyingObject(V));
6377 if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
6378 // Fail if V is not based on a constant global object.
6379 return false;
6380
6381 const DataLayout &DL = GV->getDataLayout();
6382 APInt Off(DL.getIndexTypeSizeInBits(Ty: V->getType()), 0);
6383
6384 if (GV != V->stripAndAccumulateConstantOffsets(DL, Offset&: Off,
6385 /*AllowNonInbounds*/ true))
6386 // Fail if a constant offset could not be determined.
6387 return false;
6388
6389 uint64_t StartIdx = Off.getLimitedValue();
6390 if (StartIdx == UINT64_MAX)
6391 // Fail if the constant offset is excessive.
6392 return false;
6393
6394 // Off/StartIdx is in units of bytes, so we need to convert it to a number
6395 // of elements. Simply bail out if that isn't possible.
6396 if ((StartIdx % ElementSizeInBytes) != 0)
6397 return false;
6398
6399 Offset += StartIdx / ElementSizeInBytes;
6400 ConstantDataArray *Array = nullptr;
6401 ArrayType *ArrayTy = nullptr;
6402
6403 if (GV->getInitializer()->isNullValue()) {
6404 Type *GVTy = GV->getValueType();
6405 uint64_t SizeInBytes = DL.getTypeStoreSize(Ty: GVTy).getFixedValue();
6406 uint64_t Length = SizeInBytes / ElementSizeInBytes;
6407
6408 Slice.Array = nullptr;
6409 Slice.Offset = 0;
6410 // Return an empty Slice for undersized constants to let callers
6411 // transform even undefined library calls into simpler, well-defined
6412 // expressions. This is preferable to making the calls, even though it
6413 // prevents sanitizers from detecting such calls.
6414 Slice.Length = Length < Offset ? 0 : Length - Offset;
6415 return true;
6416 }
6417
6418 auto *Init = const_cast<Constant *>(GV->getInitializer());
6419 if (auto *ArrayInit = dyn_cast<ConstantDataArray>(Val: Init)) {
6420 Type *InitElTy = ArrayInit->getElementType();
6421 if (InitElTy->isIntegerTy(Bitwidth: ElementSize)) {
6422 // If Init is an initializer for an array of the expected type
6423 // and size, use it as is.
6424 Array = ArrayInit;
6425 ArrayTy = ArrayInit->getType();
6426 }
6427 }
6428
6429 if (!Array) {
6430 if (ElementSize != 8)
6431 // TODO: Handle conversions to larger integral types.
6432 return false;
6433
6434 // Otherwise extract the portion of the initializer starting
6435 // at Offset as an array of bytes, and reset Offset.
6436 Init = ReadByteArrayFromGlobal(GV, Offset);
6437 if (!Init)
6438 return false;
6439
6440 Offset = 0;
6441 Array = dyn_cast<ConstantDataArray>(Val: Init);
6442 ArrayTy = dyn_cast<ArrayType>(Val: Init->getType());
6443 }
6444
6445 uint64_t NumElts = ArrayTy->getArrayNumElements();
6446 if (Offset > NumElts)
6447 return false;
6448
6449 Slice.Array = Array;
6450 Slice.Offset = Offset;
6451 Slice.Length = NumElts - Offset;
6452 return true;
6453}
6454
6455/// Extract bytes from the initializer of the constant array V, which need
6456/// not be a nul-terminated string. On success, store the bytes in Str and
6457/// return true. When TrimAtNul is set, Str will contain only the bytes up
6458/// to but not including the first nul. Return false on failure.
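/// For example (illustrative): for @s = constant [6 x i8] c"hi\00xy\00",
/// TrimAtNul == true yields Str == "hi", while TrimAtNul == false yields all
/// six bytes including the embedded nuls.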
6459bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
6460 bool TrimAtNul) {
6461 ConstantDataArraySlice Slice;
6462 if (!getConstantDataArrayInfo(V, Slice, ElementSize: 8))
6463 return false;
6464
6465 if (Slice.Array == nullptr) {
6466 if (TrimAtNul) {
6467 // Return a nul-terminated string even for an empty Slice. This is
6468 // safe because all existing SimplifyLibcalls callers require string
6469 // arguments and the behavior of the functions they fold is undefined
6470 // otherwise. Folding the calls this way is preferable to making
6471 // the undefined library calls, even though it prevents sanitizers
6472 // from reporting such calls.
6473 Str = StringRef();
6474 return true;
6475 }
6476 if (Slice.Length == 1) {
6477 Str = StringRef("", 1);
6478 return true;
6479 }
6480 // We cannot instantiate a StringRef as we do not have an appropriate string
6481 // of 0s at hand.
6482 return false;
6483 }
6484
6485 // Start out with the entire array in the StringRef.
6486 Str = Slice.Array->getAsString();
6487 // Skip over 'offset' bytes.
6488 Str = Str.substr(Start: Slice.Offset);
6489
6490 if (TrimAtNul) {
6491 // Trim off the \0 and anything after it. If the array is not nul
6492 // terminated, we just return the whole string. The client may know
6493 // some other way that the string is length-bound.
6494 Str = Str.substr(Start: 0, N: Str.find(C: '\0'));
6495 }
6496 return true;
6497}
6498
6499// These next two are very similar to the above, but also look through PHI
6500// nodes.
6501// TODO: See if we can integrate these two together.
6502
6503/// If we can compute the length of the string pointed to by
6504/// the specified pointer, return 'len+1'. If we can't, return 0.
6505static uint64_t GetStringLengthH(const Value *V,
6506 SmallPtrSetImpl<const PHINode*> &PHIs,
6507 unsigned CharSize) {
6508 // Look through noop bitcast instructions.
6509 V = V->stripPointerCasts();
6510
6511 // If this is a PHI node, there are two cases: either we have already seen it
6512 // or we haven't.
6513 if (const PHINode *PN = dyn_cast<PHINode>(Val: V)) {
6514 if (!PHIs.insert(Ptr: PN).second)
6515 return ~0ULL; // already in the set.
6516
6517 // If it was new, see if all the input strings are the same length.
6518 uint64_t LenSoFar = ~0ULL;
6519 for (Value *IncValue : PN->incoming_values()) {
6520 uint64_t Len = GetStringLengthH(V: IncValue, PHIs, CharSize);
6521 if (Len == 0) return 0; // Unknown length -> unknown.
6522
6523 if (Len == ~0ULL) continue;
6524
6525 if (Len != LenSoFar && LenSoFar != ~0ULL)
6526 return 0; // Disagree -> unknown.
6527 LenSoFar = Len;
6528 }
6529
6530 // Success, all agree.
6531 return LenSoFar;
6532 }
6533
6534 // strlen(select(c,x,y)) is known when strlen(x) and strlen(y) agree.
6535 if (const SelectInst *SI = dyn_cast<SelectInst>(Val: V)) {
6536 uint64_t Len1 = GetStringLengthH(V: SI->getTrueValue(), PHIs, CharSize);
6537 if (Len1 == 0) return 0;
6538 uint64_t Len2 = GetStringLengthH(V: SI->getFalseValue(), PHIs, CharSize);
6539 if (Len2 == 0) return 0;
6540 if (Len1 == ~0ULL) return Len2;
6541 if (Len2 == ~0ULL) return Len1;
6542 if (Len1 != Len2) return 0;
6543 return Len1;
6544 }
6545
6546 // Otherwise, see if we can read the string.
6547 ConstantDataArraySlice Slice;
6548 if (!getConstantDataArrayInfo(V, Slice, ElementSize: CharSize))
6549 return 0;
6550
6551 if (Slice.Array == nullptr)
6552 // Zeroinitializer (including an empty one).
6553 return 1;
6554
6555 // Search for the first nul character. Return a conservative result even
6556 // when there is no nul. This is safe since otherwise the string function
6557 // being folded (such as strlen) has undefined behavior, and folding can be
6558 // preferable to making the undefined library call.
6559 unsigned NullIndex = 0;
6560 for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) {
6561 if (Slice.Array->getElementAsInteger(i: Slice.Offset + NullIndex) == 0)
6562 break;
6563 }
6564
6565 return NullIndex + 1;
6566}
6567
6568/// If we can compute the length of the string pointed to by
6569/// the specified pointer, return 'len+1'. If we can't, return 0.
6570uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
6571 if (!V->getType()->isPointerTy())
6572 return 0;
6573
6574 SmallPtrSet<const PHINode*, 32> PHIs;
6575 uint64_t Len = GetStringLengthH(V, PHIs, CharSize);
6576 // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so
6577 // return 1 (the len+1 value for an empty string).
6578 return Len == ~0ULL ? 1 : Len;
6579}
6580
6581const Value *
6582llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call,
6583 bool MustPreserveNullness) {
6584 assert(Call &&
6585 "getArgumentAliasingToReturnedPointer only works on nonnull calls");
6586 if (const Value *RV = Call->getReturnedArgOperand())
6587 return RV;
6588 // This can be used only as an aliasing property.
6589 if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
6590 Call, MustPreserveNullness))
6591 return Call->getArgOperand(i: 0);
6592 return nullptr;
6593}
6594
6595bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
6596 const CallBase *Call, bool MustPreserveNullness) {
6597 switch (Call->getIntrinsicID()) {
6598 case Intrinsic::launder_invariant_group:
6599 case Intrinsic::strip_invariant_group:
6600 case Intrinsic::aarch64_irg:
6601 case Intrinsic::aarch64_tagp:
6602 // The amdgcn_make_buffer_rsrc function does not alter the address of the
6603 // input pointer (and thus preserves null-ness for the purposes of escape
6604 // analysis, which is where the MustPreserveNullness flag comes into play).
6605 // However, it will not necessarily map ptr addrspace(N) null to ptr
6606 // addrspace(8) null, aka the "null descriptor", which has "all loads return
6607 // 0, all stores are dropped" semantics. Given the context of this intrinsic
6608 // list, no one should be relying on such a strict interpretation of
6609 // MustPreserveNullness (and, at time of writing, they are not), but we
6610 // document this fact out of an abundance of caution.
6611 case Intrinsic::amdgcn_make_buffer_rsrc:
6612 return true;
6613 case Intrinsic::ptrmask:
6614 return !MustPreserveNullness;
6615 case Intrinsic::threadlocal_address:
6616 // The underlying variable changes with the thread ID, and the thread ID
6617 // may change at coroutine suspend points.
6618 return !Call->getParent()->getParent()->isPresplitCoroutine();
6619 default:
6620 return false;
6621 }
6622}
6623
6624/// \p PN defines a loop-variant pointer to an object. Check if the
6625/// previous iteration of the loop was referring to the same object as \p PN.
6626static bool isSameUnderlyingObjectInLoop(const PHINode *PN,
6627 const LoopInfo *LI) {
6628 // Find the loop-defined value.
6629 Loop *L = LI->getLoopFor(BB: PN->getParent());
6630 if (PN->getNumIncomingValues() != 2)
6631 return true;
6632
6633 // Find the value from previous iteration.
6634 auto *PrevValue = dyn_cast<Instruction>(Val: PN->getIncomingValue(i: 0));
6635 if (!PrevValue || LI->getLoopFor(BB: PrevValue->getParent()) != L)
6636 PrevValue = dyn_cast<Instruction>(Val: PN->getIncomingValue(i: 1));
6637 if (!PrevValue || LI->getLoopFor(BB: PrevValue->getParent()) != L)
6638 return true;
6639
6640 // If a new pointer is loaded in the loop, the pointer references a different
6641 // object in every iteration. E.g.:
6642 // for (i)
6643 // int *p = a[i];
6644 // ...
6645 if (auto *Load = dyn_cast<LoadInst>(Val: PrevValue))
6646 if (!L->isLoopInvariant(V: Load->getPointerOperand()))
6647 return false;
6648 return true;
6649}
6650
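/// Strip GEPs, bitcasts, addrspacecasts, non-interposable aliases,
/// single-operand PHIs, and returned-argument calls (up to MaxLookup steps,
/// or without limit if MaxLookup is 0) to find the object a pointer is based
/// on. E.g. (illustrative IR) for %p = getelementptr inbounds i8, ptr %base,
/// i64 4, this returns whatever %base itself strips down to.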
6651const Value *llvm::getUnderlyingObject(const Value *V, unsigned MaxLookup) {
6652 for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
6653 if (auto *GEP = dyn_cast<GEPOperator>(Val: V)) {
6654 const Value *PtrOp = GEP->getPointerOperand();
6655 if (!PtrOp->getType()->isPointerTy()) // Only handle scalar pointer base.
6656 return V;
6657 V = PtrOp;
6658 } else if (Operator::getOpcode(V) == Instruction::BitCast ||
6659 Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
6660 Value *NewV = cast<Operator>(Val: V)->getOperand(i: 0);
6661 if (!NewV->getType()->isPointerTy())
6662 return V;
6663 V = NewV;
6664 } else if (auto *GA = dyn_cast<GlobalAlias>(Val: V)) {
6665 if (GA->isInterposable())
6666 return V;
6667 V = GA->getAliasee();
6668 } else {
6669 if (auto *PHI = dyn_cast<PHINode>(Val: V)) {
6670 // Look through single-arg phi nodes created by LCSSA.
6671 if (PHI->getNumIncomingValues() == 1) {
6672 V = PHI->getIncomingValue(i: 0);
6673 continue;
6674 }
6675 } else if (auto *Call = dyn_cast<CallBase>(Val: V)) {
6676 // CaptureTracking knows about the special capturing properties of some
6677 // intrinsics, like launder.invariant.group, that can't be expressed with
6678 // attributes but that still return a pointer aliasing their argument.
6679 // Because some analyses may assume that a nocapture pointer is never
6680 // returned from such an intrinsic (the function would otherwise have to
6681 // be marked with the returned attribute), it is crucial to use this
6682 // helper, which is kept in sync with CaptureTracking. Not using it may
6683 // cause miscompilations where two aliasing pointers are assumed to be
6684 // noalias.
6685 if (auto *RP = getArgumentAliasingToReturnedPointer(Call, MustPreserveNullness: false)) {
6686 V = RP;
6687 continue;
6688 }
6689 }
6690
6691 return V;
6692 }
6693 assert(V->getType()->isPointerTy() && "Unexpected operand type!");
6694 }
6695 return V;
6696}
6697
6698void llvm::getUnderlyingObjects(const Value *V,
6699 SmallVectorImpl<const Value *> &Objects,
6700 const LoopInfo *LI, unsigned MaxLookup) {
6701 SmallPtrSet<const Value *, 4> Visited;
6702 SmallVector<const Value *, 4> Worklist;
6703 Worklist.push_back(Elt: V);
6704 do {
6705 const Value *P = Worklist.pop_back_val();
6706 P = getUnderlyingObject(V: P, MaxLookup);
6707
6708 if (!Visited.insert(Ptr: P).second)
6709 continue;
6710
6711 if (auto *SI = dyn_cast<SelectInst>(Val: P)) {
6712 Worklist.push_back(Elt: SI->getTrueValue());
6713 Worklist.push_back(Elt: SI->getFalseValue());
6714 continue;
6715 }
6716
6717 if (auto *PN = dyn_cast<PHINode>(Val: P)) {
6718 // If this PHI changes the underlying object in every iteration of the
6719 // loop, don't look through it. Consider:
6720 // int **A;
6721 // for (i) {
6722 // Prev = Curr; // Prev = PHI (Prev_0, Curr)
6723 // Curr = A[i];
6724 // *Prev, *Curr;
6725 //
6726 // Prev is tracking Curr one iteration behind so they refer to different
6727 // underlying objects.
6728 if (!LI || !LI->isLoopHeader(BB: PN->getParent()) ||
6729 isSameUnderlyingObjectInLoop(PN, LI))
6730 append_range(C&: Worklist, R: PN->incoming_values());
6731 else
6732 Objects.push_back(Elt: P);
6733 continue;
6734 }
6735
6736 Objects.push_back(Elt: P);
6737 } while (!Worklist.empty());
6738}
6739
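/// Like getUnderlyingObjects, but additionally walks through selects and
/// PHIs and returns the single common underlying object if every path agrees
/// (visiting at most a small fixed number of values); otherwise it falls
/// back to getUnderlyingObject(V).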
6740const Value *llvm::getUnderlyingObjectAggressive(const Value *V) {
6741 const unsigned MaxVisited = 8;
6742
6743 SmallPtrSet<const Value *, 8> Visited;
6744 SmallVector<const Value *, 8> Worklist;
6745 Worklist.push_back(Elt: V);
6746 const Value *Object = nullptr;
6747 // Used as fallback if we can't find a common underlying object through
6748 // recursion.
6749 bool First = true;
6750 const Value *FirstObject = getUnderlyingObject(V);
6751 do {
6752 const Value *P = Worklist.pop_back_val();
6753 P = First ? FirstObject : getUnderlyingObject(V: P);
6754 First = false;
6755
6756 if (!Visited.insert(Ptr: P).second)
6757 continue;
6758
6759 if (Visited.size() == MaxVisited)
6760 return FirstObject;
6761
6762 if (auto *SI = dyn_cast<SelectInst>(Val: P)) {
6763 Worklist.push_back(Elt: SI->getTrueValue());
6764 Worklist.push_back(Elt: SI->getFalseValue());
6765 continue;
6766 }
6767
6768 if (auto *PN = dyn_cast<PHINode>(Val: P)) {
6769 append_range(C&: Worklist, R: PN->incoming_values());
6770 continue;
6771 }
6772
6773 if (!Object)
6774 Object = P;
6775 else if (Object != P)
6776 return FirstObject;
6777 } while (!Worklist.empty());
6778
6779 return Object ? Object : FirstObject;
6780}
6781
6782/// This is the function that does the work of looking through basic
6783/// ptrtoint+arithmetic+inttoptr sequences.
6784static const Value *getUnderlyingObjectFromInt(const Value *V) {
6785 do {
6786 if (const Operator *U = dyn_cast<Operator>(Val: V)) {
6787 // If we find a ptrtoint, we hand the pointer operand back to our caller,
6788 // which resumes the regular getUnderlyingObjects walk.
6789 if (U->getOpcode() == Instruction::PtrToInt)
6790 return U->getOperand(i: 0);
6791 // If we find an add of a constant, a multiplied value, or a phi, it's
6792 // likely that the other operand will lead us to the base
6793 // object. We don't have to worry about the case where the
6794 // object address is somehow being computed by the multiply,
6795 // because our callers only care when the result is an
6796 // identifiable object.
6797 if (U->getOpcode() != Instruction::Add ||
6798 (!isa<ConstantInt>(Val: U->getOperand(i: 1)) &&
6799 Operator::getOpcode(V: U->getOperand(i: 1)) != Instruction::Mul &&
6800 !isa<PHINode>(Val: U->getOperand(i: 1))))
6801 return V;
6802 V = U->getOperand(i: 0);
6803 } else {
6804 return V;
6805 }
6806 assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
6807 } while (true);
6808}
6809
6810/// This is a wrapper around getUnderlyingObjects and adds support for basic
6811/// ptrtoint+arithmetic+inttoptr sequences.
6812 /// It returns false if an unidentified object is found by getUnderlyingObjects.
6813bool llvm::getUnderlyingObjectsForCodeGen(const Value *V,
6814 SmallVectorImpl<Value *> &Objects) {
6815 SmallPtrSet<const Value *, 16> Visited;
6816 SmallVector<const Value *, 4> Working(1, V);
6817 do {
6818 V = Working.pop_back_val();
6819
6820 SmallVector<const Value *, 4> Objs;
6821 getUnderlyingObjects(V, Objects&: Objs);
6822
6823 for (const Value *V : Objs) {
6824 if (!Visited.insert(Ptr: V).second)
6825 continue;
6826 if (Operator::getOpcode(V) == Instruction::IntToPtr) {
6827 const Value *O =
6828 getUnderlyingObjectFromInt(V: cast<User>(Val: V)->getOperand(i: 0));
6829 if (O->getType()->isPointerTy()) {
6830 Working.push_back(Elt: O);
6831 continue;
6832 }
6833 }
6834 // If getUnderlyingObjects fails to find an identifiable object,
6835 // getUnderlyingObjectsForCodeGen also fails for safety.
6836 if (!isIdentifiedObject(V)) {
6837 Objects.clear();
6838 return false;
6839 }
6840 Objects.push_back(Elt: const_cast<Value *>(V));
6841 }
6842 } while (!Working.empty());
6843 return true;
6844}
6845
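/// Walk through casts, GEPs, PHIs, selects, and returned-argument calls to
/// find the unique alloca that \p V is based on; return nullptr if there is
/// none or it is not unique. If OffsetZero is set, only GEPs with all-zero
/// indices are looked through.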
6846AllocaInst *llvm::findAllocaForValue(Value *V, bool OffsetZero) {
6847 AllocaInst *Result = nullptr;
6848 SmallPtrSet<Value *, 4> Visited;
6849 SmallVector<Value *, 4> Worklist;
6850
6851 auto AddWork = [&](Value *V) {
6852 if (Visited.insert(Ptr: V).second)
6853 Worklist.push_back(Elt: V);
6854 };
6855
6856 AddWork(V);
6857 do {
6858 V = Worklist.pop_back_val();
6859 assert(Visited.count(V));
6860
6861 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val: V)) {
6862 if (Result && Result != AI)
6863 return nullptr;
6864 Result = AI;
6865 } else if (CastInst *CI = dyn_cast<CastInst>(Val: V)) {
6866 AddWork(CI->getOperand(i_nocapture: 0));
6867 } else if (PHINode *PN = dyn_cast<PHINode>(Val: V)) {
6868 for (Value *IncValue : PN->incoming_values())
6869 AddWork(IncValue);
6870 } else if (auto *SI = dyn_cast<SelectInst>(Val: V)) {
6871 AddWork(SI->getTrueValue());
6872 AddWork(SI->getFalseValue());
6873 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Val: V)) {
6874 if (OffsetZero && !GEP->hasAllZeroIndices())
6875 return nullptr;
6876 AddWork(GEP->getPointerOperand());
6877 } else if (CallBase *CB = dyn_cast<CallBase>(Val: V)) {
6878 Value *Returned = CB->getReturnedArgOperand();
6879 if (Returned)
6880 AddWork(Returned);
6881 else
6882 return nullptr;
6883 } else {
6884 return nullptr;
6885 }
6886 } while (!Worklist.empty());
6887
6888 return Result;
6889}
6890
6891static bool onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
6892 const Value *V, bool AllowLifetime, bool AllowDroppable) {
6893 for (const User *U : V->users()) {
6894 const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: U);
6895 if (!II)
6896 return false;
6897
6898 if (AllowLifetime && II->isLifetimeStartOrEnd())
6899 continue;
6900
6901 if (AllowDroppable && II->isDroppable())
6902 continue;
6903
6904 return false;
6905 }
6906 return true;
6907}
6908
6909bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
6910 return onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
6911 V, /* AllowLifetime */ true, /* AllowDroppable */ false);
6912}
6913bool llvm::onlyUsedByLifetimeMarkersOrDroppableInsts(const Value *V) {
6914 return onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
6915 V, /* AllowLifetime */ true, /* AllowDroppable */ true);
6916}
6917
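/// Return true if \p I computes each vector lane using only the
/// corresponding lanes of its operands (e.g. elementwise arithmetic,
/// trivially vectorizable intrinsics, or select-like shuffles), so that
/// per-lane reasoning remains valid.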
6918bool llvm::isNotCrossLaneOperation(const Instruction *I) {
6919 if (auto *II = dyn_cast<IntrinsicInst>(Val: I))
6920 return isTriviallyVectorizable(ID: II->getIntrinsicID());
6921 auto *Shuffle = dyn_cast<ShuffleVectorInst>(Val: I);
6922 return (!Shuffle || Shuffle->isSelect()) &&
6923 !isa<CallBase, BitCastInst, ExtractElementInst>(Val: I);
6924}
6925
6926bool llvm::isSafeToSpeculativelyExecute(
6927 const Instruction *Inst, const Instruction *CtxI, AssumptionCache *AC,
6928 const DominatorTree *DT, const TargetLibraryInfo *TLI, bool UseVariableInfo,
6929 bool IgnoreUBImplyingAttrs) {
6930 return isSafeToSpeculativelyExecuteWithOpcode(Opcode: Inst->getOpcode(), Inst, CtxI,
6931 AC, DT, TLI, UseVariableInfo,
6932 IgnoreUBImplyingAttrs);
6933}
6934
6935bool llvm::isSafeToSpeculativelyExecuteWithOpcode(
6936 unsigned Opcode, const Instruction *Inst, const Instruction *CtxI,
6937 AssumptionCache *AC, const DominatorTree *DT, const TargetLibraryInfo *TLI,
6938 bool UseVariableInfo, bool IgnoreUBImplyingAttrs) {
6939#ifndef NDEBUG
6940 if (Inst->getOpcode() != Opcode) {
6941 // Check that the operands are actually compatible with the Opcode override.
6942 auto hasEqualReturnAndLeadingOperandTypes =
6943 [](const Instruction *Inst, unsigned NumLeadingOperands) {
6944 if (Inst->getNumOperands() < NumLeadingOperands)
6945 return false;
6946 const Type *ExpectedType = Inst->getType();
6947 for (unsigned ItOp = 0; ItOp < NumLeadingOperands; ++ItOp)
6948 if (Inst->getOperand(ItOp)->getType() != ExpectedType)
6949 return false;
6950 return true;
6951 };
6952 assert(!Instruction::isBinaryOp(Opcode) ||
6953 hasEqualReturnAndLeadingOperandTypes(Inst, 2));
6954 assert(!Instruction::isUnaryOp(Opcode) ||
6955 hasEqualReturnAndLeadingOperandTypes(Inst, 1));
6956 }
6957#endif
6958
6959 switch (Opcode) {
6960 default:
6961 return true;
6962 case Instruction::UDiv:
6963 case Instruction::URem: {
6964 // x / y is undefined if y == 0.
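// E.g. (illustrative) udiv i32 %x, 7 is always safe to speculate, while a
// non-constant or possibly-zero divisor makes us conservatively return
// false.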
6965 const APInt *V;
6966 if (match(V: Inst->getOperand(i: 1), P: m_APInt(Res&: V)))
6967 return *V != 0;
6968 return false;
6969 }
6970 case Instruction::SDiv:
6971 case Instruction::SRem: {
6972 // x / y is undefined if y == 0, or if x == INT_MIN and y == -1.
6973 const APInt *Numerator, *Denominator;
6974 if (!match(V: Inst->getOperand(i: 1), P: m_APInt(Res&: Denominator)))
6975 return false;
6976 // We cannot hoist this division if the denominator is 0.
6977 if (*Denominator == 0)
6978 return false;
6979 // It's safe to hoist if the denominator is not 0 or -1.
6980 if (!Denominator->isAllOnes())
6981 return true;
6982 // At this point we know that the denominator is -1. It is safe to hoist as
6983 // long we know that the numerator is not INT_MIN.
6984 if (match(V: Inst->getOperand(i: 0), P: m_APInt(Res&: Numerator)))
6985 return !Numerator->isMinSignedValue();
6986 // The numerator *might* be MinSignedValue.
6987 return false;
6988 }
6989 case Instruction::Load: {
6990 if (!UseVariableInfo)
6991 return false;
6992
6993 const LoadInst *LI = dyn_cast<LoadInst>(Val: Inst);
6994 if (!LI)
6995 return false;
6996 if (mustSuppressSpeculation(LI: *LI))
6997 return false;
6998 const DataLayout &DL = LI->getDataLayout();
6999 return isDereferenceableAndAlignedPointer(V: LI->getPointerOperand(),
7000 Ty: LI->getType(), Alignment: LI->getAlign(), DL,
7001 CtxI, AC, DT, TLI);
7002 }
7003 case Instruction::Call: {
7004 auto *CI = dyn_cast<const CallInst>(Val: Inst);
7005 if (!CI)
7006 return false;
7007 const Function *Callee = CI->getCalledFunction();
7008
7009 // The called function could have undefined behavior or side-effects, even
7010 // if marked readnone nounwind.
7011 if (!Callee || !Callee->isSpeculatable())
7012 return false;
7013 // Since the operands may be changed after hoisting, undefined behavior may
7014 // be triggered by some UB-implying attributes.
7015 return IgnoreUBImplyingAttrs || !CI->hasUBImplyingAttrs();
7016 }
7017 case Instruction::VAArg:
7018 case Instruction::Alloca:
7019 case Instruction::Invoke:
7020 case Instruction::CallBr:
7021 case Instruction::PHI:
7022 case Instruction::Store:
7023 case Instruction::Ret:
7024 case Instruction::Br:
7025 case Instruction::IndirectBr:
7026 case Instruction::Switch:
7027 case Instruction::Unreachable:
7028 case Instruction::Fence:
7029 case Instruction::AtomicRMW:
7030 case Instruction::AtomicCmpXchg:
7031 case Instruction::LandingPad:
7032 case Instruction::Resume:
7033 case Instruction::CatchSwitch:
7034 case Instruction::CatchPad:
7035 case Instruction::CatchRet:
7036 case Instruction::CleanupPad:
7037 case Instruction::CleanupRet:
7038 return false; // Misc instructions which have effects
7039 }
7040}
7041
7042bool llvm::mayHaveNonDefUseDependency(const Instruction &I) {
7043 if (I.mayReadOrWriteMemory())
7044 // Memory dependency possible
7045 return true;
7046 if (!isSafeToSpeculativelyExecute(Inst: &I))
7047 // Can't move above a maythrow call or infinite loop. Or if an
7048 // inalloca alloca, above a stacksave call.
7049 return true;
7050 if (!isGuaranteedToTransferExecutionToSuccessor(I: &I))
7051 // 1) Can't reorder two inf-loop calls, even if readonly
7052 // 2) Also can't reorder an inf-loop call below an instruction which isn't
7053 // safe to speculatively execute. (Inverse of the above.)
7054 return true;
7055 return false;
7056}
7057
7058/// Convert ConstantRange OverflowResult into ValueTracking OverflowResult.
7059static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
7060 switch (OR) {
7061 case ConstantRange::OverflowResult::MayOverflow:
7062 return OverflowResult::MayOverflow;
7063 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7064 return OverflowResult::AlwaysOverflowsLow;
7065 case ConstantRange::OverflowResult::AlwaysOverflowsHigh:
7066 return OverflowResult::AlwaysOverflowsHigh;
7067 case ConstantRange::OverflowResult::NeverOverflows:
7068 return OverflowResult::NeverOverflows;
7069 }
7070 llvm_unreachable("Unknown OverflowResult");
7071}
7072
7073/// Combine constant ranges from computeConstantRange() and computeKnownBits().
7074ConstantRange
7075llvm::computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
7076 bool ForSigned,
7077 const SimplifyQuery &SQ) {
7078 ConstantRange CR1 =
7079 ConstantRange::fromKnownBits(Known: V.getKnownBits(Q: SQ), IsSigned: ForSigned);
7080 ConstantRange CR2 = computeConstantRange(V, ForSigned, UseInstrInfo: SQ.IIQ.UseInstrInfo);
7081 ConstantRange::PreferredRangeType RangeType =
7082 ForSigned ? ConstantRange::Signed : ConstantRange::Unsigned;
7083 return CR1.intersectWith(CR: CR2, Type: RangeType);
7084}
7085
7086OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
7087 const Value *RHS,
7088 const SimplifyQuery &SQ,
7089 bool IsNSW) {
7090 KnownBits LHSKnown = computeKnownBits(V: LHS, Q: SQ);
7091 KnownBits RHSKnown = computeKnownBits(V: RHS, Q: SQ);
7092
7093 // mul nsw of two non-negative numbers is also nuw.
7094 if (IsNSW && LHSKnown.isNonNegative() && RHSKnown.isNonNegative())
7095 return OverflowResult::NeverOverflows;
7096
7097 ConstantRange LHSRange = ConstantRange::fromKnownBits(Known: LHSKnown, IsSigned: false);
7098 ConstantRange RHSRange = ConstantRange::fromKnownBits(Known: RHSKnown, IsSigned: false);
7099 return mapOverflowResult(OR: LHSRange.unsignedMulMayOverflow(Other: RHSRange));
7100}
7101
7102OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS,
7103 const Value *RHS,
7104 const SimplifyQuery &SQ) {
7105 // Multiplying n * m significant bits yields a result of n + m significant
7106 // bits. If the total number of significant bits does not exceed the
7107 // result bit width (minus 1), there is no overflow.
7108 // This means if we have enough leading sign bits in the operands
7109 // we can guarantee that the result does not overflow.
7110 // Ref: "Hacker's Delight" by Henry Warren
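// E.g. for i16: if both operands are known to lie in [-128, 127], each has
// at least 9 sign bits, SignBits >= 18 > 17, and the product magnitude is
// at most 16384, which fits in i16.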
7111 unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
7112
7113 // Note that underestimating the number of sign bits gives a more
7114 // conservative answer.
7115 unsigned SignBits =
7116 ::ComputeNumSignBits(V: LHS, Q: SQ) + ::ComputeNumSignBits(V: RHS, Q: SQ);
7117
7118 // First handle the easy case: if we have enough sign bits there's
7119 // definitely no overflow.
7120 if (SignBits > BitWidth + 1)
7121 return OverflowResult::NeverOverflows;
7122
7123 // There are two ambiguous cases where there can be no overflow:
7124 // SignBits == BitWidth + 1 and
7125 // SignBits == BitWidth
7126 // The second case is difficult to check, therefore we only handle the
7127 // first case.
7128 if (SignBits == BitWidth + 1) {
7129 // It overflows only when both arguments are negative and the true
7130 // product is exactly the minimum negative number.
7131 // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
7132 // For simplicity we just check if at least one side is not negative.
7133 KnownBits LHSKnown = computeKnownBits(V: LHS, Q: SQ);
7134 KnownBits RHSKnown = computeKnownBits(V: RHS, Q: SQ);
7135 if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative())
7136 return OverflowResult::NeverOverflows;
7137 }
7138 return OverflowResult::MayOverflow;
7139}
7140
7141OverflowResult
7142llvm::computeOverflowForUnsignedAdd(const WithCache<const Value *> &LHS,
7143 const WithCache<const Value *> &RHS,
7144 const SimplifyQuery &SQ) {
7145 ConstantRange LHSRange =
7146 computeConstantRangeIncludingKnownBits(V: LHS, /*ForSigned=*/false, SQ);
7147 ConstantRange RHSRange =
7148 computeConstantRangeIncludingKnownBits(V: RHS, /*ForSigned=*/false, SQ);
7149 return mapOverflowResult(OR: LHSRange.unsignedAddMayOverflow(Other: RHSRange));
7150}
7151
7152static OverflowResult
7153computeOverflowForSignedAdd(const WithCache<const Value *> &LHS,
7154 const WithCache<const Value *> &RHS,
7155 const AddOperator *Add, const SimplifyQuery &SQ) {
7156 if (Add && Add->hasNoSignedWrap()) {
7157 return OverflowResult::NeverOverflows;
7158 }
7159
7160 // If LHS and RHS each have at least two sign bits, the addition will look
7161 // like
7162 //
7163 // XX..... +
7164 // YY.....
7165 //
7166 // If the carry into the most significant position is 0, X and Y can't both
7167 // be 1 and therefore the carry out of the addition is also 0.
7168 //
7169 // If the carry into the most significant position is 1, X and Y can't both
7170 // be 0 and therefore the carry out of the addition is also 1.
7171 //
7172 // Since the carry into the most significant position is always equal to
7173 // the carry out of the addition, there is no signed overflow.
7174 if (::ComputeNumSignBits(V: LHS, Q: SQ) > 1 && ::ComputeNumSignBits(V: RHS, Q: SQ) > 1)
7175 return OverflowResult::NeverOverflows;
7176
7177 ConstantRange LHSRange =
7178 computeConstantRangeIncludingKnownBits(V: LHS, /*ForSigned=*/true, SQ);
7179 ConstantRange RHSRange =
7180 computeConstantRangeIncludingKnownBits(V: RHS, /*ForSigned=*/true, SQ);
7181 OverflowResult OR =
7182 mapOverflowResult(OR: LHSRange.signedAddMayOverflow(Other: RHSRange));
7183 if (OR != OverflowResult::MayOverflow)
7184 return OR;
7185
7186 // The remaining code needs Add to be available. Return early if it is not.
7187 if (!Add)
7188 return OverflowResult::MayOverflow;
7189
7190 // If the sign of Add is the same as at least one of the operands, this add
7191 // CANNOT overflow. If this can be determined from the known bits of the
7192 // operands the above signedAddMayOverflow() check will have already done so.
7193 // The only other way to improve on the known bits is from an assumption, so
7194 // call computeKnownBitsFromContext() directly.
7195 bool LHSOrRHSKnownNonNegative =
7196 (LHSRange.isAllNonNegative() || RHSRange.isAllNonNegative());
7197 bool LHSOrRHSKnownNegative =
7198 (LHSRange.isAllNegative() || RHSRange.isAllNegative());
7199 if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
7200 KnownBits AddKnown(LHSRange.getBitWidth());
7201 computeKnownBitsFromContext(V: Add, Known&: AddKnown, Q: SQ);
7202 if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) ||
7203 (AddKnown.isNegative() && LHSOrRHSKnownNegative))
7204 return OverflowResult::NeverOverflows;
7205 }
7206
7207 return OverflowResult::MayOverflow;
7208}
7209
7210OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS,
7211 const Value *RHS,
7212 const SimplifyQuery &SQ) {
7213 // X - (X % ?)
7214 // The remainder of a value can't have greater magnitude than itself,
7215 // so the subtraction can't overflow.
7216
7217 // X - (X -nuw ?)
7218 // In the minimal case, this would simplify to "?", so there's no subtract
7219 // at all. But if this analysis is used to peek through casts, for example,
7220 // then determining no-overflow may allow other transforms.
7221
7222 // TODO: There are other patterns like this.
7223 // See simplifyICmpWithBinOpOnLHS() for candidates.
7224 if (match(V: RHS, P: m_URem(L: m_Specific(V: LHS), R: m_Value())) ||
7225 match(V: RHS, P: m_NUWSub(L: m_Specific(V: LHS), R: m_Value())))
7226 if (isGuaranteedNotToBeUndef(V: LHS, AC: SQ.AC, CtxI: SQ.CxtI, DT: SQ.DT))
7227 return OverflowResult::NeverOverflows;
7228
7229 if (auto C = isImpliedByDomCondition(Pred: CmpInst::ICMP_UGE, LHS, RHS, ContextI: SQ.CxtI,
7230 DL: SQ.DL)) {
7231 if (*C)
7232 return OverflowResult::NeverOverflows;
7233 return OverflowResult::AlwaysOverflowsLow;
7234 }
7235
7236 ConstantRange LHSRange =
7237 computeConstantRangeIncludingKnownBits(V: LHS, /*ForSigned=*/false, SQ);
7238 ConstantRange RHSRange =
7239 computeConstantRangeIncludingKnownBits(V: RHS, /*ForSigned=*/false, SQ);
7240 return mapOverflowResult(OR: LHSRange.unsignedSubMayOverflow(Other: RHSRange));
7241}
7242
7243OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS,
7244 const Value *RHS,
7245 const SimplifyQuery &SQ) {
7246 // X - (X % ?)
7247 // The remainder of a value can't have greater magnitude than itself,
7248 // so the subtraction can't overflow.
7249
7250 // X - (X -nsw ?)
7251 // In the minimal case, this would simplify to "?", so there's no subtract
7252 // at all. But if this analysis is used to peek through casts, for example,
7253 // then determining no-overflow may allow other transforms.
7254 if (match(V: RHS, P: m_SRem(L: m_Specific(V: LHS), R: m_Value())) ||
7255 match(V: RHS, P: m_NSWSub(L: m_Specific(V: LHS), R: m_Value())))
7256 if (isGuaranteedNotToBeUndef(V: LHS, AC: SQ.AC, CtxI: SQ.CxtI, DT: SQ.DT))
7257 return OverflowResult::NeverOverflows;
7258
7259 // If LHS and RHS each have at least two sign bits, the subtraction
7260 // cannot overflow.
7261 if (::ComputeNumSignBits(V: LHS, Q: SQ) > 1 && ::ComputeNumSignBits(V: RHS, Q: SQ) > 1)
7262 return OverflowResult::NeverOverflows;
7263
7264 ConstantRange LHSRange =
7265 computeConstantRangeIncludingKnownBits(V: LHS, /*ForSigned=*/true, SQ);
7266 ConstantRange RHSRange =
7267 computeConstantRangeIncludingKnownBits(V: RHS, /*ForSigned=*/true, SQ);
7268 return mapOverflowResult(OR: LHSRange.signedSubMayOverflow(Other: RHSRange));
7269}
7270
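/// Return true if every use of the math result of \p WO is dominated by the
/// no-overflow edge of a conditional branch on its overflow bit, e.g.
/// (illustrative IR):
///   %s = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
///   %v = extractvalue { i32, i1 } %s, 0
///   %ov = extractvalue { i32, i1 } %s, 1
///   br i1 %ov, label %trap, label %cont  ; all uses of %v live in %cont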
7271bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO,
7272 const DominatorTree &DT) {
7273 SmallVector<const BranchInst *, 2> GuardingBranches;
7274 SmallVector<const ExtractValueInst *, 2> Results;
7275
7276 for (const User *U : WO->users()) {
7277 if (const auto *EVI = dyn_cast<ExtractValueInst>(Val: U)) {
7278 assert(EVI->getNumIndices() == 1 && "Obvious from CI's type");
7279
7280 if (EVI->getIndices()[0] == 0)
7281 Results.push_back(Elt: EVI);
7282 else {
7283 assert(EVI->getIndices()[0] == 1 && "Obvious from CI's type");
7284
7285 for (const auto *U : EVI->users())
7286 if (const auto *B = dyn_cast<BranchInst>(Val: U)) {
7287 assert(B->isConditional() && "How else is it using an i1?");
7288 GuardingBranches.push_back(Elt: B);
7289 }
7290 }
7291 } else {
7292 // We are using the aggregate directly in a way we don't want to analyze
7293 // here (storing it to a global, say).
7294 return false;
7295 }
7296 }
7297
7298 auto AllUsesGuardedByBranch = [&](const BranchInst *BI) {
7299 BasicBlockEdge NoWrapEdge(BI->getParent(), BI->getSuccessor(i: 1));
7300 if (!NoWrapEdge.isSingleEdge())
7301 return false;
7302
7303 // Check if all users of the add are provably no-wrap.
7304 for (const auto *Result : Results) {
7305 // If the extractvalue itself is not executed on overflow, then we don't
7306 // need to check each use separately, since domination is transitive.
7307 if (DT.dominates(BBE: NoWrapEdge, BB: Result->getParent()))
7308 continue;
7309
7310 for (const auto &RU : Result->uses())
7311 if (!DT.dominates(BBE: NoWrapEdge, U: RU))
7312 return false;
7313 }
7314
7315 return true;
7316 };
7317
7318 return llvm::any_of(Range&: GuardingBranches, P: AllUsesGuardedByBranch);
7319}
7320
7321 /// Shifts return poison if the shift amount is equal to or larger than the
7321 /// bitwidth.
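/// For example, (shl i8 %x, 8) is poison, so a constant shift amount of 8 is
/// not "known in range" for an i8 shift.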
7322static bool shiftAmountKnownInRange(const Value *ShiftAmount) {
7323 auto *C = dyn_cast<Constant>(Val: ShiftAmount);
7324 if (!C)
7325 return false;
7326
7327 // Shifts return poison if the shift amount is equal to or larger than the
7327 // bitwidth.
7328 SmallVector<const Constant *, 4> ShiftAmounts;
7329 if (auto *FVTy = dyn_cast<FixedVectorType>(Val: C->getType())) {
7330 unsigned NumElts = FVTy->getNumElements();
7331 for (unsigned i = 0; i < NumElts; ++i)
7332 ShiftAmounts.push_back(Elt: C->getAggregateElement(Elt: i));
7333 } else if (isa<ScalableVectorType>(Val: C->getType()))
7334 return false; // Can't tell, just return false to be safe
7335 else
7336 ShiftAmounts.push_back(Elt: C);
7337
7338 bool Safe = llvm::all_of(Range&: ShiftAmounts, P: [](const Constant *C) {
7339 auto *CI = dyn_cast_or_null<ConstantInt>(Val: C);
7340 return CI && CI->getValue().ult(RHS: C->getType()->getIntegerBitWidth());
7341 });
7342
7343 return Safe;
7344}
7345
7346enum class UndefPoisonKind {
7347 PoisonOnly = (1 << 0),
7348 UndefOnly = (1 << 1),
7349 UndefOrPoison = PoisonOnly | UndefOnly,
7350};
7351
7352static bool includesPoison(UndefPoisonKind Kind) {
7353 return (unsigned(Kind) & unsigned(UndefPoisonKind::PoisonOnly)) != 0;
7354}
7355
7356static bool includesUndef(UndefPoisonKind Kind) {
7357 return (unsigned(Kind) & unsigned(UndefPoisonKind::UndefOnly)) != 0;
7358}
7359
7360static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind,
7361 bool ConsiderFlagsAndMetadata) {
7362
7363 if (ConsiderFlagsAndMetadata && includesPoison(Kind) &&
7364 Op->hasPoisonGeneratingAnnotations())
7365 return true;
7366
7367 unsigned Opcode = Op->getOpcode();
7368
7369 // Check whether opcode is a poison/undef-generating operation
7370 switch (Opcode) {
7371 case Instruction::Shl:
7372 case Instruction::AShr:
7373 case Instruction::LShr:
7374 return includesPoison(Kind) && !shiftAmountKnownInRange(ShiftAmount: Op->getOperand(i: 1));
7375 case Instruction::FPToSI:
7376 case Instruction::FPToUI:
7377 // fptosi/ui yields poison if the resulting value does not fit in the
7378 // destination type.
7379 return true;
7380 case Instruction::Call:
7381 if (auto *II = dyn_cast<IntrinsicInst>(Val: Op)) {
7382 switch (II->getIntrinsicID()) {
7383 // TODO: Add more intrinsics.
7384 case Intrinsic::ctlz:
7385 case Intrinsic::cttz:
7386 case Intrinsic::abs:
7387 if (cast<ConstantInt>(Val: II->getArgOperand(i: 1))->isNullValue())
7388 return false;
7389 break;
7390 case Intrinsic::ctpop:
7391 case Intrinsic::bswap:
7392 case Intrinsic::bitreverse:
7393 case Intrinsic::fshl:
7394 case Intrinsic::fshr:
7395 case Intrinsic::smax:
7396 case Intrinsic::smin:
7397 case Intrinsic::umax:
7398 case Intrinsic::umin:
7399 case Intrinsic::ptrmask:
7400 case Intrinsic::fptoui_sat:
7401 case Intrinsic::fptosi_sat:
7402 case Intrinsic::sadd_with_overflow:
7403 case Intrinsic::ssub_with_overflow:
7404 case Intrinsic::smul_with_overflow:
7405 case Intrinsic::uadd_with_overflow:
7406 case Intrinsic::usub_with_overflow:
7407 case Intrinsic::umul_with_overflow:
7408 case Intrinsic::sadd_sat:
7409 case Intrinsic::uadd_sat:
7410 case Intrinsic::ssub_sat:
7411 case Intrinsic::usub_sat:
7412 return false;
7413 case Intrinsic::sshl_sat:
7414 case Intrinsic::ushl_sat:
7415 return includesPoison(Kind) &&
7416 !shiftAmountKnownInRange(ShiftAmount: II->getArgOperand(i: 1));
7417 case Intrinsic::fma:
7418 case Intrinsic::fmuladd:
7419 case Intrinsic::sqrt:
7420 case Intrinsic::powi:
7421 case Intrinsic::sin:
7422 case Intrinsic::cos:
7423 case Intrinsic::pow:
7424 case Intrinsic::log:
7425 case Intrinsic::log10:
7426 case Intrinsic::log2:
7427 case Intrinsic::exp:
7428 case Intrinsic::exp2:
7429 case Intrinsic::exp10:
7430 case Intrinsic::fabs:
7431 case Intrinsic::copysign:
7432 case Intrinsic::floor:
7433 case Intrinsic::ceil:
7434 case Intrinsic::trunc:
7435 case Intrinsic::rint:
7436 case Intrinsic::nearbyint:
7437 case Intrinsic::round:
7438 case Intrinsic::roundeven:
7439 case Intrinsic::fptrunc_round:
7440 case Intrinsic::canonicalize:
7441 case Intrinsic::arithmetic_fence:
7442 case Intrinsic::minnum:
7443 case Intrinsic::maxnum:
7444 case Intrinsic::minimum:
7445 case Intrinsic::maximum:
7446 case Intrinsic::minimumnum:
7447 case Intrinsic::maximumnum:
7448 case Intrinsic::is_fpclass:
7449 case Intrinsic::ldexp:
7450 case Intrinsic::frexp:
7451 return false;
7452 case Intrinsic::lround:
7453 case Intrinsic::llround:
7454 case Intrinsic::lrint:
7455 case Intrinsic::llrint:
7456 // If the value doesn't fit, an unspecified value is returned (but this
7457 // is not poison).
7458 return false;
7459 }
7460 }
7461 [[fallthrough]];
7462 case Instruction::CallBr:
7463 case Instruction::Invoke: {
7464 const auto *CB = cast<CallBase>(Val: Op);
7465 return !CB->hasRetAttr(Kind: Attribute::NoUndef);
7466 }
7467 case Instruction::InsertElement:
7468 case Instruction::ExtractElement: {
7469 // If the index exceeds the length of the vector, the result is poison.
7470 auto *VTy = cast<VectorType>(Val: Op->getOperand(i: 0)->getType());
7471 unsigned IdxOp = Op->getOpcode() == Instruction::InsertElement ? 2 : 1;
7472 auto *Idx = dyn_cast<ConstantInt>(Val: Op->getOperand(i: IdxOp));
7473 if (includesPoison(Kind))
7474 return !Idx ||
7475 Idx->getValue().uge(RHS: VTy->getElementCount().getKnownMinValue());
7476 return false;
7477 }
7478 case Instruction::ShuffleVector: {
7479 ArrayRef<int> Mask = isa<ConstantExpr>(Val: Op)
7480 ? cast<ConstantExpr>(Val: Op)->getShuffleMask()
7481 : cast<ShuffleVectorInst>(Val: Op)->getShuffleMask();
7482 return includesPoison(Kind) && is_contained(Range&: Mask, Element: PoisonMaskElem);
7483 }
7484 case Instruction::FNeg:
7485 case Instruction::PHI:
7486 case Instruction::Select:
7487 case Instruction::ExtractValue:
7488 case Instruction::InsertValue:
7489 case Instruction::Freeze:
7490 case Instruction::ICmp:
7491 case Instruction::FCmp:
7492 case Instruction::GetElementPtr:
7493 return false;
7494 case Instruction::AddrSpaceCast:
7495 return true;
7496 default: {
7497 const auto *CE = dyn_cast<ConstantExpr>(Val: Op);
7498 if (isa<CastInst>(Val: Op) || (CE && CE->isCast()))
7499 return false;
7500 else if (Instruction::isBinaryOp(Opcode))
7501 return false;
7502 // Be conservative and return true.
7503 return true;
7504 }
7505 }
7506}
7507
7508bool llvm::canCreateUndefOrPoison(const Operator *Op,
7509 bool ConsiderFlagsAndMetadata) {
7510 return ::canCreateUndefOrPoison(Op, Kind: UndefPoisonKind::UndefOrPoison,
7511 ConsiderFlagsAndMetadata);
7512}
7513
7514bool llvm::canCreatePoison(const Operator *Op, bool ConsiderFlagsAndMetadata) {
7515 return ::canCreateUndefOrPoison(Op, Kind: UndefPoisonKind::PoisonOnly,
7516 ConsiderFlagsAndMetadata);
7517}
7518
7519static bool directlyImpliesPoison(const Value *ValAssumedPoison, const Value *V,
7520 unsigned Depth) {
7521 if (ValAssumedPoison == V)
7522 return true;
7523
7524 const unsigned MaxDepth = 2;
7525 if (Depth >= MaxDepth)
7526 return false;
7527
7528 if (const auto *I = dyn_cast<Instruction>(Val: V)) {
7529 if (any_of(Range: I->operands(), P: [=](const Use &Op) {
7530 return propagatesPoison(PoisonOp: Op) &&
7531 directlyImpliesPoison(ValAssumedPoison, V: Op, Depth: Depth + 1);
7532 }))
7533 return true;
7534
7535 // V = extractvalue V0, idx
7536 // V2 = extractvalue V0, idx2
7537 // V0's elements are all poison or not. (e.g., add_with_overflow)
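// For illustration (hypothetical IR):
//   %V0 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %V  = extractvalue { i32, i1 } %V0, 0
//   %V2 = extractvalue { i32, i1 } %V0, 1
// If %V is poison, then %V2 is poison as well (the result elements of the
// intrinsic are poison together).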
7538 const WithOverflowInst *II;
7539 if (match(V: I, P: m_ExtractValue(V: m_WithOverflowInst(I&: II))) &&
7540 (match(V: ValAssumedPoison, P: m_ExtractValue(V: m_Specific(V: II))) ||
7541 llvm::is_contained(Range: II->args(), Element: ValAssumedPoison)))
7542 return true;
7543 }
7544 return false;
7545}
7546
7547static bool impliesPoison(const Value *ValAssumedPoison, const Value *V,
7548 unsigned Depth) {
7549 if (isGuaranteedNotToBePoison(V: ValAssumedPoison))
7550 return true;
7551
7552 if (directlyImpliesPoison(ValAssumedPoison, V, /* Depth */ 0))
7553 return true;
7554
7555 const unsigned MaxDepth = 2;
7556 if (Depth >= MaxDepth)
7557 return false;
7558
7559 const auto *I = dyn_cast<Instruction>(Val: ValAssumedPoison);
7560 if (I && !canCreatePoison(Op: cast<Operator>(Val: I))) {
7561 return all_of(Range: I->operands(), P: [=](const Value *Op) {
7562 return impliesPoison(ValAssumedPoison: Op, V, Depth: Depth + 1);
7563 });
7564 }
7565 return false;
7566}
7567
7568bool llvm::impliesPoison(const Value *ValAssumedPoison, const Value *V) {
7569 return ::impliesPoison(ValAssumedPoison, V, /* Depth */ 0);
7570}
7571
7572static bool programUndefinedIfUndefOrPoison(const Value *V, bool PoisonOnly);
7573
7574static bool isGuaranteedNotToBeUndefOrPoison(
7575 const Value *V, AssumptionCache *AC, const Instruction *CtxI,
7576 const DominatorTree *DT, unsigned Depth, UndefPoisonKind Kind) {
7577 if (Depth >= MaxAnalysisRecursionDepth)
7578 return false;
7579
7580 if (isa<MetadataAsValue>(Val: V))
7581 return false;
7582
7583 if (const auto *A = dyn_cast<Argument>(Val: V)) {
7584 if (A->hasAttribute(Kind: Attribute::NoUndef) ||
7585 A->hasAttribute(Kind: Attribute::Dereferenceable) ||
7586 A->hasAttribute(Kind: Attribute::DereferenceableOrNull))
7587 return true;
7588 }
7589
7590 if (auto *C = dyn_cast<Constant>(Val: V)) {
7591 if (isa<PoisonValue>(Val: C))
7592 return !includesPoison(Kind);
7593
7594 if (isa<UndefValue>(Val: C))
7595 return !includesUndef(Kind);
7596
7597 if (isa<ConstantInt>(Val: C) || isa<GlobalVariable>(Val: C) || isa<ConstantFP>(Val: C) ||
7598 isa<ConstantPointerNull>(Val: C) || isa<Function>(Val: C))
7599 return true;
7600
7601 if (C->getType()->isVectorTy()) {
7602 if (isa<ConstantExpr>(Val: C)) {
7603 // Scalable vectors can use a ConstantExpr to build a splat.
7604 if (Constant *SplatC = C->getSplatValue())
7605 if (isa<ConstantInt>(Val: SplatC) || isa<ConstantFP>(Val: SplatC))
7606 return true;
7607 } else {
7608 if (includesUndef(Kind) && C->containsUndefElement())
7609 return false;
7610 if (includesPoison(Kind) && C->containsPoisonElement())
7611 return false;
7612 return !C->containsConstantExpression();
7613 }
7614 }
7615 }
7616
7617 // Strip cast operations from a pointer value.
7618 // Note that stripPointerCastsSameRepresentation can strip off getelementptr
7619 // inbounds with zero offset. To guarantee that the result isn't poison, the
7620 // stripped pointer is checked as it has to be pointing into an allocated
7621 // object or be null to ensure that `inbounds` getelementptrs with a zero
7622 // offset could not have produced poison.
7623 // It can also strip off addrspacecasts that do not change the bit
7624 // representation; we treat such addrspacecasts as no-ops.
7625 auto *StrippedV = V->stripPointerCastsSameRepresentation();
7626 if (isa<AllocaInst>(Val: StrippedV) || isa<GlobalVariable>(Val: StrippedV) ||
7627 isa<Function>(Val: StrippedV) || isa<ConstantPointerNull>(Val: StrippedV))
7628 return true;
7629
7630 auto OpCheck = [&](const Value *V) {
7631 return isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth: Depth + 1, Kind);
7632 };
7633
7634 if (auto *Opr = dyn_cast<Operator>(Val: V)) {
7635 // If the value is a freeze instruction, then it can never
7636 // be undef or poison.
7637 if (isa<FreezeInst>(Val: V))
7638 return true;
7639
7640 if (const auto *CB = dyn_cast<CallBase>(Val: V)) {
7641 if (CB->hasRetAttr(Kind: Attribute::NoUndef) ||
7642 CB->hasRetAttr(Kind: Attribute::Dereferenceable) ||
7643 CB->hasRetAttr(Kind: Attribute::DereferenceableOrNull))
7644 return true;
7645 }
7646
7647 if (const auto *PN = dyn_cast<PHINode>(Val: V)) {
7648 unsigned Num = PN->getNumIncomingValues();
7649 bool IsWellDefined = true;
7650 for (unsigned i = 0; i < Num; ++i) {
7651 if (PN == PN->getIncomingValue(i))
7652 continue;
7653 auto *TI = PN->getIncomingBlock(i)->getTerminator();
7654 if (!isGuaranteedNotToBeUndefOrPoison(V: PN->getIncomingValue(i), AC, CtxI: TI,
7655 DT, Depth: Depth + 1, Kind)) {
7656 IsWellDefined = false;
7657 break;
7658 }
7659 }
7660 if (IsWellDefined)
7661 return true;
7662 } else if (!::canCreateUndefOrPoison(Op: Opr, Kind,
7663 /*ConsiderFlagsAndMetadata*/ true) &&
7664 all_of(Range: Opr->operands(), P: OpCheck))
7665 return true;
7666 }
7667
7668 if (auto *I = dyn_cast<LoadInst>(Val: V))
7669 if (I->hasMetadata(KindID: LLVMContext::MD_noundef) ||
7670 I->hasMetadata(KindID: LLVMContext::MD_dereferenceable) ||
7671 I->hasMetadata(KindID: LLVMContext::MD_dereferenceable_or_null))
7672 return true;
7673
7674 if (programUndefinedIfUndefOrPoison(V, PoisonOnly: !includesUndef(Kind)))
7675 return true;
7676
7677 // CxtI may be null or a cloned instruction.
7678 if (!CtxI || !CtxI->getParent() || !DT)
7679 return false;
7680
7681 auto *DNode = DT->getNode(BB: CtxI->getParent());
7682 if (!DNode)
7683 // Unreachable block
7684 return false;
7685
7686 // If V is used as a branch condition before reaching CtxI, V cannot be
7687 // undef or poison.
7688 // br V, BB1, BB2
7689 // BB1:
7690 // CtxI ; V cannot be undef or poison here
7691 auto *Dominator = DNode->getIDom();
7692 // This check is purely for compile time reasons: we can skip the IDom walk
7693 // if what we are checking for includes undef and the value is not an integer.
7694 if (!includesUndef(Kind) || V->getType()->isIntegerTy())
7695 while (Dominator) {
7696 auto *TI = Dominator->getBlock()->getTerminator();
7697
7698 Value *Cond = nullptr;
7699 if (auto BI = dyn_cast_or_null<BranchInst>(Val: TI)) {
7700 if (BI->isConditional())
7701 Cond = BI->getCondition();
7702 } else if (auto SI = dyn_cast_or_null<SwitchInst>(Val: TI)) {
7703 Cond = SI->getCondition();
7704 }
7705
7706 if (Cond) {
7707 if (Cond == V)
7708 return true;
7709 else if (!includesUndef(Kind) && isa<Operator>(Val: Cond)) {
7710 // For poison, we can analyze further
7711 auto *Opr = cast<Operator>(Val: Cond);
7712 if (any_of(Range: Opr->operands(), P: [V](const Use &U) {
7713 return V == U && propagatesPoison(PoisonOp: U);
7714 }))
7715 return true;
7716 }
7717 }
7718
7719 Dominator = Dominator->getIDom();
7720 }
7721
7722 if (AC && getKnowledgeValidInContext(V, AttrKinds: {Attribute::NoUndef}, AC&: *AC, CtxI, DT))
7723 return true;
7724
7725 return false;
7726}
7727
7728bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC,
7729 const Instruction *CtxI,
7730 const DominatorTree *DT,
7731 unsigned Depth) {
7732 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
7733 Kind: UndefPoisonKind::UndefOrPoison);
7734}
7735
7736bool llvm::isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC,
7737 const Instruction *CtxI,
7738 const DominatorTree *DT, unsigned Depth) {
7739 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
7740 Kind: UndefPoisonKind::PoisonOnly);
7741}
7742
7743bool llvm::isGuaranteedNotToBeUndef(const Value *V, AssumptionCache *AC,
7744 const Instruction *CtxI,
7745 const DominatorTree *DT, unsigned Depth) {
7746 return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
7747 Kind: UndefPoisonKind::UndefOnly);
7748}
7749
7750/// Return true if undefined behavior would provably be executed on the path to
7751 /// OnPathTo if Root produced a poison result. Note that this doesn't say
7752/// anything about whether OnPathTo is actually executed or whether Root is
7753/// actually poison. This can be used to assess whether a new use of Root can
7754/// be added at a location which is control equivalent with OnPathTo (such as
7755/// immediately before it) without introducing UB which didn't previously
7756/// exist. Note that a false result conveys no information.
7757bool llvm::mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
7758 Instruction *OnPathTo,
7759 DominatorTree *DT) {
7760 // Basic approach is to assume Root is poison, propagate poison forward
7761 // through all users we can easily track, and then check whether any of those
7762 // users are provably UB and must execute before our exiting block might
7763 // exit.
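// For example (hypothetical IR), if Root is %x and a transitive user
//   %q = udiv i32 1, %x
// dominates OnPathTo, then a poison %x would make the udiv immediate UB, so
// this returns true.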
7764
7765 // The set of all recursive users we've visited (which are assumed to all be
7766 // poison because of said visit)
7767 SmallSet<const Value *, 16> KnownPoison;
7768 SmallVector<const Instruction*, 16> Worklist;
7769 Worklist.push_back(Elt: Root);
7770 while (!Worklist.empty()) {
7771 const Instruction *I = Worklist.pop_back_val();
7772
7773 // If we know this must trigger UB on a path leading to our target.
7774 if (mustTriggerUB(I, KnownPoison) && DT->dominates(Def: I, User: OnPathTo))
7775 return true;
7776
7777 // If we can't analyze propagation through this instruction, just skip it
7778 // and transitive users. Safe as false is a conservative result.
7779 if (I != Root && !any_of(Range: I->operands(), P: [&KnownPoison](const Use &U) {
7780 return KnownPoison.contains(Ptr: U) && propagatesPoison(PoisonOp: U);
7781 }))
7782 continue;
7783
7784 if (KnownPoison.insert(Ptr: I).second)
7785 for (const User *User : I->users())
7786 Worklist.push_back(Elt: cast<Instruction>(Val: User));
7787 }
7788
7789 // Might be non-UB, or might have a path we couldn't prove must execute on
7790 // the way to the exiting bb.
7791 return false;
7792}
7793
7794OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add,
7795 const SimplifyQuery &SQ) {
7796 return ::computeOverflowForSignedAdd(LHS: Add->getOperand(i_nocapture: 0), RHS: Add->getOperand(i_nocapture: 1),
7797 Add, SQ);
7798}
7799
7800OverflowResult
7801llvm::computeOverflowForSignedAdd(const WithCache<const Value *> &LHS,
7802 const WithCache<const Value *> &RHS,
7803 const SimplifyQuery &SQ) {
7804 return ::computeOverflowForSignedAdd(LHS, RHS, Add: nullptr, SQ);
7805}
7806
7807bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
7808 // Note: An atomic operation isn't guaranteed to return in a reasonable amount
7809 // of time because it's possible for another thread to interfere with it for an
7810 // arbitrary length of time, but programs aren't allowed to rely on that.
7811
7812 // If there is no successor, then execution can't transfer to it.
7813 if (isa<ReturnInst>(Val: I))
7814 return false;
7815 if (isa<UnreachableInst>(Val: I))
7816 return false;
7817
7818 // Note: Do not add new checks here; instead, change Instruction::mayThrow or
7819 // Instruction::willReturn.
7820 //
7821 // FIXME: Move this check into Instruction::willReturn.
7822 if (isa<CatchPadInst>(Val: I)) {
7823 switch (classifyEHPersonality(Pers: I->getFunction()->getPersonalityFn())) {
7824 default:
7825 // A catchpad may invoke exception object constructors and such, which
7826 // in some languages can be arbitrary code, so be conservative by default.
7827 return false;
7828 case EHPersonality::CoreCLR:
7829 // For CoreCLR, it just involves a type test.
7830 return true;
7831 }
7832 }
7833
7834 // An instruction that returns without throwing must transfer control flow
7835 // to a successor.
7836 return !I->mayThrow() && I->willReturn();
7837}
7838
7839bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) {
7840 // TODO: This is slightly conservative for invoke instructions since exiting
7841 // via an exception *is* normal control for them.
7842 for (const Instruction &I : *BB)
7843 if (!isGuaranteedToTransferExecutionToSuccessor(I: &I))
7844 return false;
7845 return true;
7846}
7847
7848bool llvm::isGuaranteedToTransferExecutionToSuccessor(
7849 BasicBlock::const_iterator Begin, BasicBlock::const_iterator End,
7850 unsigned ScanLimit) {
7851 return isGuaranteedToTransferExecutionToSuccessor(Range: make_range(x: Begin, y: End),
7852 ScanLimit);
7853}
7854
7855bool llvm::isGuaranteedToTransferExecutionToSuccessor(
7856 iterator_range<BasicBlock::const_iterator> Range, unsigned ScanLimit) {
7857 assert(ScanLimit && "scan limit must be non-zero");
7858 for (const Instruction &I : Range) {
7859 if (--ScanLimit == 0)
7860 return false;
7861 if (!isGuaranteedToTransferExecutionToSuccessor(I: &I))
7862 return false;
7863 }
7864 return true;
7865}
7866
7867bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
7868 const Loop *L) {
7869 // The loop header is guaranteed to be executed for every iteration.
7870 //
7871 // FIXME: Relax this constraint to cover all basic blocks that are
7872 // guaranteed to be executed at every iteration.
7873 if (I->getParent() != L->getHeader()) return false;
7874
7875 for (const Instruction &LI : *L->getHeader()) {
7876 if (&LI == I) return true;
7877 if (!isGuaranteedToTransferExecutionToSuccessor(I: &LI)) return false;
7878 }
7879 llvm_unreachable("Instruction not contained in its own parent basic block.");
7880}
7881
7882bool llvm::intrinsicPropagatesPoison(Intrinsic::ID IID) {
7883 switch (IID) {
7884 // TODO: Add more intrinsics.
7885 case Intrinsic::sadd_with_overflow:
7886 case Intrinsic::ssub_with_overflow:
7887 case Intrinsic::smul_with_overflow:
7888 case Intrinsic::uadd_with_overflow:
7889 case Intrinsic::usub_with_overflow:
7890 case Intrinsic::umul_with_overflow:
7891 // If an input is a vector containing a poison element, the
7892 // corresponding lanes of the two output vectors (calculated
7893 // results, overflow bits) are poison.
7894 return true;
7895 case Intrinsic::ctpop:
7896 case Intrinsic::ctlz:
7897 case Intrinsic::cttz:
7898 case Intrinsic::abs:
7899 case Intrinsic::smax:
7900 case Intrinsic::smin:
7901 case Intrinsic::umax:
7902 case Intrinsic::umin:
7903 case Intrinsic::scmp:
7904 case Intrinsic::ucmp:
7905 case Intrinsic::bitreverse:
7906 case Intrinsic::bswap:
7907 case Intrinsic::sadd_sat:
7908 case Intrinsic::ssub_sat:
7909 case Intrinsic::sshl_sat:
7910 case Intrinsic::uadd_sat:
7911 case Intrinsic::usub_sat:
7912 case Intrinsic::ushl_sat:
7913 case Intrinsic::smul_fix:
7914 case Intrinsic::smul_fix_sat:
7915 case Intrinsic::canonicalize:
7916 case Intrinsic::sqrt:
7917 return true;
7918 default:
7919 return false;
7920 }
7921}
7922
7923bool llvm::propagatesPoison(const Use &PoisonOp) {
7924 const Operator *I = cast<Operator>(Val: PoisonOp.getUser());
7925 switch (I->getOpcode()) {
7926 case Instruction::Freeze:
7927 case Instruction::PHI:
7928 case Instruction::Invoke:
7929 return false;
7930 case Instruction::Select:
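// Only a poison condition guarantees a poison result; a poison true/false
// arm is returned only when that arm is actually selected.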
7931 return PoisonOp.getOperandNo() == 0;
7932 case Instruction::Call:
7933 if (auto *II = dyn_cast<IntrinsicInst>(Val: I))
7934 return intrinsicPropagatesPoison(IID: II->getIntrinsicID());
7935 return false;
7936 case Instruction::ICmp:
7937 case Instruction::FCmp:
7938 case Instruction::GetElementPtr:
7939 return true;
7940 default:
7941 if (isa<BinaryOperator>(Val: I) || isa<UnaryOperator>(Val: I) || isa<CastInst>(Val: I))
7942 return true;
7943
7944 // Be conservative and return false.
7945 return false;
7946 }
7947}
7948
7949/// Enumerates all operands of \p I that are guaranteed to not be undef or
7950/// poison. If the callback \p Handle returns true, stop processing and return
7951/// true. Otherwise, return false.
7952template <typename CallableT>
7953static bool handleGuaranteedWellDefinedOps(const Instruction *I,
7954 const CallableT &Handle) {
7955 switch (I->getOpcode()) {
7956 case Instruction::Store:
7957 if (Handle(cast<StoreInst>(Val: I)->getPointerOperand()))
7958 return true;
7959 break;
7960
7961 case Instruction::Load:
7962 if (Handle(cast<LoadInst>(Val: I)->getPointerOperand()))
7963 return true;
7964 break;
7965
7966 // Since the dereferenceable attribute implies noundef, atomic operations
7967 // also implicitly have noundef pointers.
7968 case Instruction::AtomicCmpXchg:
7969 if (Handle(cast<AtomicCmpXchgInst>(Val: I)->getPointerOperand()))
7970 return true;
7971 break;
7972
7973 case Instruction::AtomicRMW:
7974 if (Handle(cast<AtomicRMWInst>(Val: I)->getPointerOperand()))
7975 return true;
7976 break;
7977
7978 case Instruction::Call:
7979 case Instruction::Invoke: {
7980 const CallBase *CB = cast<CallBase>(Val: I);
7981 if (CB->isIndirectCall() && Handle(CB->getCalledOperand()))
7982 return true;
7983 for (unsigned i = 0; i < CB->arg_size(); ++i)
7984 if ((CB->paramHasAttr(ArgNo: i, Kind: Attribute::NoUndef) ||
7985 CB->paramHasAttr(ArgNo: i, Kind: Attribute::Dereferenceable) ||
7986 CB->paramHasAttr(ArgNo: i, Kind: Attribute::DereferenceableOrNull)) &&
7987 Handle(CB->getArgOperand(i)))
7988 return true;
7989 break;
7990 }
7991 case Instruction::Ret:
7992 if (I->getFunction()->hasRetAttribute(Kind: Attribute::NoUndef) &&
7993 Handle(I->getOperand(i: 0)))
7994 return true;
7995 break;
7996 case Instruction::Switch:
7997 if (Handle(cast<SwitchInst>(Val: I)->getCondition()))
7998 return true;
7999 break;
8000 case Instruction::Br: {
8001 auto *BR = cast<BranchInst>(Val: I);
8002 if (BR->isConditional() && Handle(BR->getCondition()))
8003 return true;
8004 break;
8005 }
8006 default:
8007 break;
8008 }
8009
8010 return false;
8011}
8012
8013/// Enumerates all operands of \p I that are guaranteed to not be poison.
8014template <typename CallableT>
8015static bool handleGuaranteedNonPoisonOps(const Instruction *I,
8016 const CallableT &Handle) {
8017 if (handleGuaranteedWellDefinedOps(I, Handle))
8018 return true;
8019 switch (I->getOpcode()) {
8020 // Divisors of these operations are allowed to be partially undef.
8021 case Instruction::UDiv:
8022 case Instruction::SDiv:
8023 case Instruction::URem:
8024 case Instruction::SRem:
8025 return Handle(I->getOperand(i: 1));
8026 default:
8027 return false;
8028 }
8029}
8030
8031bool llvm::mustTriggerUB(const Instruction *I,
8032 const SmallPtrSetImpl<const Value *> &KnownPoison) {
8033 return handleGuaranteedNonPoisonOps(
8034 I, Handle: [&](const Value *V) { return KnownPoison.count(Ptr: V); });
8035}
8036
8037static bool programUndefinedIfUndefOrPoison(const Value *V,
8038 bool PoisonOnly) {
8039 // We currently only look for uses of values within the same basic
8040 // block, as that makes it easier to guarantee that the uses will be
8041 // executed given that Inst is executed.
8042 //
8043 // FIXME: Expand this to consider uses beyond the same basic block. To do
8044 // this, look out for the distinction between post-dominance and strong
8045 // post-dominance.
8046 const BasicBlock *BB = nullptr;
8047 BasicBlock::const_iterator Begin;
8048 if (const auto *Inst = dyn_cast<Instruction>(Val: V)) {
8049 BB = Inst->getParent();
8050 Begin = Inst->getIterator();
8051 Begin++;
8052 } else if (const auto *Arg = dyn_cast<Argument>(Val: V)) {
8053 if (Arg->getParent()->isDeclaration())
8054 return false;
8055 BB = &Arg->getParent()->getEntryBlock();
8056 Begin = BB->begin();
8057 } else {
8058 return false;
8059 }
8060
8061 // Limit number of instructions we look at, to avoid scanning through large
8062 // blocks. The current limit is chosen arbitrarily.
8063 unsigned ScanLimit = 32;
8064 BasicBlock::const_iterator End = BB->end();
8065
8066 if (!PoisonOnly) {
8067 // Since undef does not propagate eagerly, be conservative & just check
8068 // whether a value is directly passed to an instruction that must take
8069 // well-defined operands.
8070
8071 for (const auto &I : make_range(x: Begin, y: End)) {
8072 if (--ScanLimit == 0)
8073 break;
8074
8075 if (handleGuaranteedWellDefinedOps(I: &I, Handle: [V](const Value *WellDefinedOp) {
8076 return WellDefinedOp == V;
8077 }))
8078 return true;
8079
8080 if (!isGuaranteedToTransferExecutionToSuccessor(I: &I))
8081 break;
8082 }
8083 return false;
8084 }
8085
8086 // Set of instructions that we have proved will yield poison if Inst
8087 // does.
8088 SmallSet<const Value *, 16> YieldsPoison;
8089 SmallSet<const BasicBlock *, 4> Visited;
8090
8091 YieldsPoison.insert(Ptr: V);
8092 Visited.insert(Ptr: BB);
8093
8094 while (true) {
8095 for (const auto &I : make_range(x: Begin, y: End)) {
8096 if (--ScanLimit == 0)
8097 return false;
8098 if (mustTriggerUB(I: &I, KnownPoison: YieldsPoison))
8099 return true;
8100 if (!isGuaranteedToTransferExecutionToSuccessor(I: &I))
8101 return false;
8102
8103 // If an operand is poison and propagates it, mark I as yielding poison.
8104 for (const Use &Op : I.operands()) {
8105 if (YieldsPoison.count(Ptr: Op) && propagatesPoison(PoisonOp: Op)) {
8106 YieldsPoison.insert(Ptr: &I);
8107 break;
8108 }
8109 }
8110
8111 // Special handling for select, which returns poison if its operand 0 is
8112 // poison (handled in the loop above) *or* if both its true/false operands
8113 // are poison (handled here).
8114 if (I.getOpcode() == Instruction::Select &&
8115 YieldsPoison.count(Ptr: I.getOperand(i: 1)) &&
8116 YieldsPoison.count(Ptr: I.getOperand(i: 2))) {
8117 YieldsPoison.insert(Ptr: &I);
8118 }
8119 }
8120
8121 BB = BB->getSingleSuccessor();
8122 if (!BB || !Visited.insert(Ptr: BB).second)
8123 break;
8124
8125 Begin = BB->getFirstNonPHIIt();
8126 End = BB->end();
8127 }
8128 return false;
8129}
8130
8131bool llvm::programUndefinedIfUndefOrPoison(const Instruction *Inst) {
8132 return ::programUndefinedIfUndefOrPoison(V: Inst, PoisonOnly: false);
8133}
8134
8135bool llvm::programUndefinedIfPoison(const Instruction *Inst) {
8136 return ::programUndefinedIfUndefOrPoison(V: Inst, PoisonOnly: true);
8137}
8138
8139static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) {
8140 if (FMF.noNaNs())
8141 return true;
8142
8143 if (auto *C = dyn_cast<ConstantFP>(Val: V))
8144 return !C->isNaN();
8145
8146 if (auto *C = dyn_cast<ConstantDataVector>(Val: V)) {
8147 if (!C->getElementType()->isFloatingPointTy())
8148 return false;
8149 for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
8150 if (C->getElementAsAPFloat(i: I).isNaN())
8151 return false;
8152 }
8153 return true;
8154 }
8155
8156 if (isa<ConstantAggregateZero>(Val: V))
8157 return true;
8158
8159 return false;
8160}
8161
8162static bool isKnownNonZero(const Value *V) {
8163 if (auto *C = dyn_cast<ConstantFP>(Val: V))
8164 return !C->isZero();
8165
8166 if (auto *C = dyn_cast<ConstantDataVector>(Val: V)) {
8167 if (!C->getElementType()->isFloatingPointTy())
8168 return false;
8169 for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
8170 if (C->getElementAsAPFloat(i: I).isZero())
8171 return false;
8172 }
8173 return true;
8174 }
8175
8176 return false;
8177}
8178
8179 /// Match a clamp pattern for float types, without regard to NaNs or signed
8180 /// zeros. Given a non-min/max outer cmp/select from the clamp pattern, this
8181 /// function recognizes whether it can be substituted by a "canonical" min/max
8182 /// pattern.
8183static SelectPatternResult matchFastFloatClamp(CmpInst::Predicate Pred,
8184 Value *CmpLHS, Value *CmpRHS,
8185 Value *TrueVal, Value *FalseVal,
8186 Value *&LHS, Value *&RHS) {
8187 // Try to match
8188 // X < C1 ? C1 : Min(X, C2) --> Max(C1, Min(X, C2))
8189 // X > C1 ? C1 : Max(X, C2) --> Min(C1, Max(X, C2))
8190 // and return description of the outer Max/Min.
8191
8192 // First, check if select has inverse order:
8193 if (CmpRHS == FalseVal) {
8194 std::swap(a&: TrueVal, b&: FalseVal);
8195 Pred = CmpInst::getInversePredicate(pred: Pred);
8196 }
8197
8198 // Assume success now. If there's no match, callers should not use these anyway.
8199 LHS = TrueVal;
8200 RHS = FalseVal;
8201
8202 const APFloat *FC1;
8203 if (CmpRHS != TrueVal || !match(V: CmpRHS, P: m_APFloat(Res&: FC1)) || !FC1->isFinite())
8204 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8205
8206 const APFloat *FC2;
8207 switch (Pred) {
8208 case CmpInst::FCMP_OLT:
8209 case CmpInst::FCMP_OLE:
8210 case CmpInst::FCMP_ULT:
8211 case CmpInst::FCMP_ULE:
8212 if (match(V: FalseVal, P: m_OrdOrUnordFMin(L: m_Specific(V: CmpLHS), R: m_APFloat(Res&: FC2))) &&
8213 *FC1 < *FC2)
8214 return {.Flavor: SPF_FMAXNUM, .NaNBehavior: SPNB_RETURNS_ANY, .Ordered: false};
8215 break;
8216 case CmpInst::FCMP_OGT:
8217 case CmpInst::FCMP_OGE:
8218 case CmpInst::FCMP_UGT:
8219 case CmpInst::FCMP_UGE:
8220 if (match(V: FalseVal, P: m_OrdOrUnordFMax(L: m_Specific(V: CmpLHS), R: m_APFloat(Res&: FC2))) &&
8221 *FC1 > *FC2)
8222 return {.Flavor: SPF_FMINNUM, .NaNBehavior: SPNB_RETURNS_ANY, .Ordered: false};
8223 break;
8224 default:
8225 break;
8226 }
8227
8228 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8229}
8230
8231/// Recognize variations of:
8232/// CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))
8233static SelectPatternResult matchClamp(CmpInst::Predicate Pred,
8234 Value *CmpLHS, Value *CmpRHS,
8235 Value *TrueVal, Value *FalseVal) {
8236 // Swap the select operands and predicate to match the patterns below.
8237 if (CmpRHS != TrueVal) {
8238 Pred = ICmpInst::getSwappedPredicate(pred: Pred);
8239 std::swap(a&: TrueVal, b&: FalseVal);
8240 }
8241 const APInt *C1;
8242 if (CmpRHS == TrueVal && match(V: CmpRHS, P: m_APInt(Res&: C1))) {
8243 const APInt *C2;
8244 // (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1)
8245 if (match(V: FalseVal, P: m_SMin(L: m_Specific(V: CmpLHS), R: m_APInt(Res&: C2))) &&
8246 C1->slt(RHS: *C2) && Pred == CmpInst::ICMP_SLT)
8247 return {.Flavor: SPF_SMAX, .NaNBehavior: SPNB_NA, .Ordered: false};
8248
8249 // (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1)
8250 if (match(V: FalseVal, P: m_SMax(L: m_Specific(V: CmpLHS), R: m_APInt(Res&: C2))) &&
8251 C1->sgt(RHS: *C2) && Pred == CmpInst::ICMP_SGT)
8252 return {.Flavor: SPF_SMIN, .NaNBehavior: SPNB_NA, .Ordered: false};
8253
8254 // (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1)
8255 if (match(V: FalseVal, P: m_UMin(L: m_Specific(V: CmpLHS), R: m_APInt(Res&: C2))) &&
8256 C1->ult(RHS: *C2) && Pred == CmpInst::ICMP_ULT)
8257 return {.Flavor: SPF_UMAX, .NaNBehavior: SPNB_NA, .Ordered: false};
8258
8259 // (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1)
8260 if (match(V: FalseVal, P: m_UMax(L: m_Specific(V: CmpLHS), R: m_APInt(Res&: C2))) &&
8261 C1->ugt(RHS: *C2) && Pred == CmpInst::ICMP_UGT)
8262 return {.Flavor: SPF_UMIN, .NaNBehavior: SPNB_NA, .Ordered: false};
8263 }
8264 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8265}
8266
8267/// Recognize variations of:
8268/// a < c ? min(a,b) : min(b,c) ==> min(min(a,b),min(b,c))
8269static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred,
8270 Value *CmpLHS, Value *CmpRHS,
8271 Value *TVal, Value *FVal,
8272 unsigned Depth) {
8273 // TODO: Allow FP min/max with nnan/nsz.
8274 assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison");
8275
8276 Value *A = nullptr, *B = nullptr;
8277 SelectPatternResult L = matchSelectPattern(V: TVal, LHS&: A, RHS&: B, CastOp: nullptr, Depth: Depth + 1);
8278 if (!SelectPatternResult::isMinOrMax(SPF: L.Flavor))
8279 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8280
8281 Value *C = nullptr, *D = nullptr;
8282 SelectPatternResult R = matchSelectPattern(V: FVal, LHS&: C, RHS&: D, CastOp: nullptr, Depth: Depth + 1);
8283 if (L.Flavor != R.Flavor)
8284 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8285
8286 // We have something like: x Pred y ? min(a, b) : min(c, d).
8287 // Try to match the compare to the min/max operations of the select operands.
8288 // First, make sure we have the right compare predicate.
8289 switch (L.Flavor) {
8290 case SPF_SMIN:
8291 if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) {
8292 Pred = ICmpInst::getSwappedPredicate(pred: Pred);
8293 std::swap(a&: CmpLHS, b&: CmpRHS);
8294 }
8295 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
8296 break;
8297 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8298 case SPF_SMAX:
8299 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
8300 Pred = ICmpInst::getSwappedPredicate(pred: Pred);
8301 std::swap(a&: CmpLHS, b&: CmpRHS);
8302 }
8303 if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
8304 break;
8305 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8306 case SPF_UMIN:
8307 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
8308 Pred = ICmpInst::getSwappedPredicate(pred: Pred);
8309 std::swap(a&: CmpLHS, b&: CmpRHS);
8310 }
8311 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE)
8312 break;
8313 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8314 case SPF_UMAX:
8315 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
8316 Pred = ICmpInst::getSwappedPredicate(pred: Pred);
8317 std::swap(a&: CmpLHS, b&: CmpRHS);
8318 }
8319 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
8320 break;
8321 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8322 default:
8323 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8324 }
8325
8326 // If there is a common operand in the already matched min/max and the other
8327 // min/max operands match the compare operands (either directly or inverted),
8328 // then this is min/max of the same flavor.
8329
8330 // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
8331 // ~c pred ~a ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
8332 if (D == B) {
8333 if ((CmpLHS == A && CmpRHS == C) || (match(V: C, P: m_Not(V: m_Specific(V: CmpLHS))) &&
8334 match(V: A, P: m_Not(V: m_Specific(V: CmpRHS)))))
8335 return {.Flavor: L.Flavor, .NaNBehavior: SPNB_NA, .Ordered: false};
8336 }
8337 // a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
8338 // ~d pred ~a ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
8339 if (C == B) {
8340 if ((CmpLHS == A && CmpRHS == D) || (match(V: D, P: m_Not(V: m_Specific(V: CmpLHS))) &&
8341 match(V: A, P: m_Not(V: m_Specific(V: CmpRHS)))))
8342 return {.Flavor: L.Flavor, .NaNBehavior: SPNB_NA, .Ordered: false};
8343 }
8344 // b pred c ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
8345 // ~c pred ~b ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
8346 if (D == A) {
8347 if ((CmpLHS == B && CmpRHS == C) || (match(V: C, P: m_Not(V: m_Specific(V: CmpLHS))) &&
8348 match(V: B, P: m_Not(V: m_Specific(V: CmpRHS)))))
8349 return {.Flavor: L.Flavor, .NaNBehavior: SPNB_NA, .Ordered: false};
8350 }
8351 // b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
8352 // ~d pred ~b ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
8353 if (C == A) {
8354 if ((CmpLHS == B && CmpRHS == D) || (match(V: D, P: m_Not(V: m_Specific(V: CmpLHS))) &&
8355 match(V: B, P: m_Not(V: m_Specific(V: CmpRHS)))))
8356 return {.Flavor: L.Flavor, .NaNBehavior: SPNB_NA, .Ordered: false};
8357 }
8358
8359 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8360}
8361
8362/// If the input value is the result of a 'not' op, constant integer, or vector
8363/// splat of a constant integer, return the bitwise-not source value.
8364/// TODO: This could be extended to handle non-splat vector integer constants.
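/// For example, given (xor i8 %x, -1) this returns %x, and given the constant
/// i8 5 it returns i8 -6 (its bitwise-not).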
8365static Value *getNotValue(Value *V) {
8366 Value *NotV;
8367 if (match(V, P: m_Not(V: m_Value(V&: NotV))))
8368 return NotV;
8369
8370 const APInt *C;
8371 if (match(V, P: m_APInt(Res&: C)))
8372 return ConstantInt::get(Ty: V->getType(), V: ~(*C));
8373
8374 return nullptr;
8375}
8376
8377/// Match non-obvious integer minimum and maximum sequences.
8378static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
8379 Value *CmpLHS, Value *CmpRHS,
8380 Value *TrueVal, Value *FalseVal,
8381 Value *&LHS, Value *&RHS,
8382 unsigned Depth) {
8383 // Assume success. If there's no match, callers should not use these anyway.
8384 LHS = TrueVal;
8385 RHS = FalseVal;
8386
8387 SelectPatternResult SPR = matchClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal);
8388 if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
8389 return SPR;
8390
8391 SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TVal: TrueVal, FVal: FalseVal, Depth);
8392 if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
8393 return SPR;
8394
8395 // Look through 'not' ops to find disguised min/max.
8396 // (X > Y) ? ~X : ~Y ==> (~X < ~Y) ? ~X : ~Y ==> MIN(~X, ~Y)
8397 // (X < Y) ? ~X : ~Y ==> (~X > ~Y) ? ~X : ~Y ==> MAX(~X, ~Y)
8398 if (CmpLHS == getNotValue(V: TrueVal) && CmpRHS == getNotValue(V: FalseVal)) {
8399 switch (Pred) {
8400 case CmpInst::ICMP_SGT: return {.Flavor: SPF_SMIN, .NaNBehavior: SPNB_NA, .Ordered: false};
8401 case CmpInst::ICMP_SLT: return {.Flavor: SPF_SMAX, .NaNBehavior: SPNB_NA, .Ordered: false};
8402 case CmpInst::ICMP_UGT: return {.Flavor: SPF_UMIN, .NaNBehavior: SPNB_NA, .Ordered: false};
8403 case CmpInst::ICMP_ULT: return {.Flavor: SPF_UMAX, .NaNBehavior: SPNB_NA, .Ordered: false};
8404 default: break;
8405 }
8406 }
8407
8408 // (X > Y) ? ~Y : ~X ==> (~X < ~Y) ? ~Y : ~X ==> MAX(~Y, ~X)
8409 // (X < Y) ? ~Y : ~X ==> (~X > ~Y) ? ~Y : ~X ==> MIN(~Y, ~X)
8410 if (CmpLHS == getNotValue(V: FalseVal) && CmpRHS == getNotValue(V: TrueVal)) {
8411 switch (Pred) {
8412 case CmpInst::ICMP_SGT: return {.Flavor: SPF_SMAX, .NaNBehavior: SPNB_NA, .Ordered: false};
8413 case CmpInst::ICMP_SLT: return {.Flavor: SPF_SMIN, .NaNBehavior: SPNB_NA, .Ordered: false};
8414 case CmpInst::ICMP_UGT: return {.Flavor: SPF_UMAX, .NaNBehavior: SPNB_NA, .Ordered: false};
8415 case CmpInst::ICMP_ULT: return {.Flavor: SPF_UMIN, .NaNBehavior: SPNB_NA, .Ordered: false};
8416 default: break;
8417 }
8418 }
8419
8420 if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
8421 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8422
8423 const APInt *C1;
8424 if (!match(V: CmpRHS, P: m_APInt(Res&: C1)))
8425 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8426
8427 // An unsigned min/max can be written with a signed compare.
8428 const APInt *C2;
8429 if ((CmpLHS == TrueVal && match(V: FalseVal, P: m_APInt(Res&: C2))) ||
8430 (CmpLHS == FalseVal && match(V: TrueVal, P: m_APInt(Res&: C2)))) {
8431 // Is the sign bit set?
8432 // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX
8433 // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN
8434 if (Pred == CmpInst::ICMP_SLT && C1->isZero() && C2->isMaxSignedValue())
8435 return {.Flavor: CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, .NaNBehavior: SPNB_NA, .Ordered: false};
8436
8437 // Is the sign bit clear?
8438 // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX
8439 // (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? X : MINVAL ==> UMIN
8440 if (Pred == CmpInst::ICMP_SGT && C1->isAllOnes() && C2->isMinSignedValue())
8441 return {.Flavor: CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, .NaNBehavior: SPNB_NA, .Ordered: false};
8442 }
8443
8444 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8445}
8446
8447bool llvm::isKnownNegation(const Value *X, const Value *Y, bool NeedNSW,
8448 bool AllowPoison) {
8449 assert(X && Y && "Invalid operand");
8450
8451 auto IsNegationOf = [&](const Value *X, const Value *Y) {
8452 if (!match(V: X, P: m_Neg(V: m_Specific(V: Y))))
8453 return false;
8454
8455 auto *BO = cast<BinaryOperator>(Val: X);
8456 if (NeedNSW && !BO->hasNoSignedWrap())
8457 return false;
8458
8459 auto *Zero = cast<Constant>(Val: BO->getOperand(i_nocapture: 0));
8460 if (!AllowPoison && !Zero->isNullValue())
8461 return false;
8462
8463 return true;
8464 };
8465
8466 // X = -Y or Y = -X
8467 if (IsNegationOf(X, Y) || IsNegationOf(Y, X))
8468 return true;
8469
8470 // X = sub (A, B), Y = sub (B, A) || X = sub nsw (A, B), Y = sub nsw (B, A)
8471 Value *A, *B;
8472 return (!NeedNSW && (match(V: X, P: m_Sub(L: m_Value(V&: A), R: m_Value(V&: B))) &&
8473 match(V: Y, P: m_Sub(L: m_Specific(V: B), R: m_Specific(V: A))))) ||
8474 (NeedNSW && (match(V: X, P: m_NSWSub(L: m_Value(V&: A), R: m_Value(V&: B))) &&
8475 match(V: Y, P: m_NSWSub(L: m_Specific(V: B), R: m_Specific(V: A)))));
8476}
8477
8478bool llvm::isKnownInversion(const Value *X, const Value *Y) {
8479 // Handle X = icmp pred A, B, Y = icmp pred A, C.
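// For example (illustrative), X = icmp slt %a, 5 and Y = icmp sge %a, 5 are
// inversions (the B == C case below), as are X = icmp ult %a, 5 and
// Y = icmp ugt %a, 4 (caught by the constant-range check below).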
8480 Value *A, *B, *C;
8481 CmpPredicate Pred1, Pred2;
8482 if (!match(V: X, P: m_ICmp(Pred&: Pred1, L: m_Value(V&: A), R: m_Value(V&: B))) ||
8483 !match(V: Y, P: m_c_ICmp(Pred&: Pred2, L: m_Specific(V: A), R: m_Value(V&: C))))
8484 return false;
8485
8486 // They must both have samesign flag or not.
8487 if (Pred1.hasSameSign() != Pred2.hasSameSign())
8488 return false;
8489
8490 if (B == C)
8491 return Pred1 == ICmpInst::getInversePredicate(pred: Pred2);
8492
8493 // Try to infer the relationship from constant ranges.
8494 const APInt *RHSC1, *RHSC2;
8495 if (!match(V: B, P: m_APInt(Res&: RHSC1)) || !match(V: C, P: m_APInt(Res&: RHSC2)))
8496 return false;
8497
8498 // Sign bits of two RHSCs should match.
8499 if (Pred1.hasSameSign() && RHSC1->isNonNegative() != RHSC2->isNonNegative())
8500 return false;
8501
8502 const auto CR1 = ConstantRange::makeExactICmpRegion(Pred: Pred1, Other: *RHSC1);
8503 const auto CR2 = ConstantRange::makeExactICmpRegion(Pred: Pred2, Other: *RHSC2);
8504
8505 return CR1.inverse() == CR2;
8506}
8507
8508SelectPatternResult llvm::getSelectPattern(CmpInst::Predicate Pred,
8509 SelectPatternNaNBehavior NaNBehavior,
8510 bool Ordered) {
8511 switch (Pred) {
8512 default:
8513 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false}; // Equality.
8514 case ICmpInst::ICMP_UGT:
8515 case ICmpInst::ICMP_UGE:
8516 return {.Flavor: SPF_UMAX, .NaNBehavior: SPNB_NA, .Ordered: false};
8517 case ICmpInst::ICMP_SGT:
8518 case ICmpInst::ICMP_SGE:
8519 return {.Flavor: SPF_SMAX, .NaNBehavior: SPNB_NA, .Ordered: false};
8520 case ICmpInst::ICMP_ULT:
8521 case ICmpInst::ICMP_ULE:
8522 return {.Flavor: SPF_UMIN, .NaNBehavior: SPNB_NA, .Ordered: false};
8523 case ICmpInst::ICMP_SLT:
8524 case ICmpInst::ICMP_SLE:
8525 return {.Flavor: SPF_SMIN, .NaNBehavior: SPNB_NA, .Ordered: false};
8526 case FCmpInst::FCMP_UGT:
8527 case FCmpInst::FCMP_UGE:
8528 case FCmpInst::FCMP_OGT:
8529 case FCmpInst::FCMP_OGE:
8530 return {.Flavor: SPF_FMAXNUM, .NaNBehavior: NaNBehavior, .Ordered: Ordered};
8531 case FCmpInst::FCMP_ULT:
8532 case FCmpInst::FCMP_ULE:
8533 case FCmpInst::FCMP_OLT:
8534 case FCmpInst::FCMP_OLE:
8535 return {.Flavor: SPF_FMINNUM, .NaNBehavior: NaNBehavior, .Ordered: Ordered};
8536 }
8537}
8538
8539std::optional<std::pair<CmpPredicate, Constant *>>
8540llvm::getFlippedStrictnessPredicateAndConstant(CmpPredicate Pred, Constant *C) {
8541 assert(ICmpInst::isRelational(Pred) && ICmpInst::isIntPredicate(Pred) &&
8542 "Only for relational integer predicates.");
8543 if (isa<UndefValue>(Val: C))
8544 return std::nullopt;
8545
8546 Type *Type = C->getType();
8547 bool IsSigned = ICmpInst::isSigned(predicate: Pred);
8548
8549 CmpInst::Predicate UnsignedPred = ICmpInst::getUnsignedPredicate(Pred);
8550 bool WillIncrement =
8551 UnsignedPred == ICmpInst::ICMP_ULE || UnsignedPred == ICmpInst::ICMP_UGT;
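// For example, (icmp sgt %x, 7) becomes (icmp sge %x, 8), while
// (icmp slt %x, 7) becomes (icmp sle %x, 6).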
8552
8553 // Check if the constant operand can be safely incremented/decremented
8554 // without overflowing/underflowing.
8555 auto ConstantIsOk = [WillIncrement, IsSigned](ConstantInt *C) {
8556 return WillIncrement ? !C->isMaxValue(IsSigned) : !C->isMinValue(IsSigned);
8557 };
8558
8559 Constant *SafeReplacementConstant = nullptr;
8560 if (auto *CI = dyn_cast<ConstantInt>(Val: C)) {
8561 // Bail out if the constant can't be safely incremented/decremented.
8562 if (!ConstantIsOk(CI))
8563 return std::nullopt;
8564 } else if (auto *FVTy = dyn_cast<FixedVectorType>(Val: Type)) {
8565 unsigned NumElts = FVTy->getNumElements();
8566 for (unsigned i = 0; i != NumElts; ++i) {
8567 Constant *Elt = C->getAggregateElement(Elt: i);
8568 if (!Elt)
8569 return std::nullopt;
8570
8571 if (isa<UndefValue>(Val: Elt))
8572 continue;
8573
8574 // Bail out if we can't determine if this constant is min/max or if we
8575 // know that this constant is min/max.
8576 auto *CI = dyn_cast<ConstantInt>(Val: Elt);
8577 if (!CI || !ConstantIsOk(CI))
8578 return std::nullopt;
8579
8580 if (!SafeReplacementConstant)
8581 SafeReplacementConstant = CI;
8582 }
8583 } else if (isa<VectorType>(Val: C->getType())) {
8584 // Handle scalable splat
8585 Value *SplatC = C->getSplatValue();
8586 auto *CI = dyn_cast_or_null<ConstantInt>(Val: SplatC);
8587 // Bail out if the constant can't be safely incremented/decremented.
8588 if (!CI || !ConstantIsOk(CI))
8589 return std::nullopt;
8590 } else {
8591 // ConstantExpr?
8592 return std::nullopt;
8593 }
8594
8595 // It may not be safe to change a compare predicate in the presence of
8596 // undefined elements, so replace those elements with the first safe constant
8597 // that we found.
8598 // TODO: in case of poison, it is safe; let's replace undefs only.
8599 if (C->containsUndefOrPoisonElement()) {
8600 assert(SafeReplacementConstant && "Replacement constant not set");
8601 C = Constant::replaceUndefsWith(C, Replacement: SafeReplacementConstant);
8602 }
8603
8604 CmpInst::Predicate NewPred = CmpInst::getFlippedStrictnessPredicate(pred: Pred);
8605
8606 // Increment or decrement the constant.
8607 Constant *OneOrNegOne = ConstantInt::get(Ty: Type, V: WillIncrement ? 1 : -1, IsSigned: true);
8608 Constant *NewC = ConstantExpr::getAdd(C1: C, C2: OneOrNegOne);
8609
8610 return std::make_pair(x&: NewPred, y&: NewC);
8611}
8612
8613static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
8614 FastMathFlags FMF,
8615 Value *CmpLHS, Value *CmpRHS,
8616 Value *TrueVal, Value *FalseVal,
8617 Value *&LHS, Value *&RHS,
8618 unsigned Depth) {
8619 bool HasMismatchedZeros = false;
8620 if (CmpInst::isFPPredicate(P: Pred)) {
8621 // IEEE-754 ignores the sign of 0.0 in comparisons. So if the select has one
8622 // 0.0 operand, set the compare's 0.0 operands to that same value for the
8623 // purpose of identifying min/max. Disregard vector constants with undefined
8624 // elements because those cannot be back-propagated for analysis.
8625 Value *OutputZeroVal = nullptr;
8626 if (match(V: TrueVal, P: m_AnyZeroFP()) && !match(V: FalseVal, P: m_AnyZeroFP()) &&
8627 !cast<Constant>(Val: TrueVal)->containsUndefOrPoisonElement())
8628 OutputZeroVal = TrueVal;
8629 else if (match(V: FalseVal, P: m_AnyZeroFP()) && !match(V: TrueVal, P: m_AnyZeroFP()) &&
8630 !cast<Constant>(Val: FalseVal)->containsUndefOrPoisonElement())
8631 OutputZeroVal = FalseVal;
8632
8633 if (OutputZeroVal) {
8634 if (match(V: CmpLHS, P: m_AnyZeroFP()) && CmpLHS != OutputZeroVal) {
8635 HasMismatchedZeros = true;
8636 CmpLHS = OutputZeroVal;
8637 }
8638 if (match(V: CmpRHS, P: m_AnyZeroFP()) && CmpRHS != OutputZeroVal) {
8639 HasMismatchedZeros = true;
8640 CmpRHS = OutputZeroVal;
8641 }
8642 }
8643 }
8644
8645 LHS = CmpLHS;
8646 RHS = CmpRHS;
8647
8648 // Signed zero may return inconsistent results between implementations.
8649 // (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
8650 // minNum(0.0, -0.0) // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
8651 // Therefore, we behave conservatively and only proceed if at least one of the
8652 // operands is known to not be zero or if we don't care about signed zero.
8653 switch (Pred) {
8654 default: break;
8655 case CmpInst::FCMP_OGT: case CmpInst::FCMP_OLT:
8656 case CmpInst::FCMP_UGT: case CmpInst::FCMP_ULT:
8657 if (!HasMismatchedZeros)
8658 break;
8659 [[fallthrough]];
8660 case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE:
8661 case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE:
8662 if (!FMF.noSignedZeros() && !isKnownNonZero(V: CmpLHS) &&
8663 !isKnownNonZero(V: CmpRHS))
8664 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8665 }
8666
8667 SelectPatternNaNBehavior NaNBehavior = SPNB_NA;
8668 bool Ordered = false;
8669
8670 // When given one NaN and one non-NaN input:
8671 // - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input.
8672 // - A simple C99 (a < b ? a : b) construction will return 'b' (as the
8673 // ordered comparison fails), which could be NaN or non-NaN.
8674 // so here we discover exactly what NaN behavior is required/accepted.
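// For example (illustrative), with "%c = fcmp olt %x, %y" feeding
// "select i1 %c, %x, %y": if %x is NaN, the ordered compare is false and %y
// (the RHS) is returned, so which operand survives a NaN depends on which
// side can be NaN; NaNBehavior records exactly that.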
8675 if (CmpInst::isFPPredicate(P: Pred)) {
8676 bool LHSSafe = isKnownNonNaN(V: CmpLHS, FMF);
8677 bool RHSSafe = isKnownNonNaN(V: CmpRHS, FMF);
8678
8679 if (LHSSafe && RHSSafe) {
8680 // Both operands are known non-NaN.
8681 NaNBehavior = SPNB_RETURNS_ANY;
8682 Ordered = CmpInst::isOrdered(predicate: Pred);
8683 } else if (CmpInst::isOrdered(predicate: Pred)) {
8684 // An ordered comparison will return false when given a NaN, so it
8685 // returns the RHS.
8686 Ordered = true;
8687 if (LHSSafe)
8688 // LHS is non-NaN, so if RHS is NaN then NaN will be returned.
8689 NaNBehavior = SPNB_RETURNS_NAN;
8690 else if (RHSSafe)
8691 NaNBehavior = SPNB_RETURNS_OTHER;
8692 else
8693 // Completely unsafe.
8694 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8695 } else {
8696 Ordered = false;
8697 // An unordered comparison will return true when given a NaN, so it
8698 // returns the LHS.
8699 if (LHSSafe)
8700 // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned.
8701 NaNBehavior = SPNB_RETURNS_OTHER;
8702 else if (RHSSafe)
8703 NaNBehavior = SPNB_RETURNS_NAN;
8704 else
8705 // Completely unsafe.
8706 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8707 }
8708 }
8709
8710 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
8711 std::swap(a&: CmpLHS, b&: CmpRHS);
8712 Pred = CmpInst::getSwappedPredicate(pred: Pred);
8713 if (NaNBehavior == SPNB_RETURNS_NAN)
8714 NaNBehavior = SPNB_RETURNS_OTHER;
8715 else if (NaNBehavior == SPNB_RETURNS_OTHER)
8716 NaNBehavior = SPNB_RETURNS_NAN;
8717 Ordered = !Ordered;
8718 }
8719
8720 // ([if]cmp X, Y) ? X : Y
8721 if (TrueVal == CmpLHS && FalseVal == CmpRHS)
8722 return getSelectPattern(Pred, NaNBehavior, Ordered);
8723
8724 if (isKnownNegation(X: TrueVal, Y: FalseVal)) {
8725 // Sign-extending LHS does not change its sign, so TrueVal/FalseVal can
8726 // match against either LHS or sext(LHS).
8727 auto MaybeSExtCmpLHS =
8728 m_CombineOr(L: m_Specific(V: CmpLHS), R: m_SExt(Op: m_Specific(V: CmpLHS)));
8729 auto ZeroOrAllOnes = m_CombineOr(L: m_ZeroInt(), R: m_AllOnes());
8730 auto ZeroOrOne = m_CombineOr(L: m_ZeroInt(), R: m_One());
8731 if (match(V: TrueVal, P: MaybeSExtCmpLHS)) {
8732 // Set the return values. If the compare uses the negated value (-X >s 0),
8733 // swap the return values because the negated value is always 'RHS'.
8734 LHS = TrueVal;
8735 RHS = FalseVal;
8736 if (match(V: CmpLHS, P: m_Neg(V: m_Specific(V: FalseVal))))
8737 std::swap(a&: LHS, b&: RHS);
8738
8739 // (X >s 0) ? X : -X or (X >s -1) ? X : -X --> ABS(X)
8740 // (-X >s 0) ? -X : X or (-X >s -1) ? -X : X --> ABS(X)
8741 if (Pred == ICmpInst::ICMP_SGT && match(V: CmpRHS, P: ZeroOrAllOnes))
8742 return {.Flavor: SPF_ABS, .NaNBehavior: SPNB_NA, .Ordered: false};
8743
8744 // (X >=s 0) ? X : -X or (X >=s 1) ? X : -X --> ABS(X)
8745 if (Pred == ICmpInst::ICMP_SGE && match(V: CmpRHS, P: ZeroOrOne))
8746 return {.Flavor: SPF_ABS, .NaNBehavior: SPNB_NA, .Ordered: false};
8747
8748 // (X <s 0) ? X : -X or (X <s 1) ? X : -X --> NABS(X)
8749 // (-X <s 0) ? -X : X or (-X <s 1) ? -X : X --> NABS(X)
8750 if (Pred == ICmpInst::ICMP_SLT && match(V: CmpRHS, P: ZeroOrOne))
8751 return {.Flavor: SPF_NABS, .NaNBehavior: SPNB_NA, .Ordered: false};
8752 }
8753 else if (match(V: FalseVal, P: MaybeSExtCmpLHS)) {
8754 // Set the return values. If the compare uses the negated value (-X >s 0),
8755 // swap the return values because the negated value is always 'RHS'.
8756 LHS = FalseVal;
8757 RHS = TrueVal;
8758 if (match(V: CmpLHS, P: m_Neg(V: m_Specific(V: TrueVal))))
8759 std::swap(a&: LHS, b&: RHS);
8760
8761 // (X >s 0) ? -X : X or (X >s -1) ? -X : X --> NABS(X)
8762 // (-X >s 0) ? X : -X or (-X >s -1) ? X : -X --> NABS(X)
8763 if (Pred == ICmpInst::ICMP_SGT && match(V: CmpRHS, P: ZeroOrAllOnes))
8764 return {.Flavor: SPF_NABS, .NaNBehavior: SPNB_NA, .Ordered: false};
8765
8766 // (X <s 0) ? -X : X or (X <s 1) ? -X : X --> ABS(X)
8767 // (-X <s 0) ? X : -X or (-X <s 1) ? X : -X --> ABS(X)
8768 if (Pred == ICmpInst::ICMP_SLT && match(V: CmpRHS, P: ZeroOrOne))
8769 return {.Flavor: SPF_ABS, .NaNBehavior: SPNB_NA, .Ordered: false};
8770 }
8771 }
8772
8773 if (CmpInst::isIntPredicate(P: Pred))
8774 return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS, Depth);
8775
8776 // According to (IEEE 754-2008 5.3.1), minNum(0.0, -0.0) and similar
8777 // may return either -0.0 or 0.0, so the fcmp/select pair has stricter
8778 // semantics than minNum. Be conservative in such cases.
8779 if (NaNBehavior != SPNB_RETURNS_ANY ||
8780 (!FMF.noSignedZeros() && !isKnownNonZero(V: CmpLHS) &&
8781 !isKnownNonZero(V: CmpRHS)))
8782 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8783
8784 return matchFastFloatClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
8785}
8786
8787static Value *lookThroughCastConst(CmpInst *CmpI, Type *SrcTy, Constant *C,
8788 Instruction::CastOps *CastOp) {
8789 const DataLayout &DL = CmpI->getDataLayout();
8790
8791 Constant *CastedTo = nullptr;
8792 switch (*CastOp) {
8793 case Instruction::ZExt:
8794 if (CmpI->isUnsigned())
8795 CastedTo = ConstantExpr::getTrunc(C, Ty: SrcTy);
8796 break;
8797 case Instruction::SExt:
8798 if (CmpI->isSigned())
8799 CastedTo = ConstantExpr::getTrunc(C, Ty: SrcTy, OnlyIfReduced: true);
8800 break;
8801 case Instruction::Trunc:
8802 Constant *CmpConst;
8803 if (match(V: CmpI->getOperand(i_nocapture: 1), P: m_Constant(C&: CmpConst)) &&
8804 CmpConst->getType() == SrcTy) {
8805 // Here we have the following case:
8806 //
8807 // %cond = cmp iN %x, CmpConst
8808 // %tr = trunc iN %x to iK
8809      //   %narrowsel = select i1 %cond, iK %tr, iK C
8810 //
8811 // We can always move trunc after select operation:
8812 //
8813 // %cond = cmp iN %x, CmpConst
8814 // %widesel = select i1 %cond, iN %x, iN CmpConst
8815 // %tr = trunc iN %widesel to iK
8816 //
8817      // Note that C could be extended in any way because we don't care about
8818      // the upper bits after truncation. It can't be an abs pattern, because it
8819      // would look like:
8820      //
8821      //   select i1 %cond, x, -x
8822      //
8823      // So only a min/max pattern can be matched. Such a match requires the
8824      // widened C == CmpConst, which is why we set the widened C to CmpConst;
8825      // the condition trunc(CmpConst) == C is checked below.
8826 CastedTo = CmpConst;
8827 } else {
8828 unsigned ExtOp = CmpI->isSigned() ? Instruction::SExt : Instruction::ZExt;
8829 CastedTo = ConstantFoldCastOperand(Opcode: ExtOp, C, DestTy: SrcTy, DL);
8830 }
8831 break;
8832 case Instruction::FPTrunc:
8833 CastedTo = ConstantFoldCastOperand(Opcode: Instruction::FPExt, C, DestTy: SrcTy, DL);
8834 break;
8835 case Instruction::FPExt:
8836 CastedTo = ConstantFoldCastOperand(Opcode: Instruction::FPTrunc, C, DestTy: SrcTy, DL);
8837 break;
8838 case Instruction::FPToUI:
8839 CastedTo = ConstantFoldCastOperand(Opcode: Instruction::UIToFP, C, DestTy: SrcTy, DL);
8840 break;
8841 case Instruction::FPToSI:
8842 CastedTo = ConstantFoldCastOperand(Opcode: Instruction::SIToFP, C, DestTy: SrcTy, DL);
8843 break;
8844 case Instruction::UIToFP:
8845 CastedTo = ConstantFoldCastOperand(Opcode: Instruction::FPToUI, C, DestTy: SrcTy, DL);
8846 break;
8847 case Instruction::SIToFP:
8848 CastedTo = ConstantFoldCastOperand(Opcode: Instruction::FPToSI, C, DestTy: SrcTy, DL);
8849 break;
8850 default:
8851 break;
8852 }
8853
8854 if (!CastedTo)
8855 return nullptr;
8856
8857 // Make sure the cast doesn't lose any information.
8858 Constant *CastedBack =
8859 ConstantFoldCastOperand(Opcode: *CastOp, C: CastedTo, DestTy: C->getType(), DL);
8860 if (CastedBack && CastedBack != C)
8861 return nullptr;
8862
8863 return CastedTo;
8864}
8865
8866/// Helps to match a select pattern in case of a type mismatch.
8867///
8868/// The function processes the case where the type of the true and false
8869/// values of a select instruction differs from the type of the cmp
8870/// instruction operands because of a cast instruction. The function checks
8871/// whether it is legal to move the cast after the "select". If so, it returns
8872/// the new second value of the "select" (assuming the cast has been moved):
8873/// 1. As the operand of the cast instruction when both values of the "select"
8874///    are the same kind of cast from the same source type.
8875/// 2. As the restored constant (by applying the reverse cast operation) when
8876///    the first value of the "select" is a cast operation and the second
8877///    value is a constant. This is implemented in lookThroughCastConst().
8878/// 3. As the extended compare operand when one value of the "select" is a
8879///    cast instruction and the other is not (integers of different widths).
8880/// NOTE: We return only the new second value because the first value can be
8881/// accessed as the operand of the cast instruction.
8882static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
8883 Instruction::CastOps *CastOp) {
8884 auto *Cast1 = dyn_cast<CastInst>(Val: V1);
8885 if (!Cast1)
8886 return nullptr;
8887
8888 *CastOp = Cast1->getOpcode();
8889 Type *SrcTy = Cast1->getSrcTy();
8890 if (auto *Cast2 = dyn_cast<CastInst>(Val: V2)) {
8891 // If V1 and V2 are both the same cast from the same type, look through V1.
8892 if (*CastOp == Cast2->getOpcode() && SrcTy == Cast2->getSrcTy())
8893 return Cast2->getOperand(i_nocapture: 0);
8894 return nullptr;
8895 }
8896
8897 auto *C = dyn_cast<Constant>(Val: V2);
8898 if (C)
8899 return lookThroughCastConst(CmpI, SrcTy, C, CastOp);
8900
8901 Value *CastedTo = nullptr;
8902 if (*CastOp == Instruction::Trunc) {
8903 if (match(V: CmpI->getOperand(i_nocapture: 1), P: m_ZExtOrSExt(Op: m_Specific(V: V2)))) {
8904 // Here we have the following case:
8905 // %y_ext = sext iK %y to iN
8906 // %cond = cmp iN %x, %y_ext
8907 // %tr = trunc iN %x to iK
8908 // %narrowsel = select i1 %cond, iK %tr, iK %y
8909 //
8910 // We can always move trunc after select operation:
8911 // %y_ext = sext iK %y to iN
8912 // %cond = cmp iN %x, %y_ext
8913 // %widesel = select i1 %cond, iN %x, iN %y_ext
8914 // %tr = trunc iN %widesel to iK
8915 assert(V2->getType() == Cast1->getType() &&
8916 "V2 and Cast1 should be the same type.");
8917 CastedTo = CmpI->getOperand(i_nocapture: 1);
8918 }
8919 }
8920
8921 return CastedTo;
8922}
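
// Illustrative example (not from upstream) of the constant case handled by
// lookThroughCastConst(): for
//   %cmp = icmp ult i8 %x, 42
//   %ext = zext i8 %x to i32
//   %sel = select i1 %cmp, i32 %ext, i32 42
// the ZExt case truncates the i32 constant 42 back to i8 and verifies the
// round trip (zext(trunc(42)) == 42), so the select can be analyzed as
// umin(%x, 42) in the narrow type, with the zext conceptually moved after
// the select.
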
8923SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
8924 Instruction::CastOps *CastOp,
8925 unsigned Depth) {
8926 if (Depth >= MaxAnalysisRecursionDepth)
8927 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8928
8929 SelectInst *SI = dyn_cast<SelectInst>(Val: V);
8930 if (!SI) return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8931
8932 CmpInst *CmpI = dyn_cast<CmpInst>(Val: SI->getCondition());
8933 if (!CmpI) return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8934
8935 Value *TrueVal = SI->getTrueValue();
8936 Value *FalseVal = SI->getFalseValue();
8937
8938 return llvm::matchDecomposedSelectPattern(
8939 CmpI, TrueVal, FalseVal, LHS, RHS,
8940 FMF: isa<FPMathOperator>(Val: SI) ? SI->getFastMathFlags() : FastMathFlags(),
8941 CastOp, Depth);
8942}
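
// Illustrative usage sketch (assumption: 'Sel' is some select-like Value*;
// not part of upstream):
//   Value *A, *B;
//   SelectPatternResult SPR = matchSelectPattern(Sel, A, B);
//   if (SelectPatternResult::isMinOrMax(SPR.Flavor))
//     ; // Sel behaves like a min/max of A and B; SPR.Flavor says which kind.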
8943
8944SelectPatternResult llvm::matchDecomposedSelectPattern(
8945 CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS,
8946 FastMathFlags FMF, Instruction::CastOps *CastOp, unsigned Depth) {
8947 CmpInst::Predicate Pred = CmpI->getPredicate();
8948 Value *CmpLHS = CmpI->getOperand(i_nocapture: 0);
8949 Value *CmpRHS = CmpI->getOperand(i_nocapture: 1);
8950 if (isa<FPMathOperator>(Val: CmpI) && CmpI->hasNoNaNs())
8951 FMF.setNoNaNs();
8952
8953 // Bail out early.
8954 if (CmpI->isEquality())
8955 return {.Flavor: SPF_UNKNOWN, .NaNBehavior: SPNB_NA, .Ordered: false};
8956
8957 // Deal with type mismatches.
8958 if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
8959 if (Value *C = lookThroughCast(CmpI, V1: TrueVal, V2: FalseVal, CastOp)) {
8960 // If this is a potential fmin/fmax with a cast to integer, then ignore
8961 // -0.0 because there is no corresponding integer value.
8962 if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
8963 FMF.setNoSignedZeros();
8964 return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
8965 TrueVal: cast<CastInst>(Val: TrueVal)->getOperand(i_nocapture: 0), FalseVal: C,
8966 LHS, RHS, Depth);
8967 }
8968 if (Value *C = lookThroughCast(CmpI, V1: FalseVal, V2: TrueVal, CastOp)) {
8969 // If this is a potential fmin/fmax with a cast to integer, then ignore
8970 // -0.0 because there is no corresponding integer value.
8971 if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
8972 FMF.setNoSignedZeros();
8973 return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
8974 TrueVal: C, FalseVal: cast<CastInst>(Val: FalseVal)->getOperand(i_nocapture: 0),
8975 LHS, RHS, Depth);
8976 }
8977 }
8978 return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
8979 LHS, RHS, Depth);
8980}
8981
8982CmpInst::Predicate llvm::getMinMaxPred(SelectPatternFlavor SPF, bool Ordered) {
8983 if (SPF == SPF_SMIN) return ICmpInst::ICMP_SLT;
8984 if (SPF == SPF_UMIN) return ICmpInst::ICMP_ULT;
8985 if (SPF == SPF_SMAX) return ICmpInst::ICMP_SGT;
8986 if (SPF == SPF_UMAX) return ICmpInst::ICMP_UGT;
8987 if (SPF == SPF_FMINNUM)
8988 return Ordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT;
8989 if (SPF == SPF_FMAXNUM)
8990 return Ordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT;
8991 llvm_unreachable("unhandled!");
8992}
8993
8994Intrinsic::ID llvm::getMinMaxIntrinsic(SelectPatternFlavor SPF) {
8995 switch (SPF) {
8996 case SelectPatternFlavor::SPF_UMIN:
8997 return Intrinsic::umin;
8998 case SelectPatternFlavor::SPF_UMAX:
8999 return Intrinsic::umax;
9000 case SelectPatternFlavor::SPF_SMIN:
9001 return Intrinsic::smin;
9002 case SelectPatternFlavor::SPF_SMAX:
9003 return Intrinsic::smax;
9004 default:
9005 llvm_unreachable("Unexpected SPF");
9006 }
9007}
9008
9009SelectPatternFlavor llvm::getInverseMinMaxFlavor(SelectPatternFlavor SPF) {
9010 if (SPF == SPF_SMIN) return SPF_SMAX;
9011 if (SPF == SPF_UMIN) return SPF_UMAX;
9012 if (SPF == SPF_SMAX) return SPF_SMIN;
9013 if (SPF == SPF_UMAX) return SPF_UMIN;
9014 llvm_unreachable("unhandled!");
9015}
9016
9017Intrinsic::ID llvm::getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID) {
9018 switch (MinMaxID) {
9019 case Intrinsic::smax: return Intrinsic::smin;
9020 case Intrinsic::smin: return Intrinsic::smax;
9021 case Intrinsic::umax: return Intrinsic::umin;
9022 case Intrinsic::umin: return Intrinsic::umax;
9023  // Note that the next four intrinsics may produce the same result for the
9024  // original and the inverted case even if X != Y, since NaN is handled specially.
9025 case Intrinsic::maximum: return Intrinsic::minimum;
9026 case Intrinsic::minimum: return Intrinsic::maximum;
9027 case Intrinsic::maxnum: return Intrinsic::minnum;
9028 case Intrinsic::minnum: return Intrinsic::maxnum;
9029 default: llvm_unreachable("Unexpected intrinsic");
9030 }
9031}
9032
9033APInt llvm::getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth) {
9034 switch (SPF) {
9035 case SPF_SMAX: return APInt::getSignedMaxValue(numBits: BitWidth);
9036 case SPF_SMIN: return APInt::getSignedMinValue(numBits: BitWidth);
9037 case SPF_UMAX: return APInt::getMaxValue(numBits: BitWidth);
9038 case SPF_UMIN: return APInt::getMinValue(numBits: BitWidth);
9039 default: llvm_unreachable("Unexpected flavor");
9040 }
9041}
9042
9043std::pair<Intrinsic::ID, bool>
9044llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) {
9045 // Check if VL contains select instructions that can be folded into a min/max
9046 // vector intrinsic and return the intrinsic if it is possible.
9047 // TODO: Support floating point min/max.
9048 bool AllCmpSingleUse = true;
9049 SelectPatternResult SelectPattern;
9050 SelectPattern.Flavor = SPF_UNKNOWN;
9051 if (all_of(Range&: VL, P: [&SelectPattern, &AllCmpSingleUse](Value *I) {
9052 Value *LHS, *RHS;
9053 auto CurrentPattern = matchSelectPattern(V: I, LHS, RHS);
9054 if (!SelectPatternResult::isMinOrMax(SPF: CurrentPattern.Flavor))
9055 return false;
9056 if (SelectPattern.Flavor != SPF_UNKNOWN &&
9057 SelectPattern.Flavor != CurrentPattern.Flavor)
9058 return false;
9059 SelectPattern = CurrentPattern;
9060 AllCmpSingleUse &=
9061 match(V: I, P: m_Select(C: m_OneUse(SubPattern: m_Value()), L: m_Value(), R: m_Value()));
9062 return true;
9063 })) {
9064 switch (SelectPattern.Flavor) {
9065 case SPF_SMIN:
9066 return {Intrinsic::smin, AllCmpSingleUse};
9067 case SPF_UMIN:
9068 return {Intrinsic::umin, AllCmpSingleUse};
9069 case SPF_SMAX:
9070 return {Intrinsic::smax, AllCmpSingleUse};
9071 case SPF_UMAX:
9072 return {Intrinsic::umax, AllCmpSingleUse};
9073 case SPF_FMAXNUM:
9074 return {Intrinsic::maxnum, AllCmpSingleUse};
9075 case SPF_FMINNUM:
9076 return {Intrinsic::minnum, AllCmpSingleUse};
9077 default:
9078 llvm_unreachable("unexpected select pattern flavor");
9079 }
9080 }
9081 return {Intrinsic::not_intrinsic, false};
9082}
9083
9084template <typename InstTy>
9085static bool matchTwoInputRecurrence(const PHINode *PN, InstTy *&Inst,
9086 Value *&Init, Value *&OtherOp) {
9087 // Handle the case of a simple two-predecessor recurrence PHI.
9088 // There's a lot more that could theoretically be done here, but
9089 // this is sufficient to catch some interesting cases.
9090 // TODO: Expand list -- gep, uadd.sat etc.
9091 if (PN->getNumIncomingValues() != 2)
9092 return false;
9093
9094 for (unsigned I = 0; I != 2; ++I) {
9095 if (auto *Operation = dyn_cast<InstTy>(PN->getIncomingValue(i: I))) {
9096 Value *LHS = Operation->getOperand(0);
9097 Value *RHS = Operation->getOperand(1);
9098 if (LHS != PN && RHS != PN)
9099 continue;
9100
9101 Inst = Operation;
9102 Init = PN->getIncomingValue(i: !I);
9103 OtherOp = (LHS == PN) ? RHS : LHS;
9104 return true;
9105 }
9106 }
9107 return false;
9108}
9109
9110bool llvm::matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO,
9111 Value *&Start, Value *&Step) {
9112 // We try to match a recurrence of the form:
9113 // %iv = [Start, %entry], [%iv.next, %backedge]
9114 // %iv.next = binop %iv, Step
9115 // Or:
9116 // %iv = [Start, %entry], [%iv.next, %backedge]
9117 // %iv.next = binop Step, %iv
9118 return matchTwoInputRecurrence(PN: P, Inst&: BO, Init&: Start, OtherOp&: Step);
9119}
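
// Illustrative example (not from upstream): for a loop header such as
//   %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
//   %iv.next = add nuw i32 %iv, 4
// matchSimpleRecurrence(%iv, BO, Start, Step) returns true with
// BO = %iv.next, Start = i32 0 and Step = i32 4.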
9120
9121bool llvm::matchSimpleRecurrence(const BinaryOperator *I, PHINode *&P,
9122 Value *&Start, Value *&Step) {
9123 BinaryOperator *BO = nullptr;
9124 P = dyn_cast<PHINode>(Val: I->getOperand(i_nocapture: 0));
9125 if (!P)
9126 P = dyn_cast<PHINode>(Val: I->getOperand(i_nocapture: 1));
9127 return P && matchSimpleRecurrence(P, BO, Start, Step) && BO == I;
9128}
9129
9130bool llvm::matchSimpleBinaryIntrinsicRecurrence(const IntrinsicInst *I,
9131 PHINode *&P, Value *&Init,
9132 Value *&OtherOp) {
9133  // Only binary intrinsics are supported for now.
9134 if (I->arg_size() != 2 || I->getType() != I->getArgOperand(i: 0)->getType() ||
9135 I->getType() != I->getArgOperand(i: 1)->getType())
9136 return false;
9137
9138 IntrinsicInst *II = nullptr;
9139 P = dyn_cast<PHINode>(Val: I->getArgOperand(i: 0));
9140 if (!P)
9141 P = dyn_cast<PHINode>(Val: I->getArgOperand(i: 1));
9142
9143 return P && matchTwoInputRecurrence(PN: P, Inst&: II, Init, OtherOp) && II == I;
9144}
9145
9146/// Return true if "icmp Pred LHS RHS" is always true.
9147static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS,
9148 const Value *RHS) {
9149 if (ICmpInst::isTrueWhenEqual(predicate: Pred) && LHS == RHS)
9150 return true;
9151
9152 switch (Pred) {
9153 default:
9154 return false;
9155
9156 case CmpInst::ICMP_SLE: {
9157 const APInt *C;
9158
9159 // LHS s<= LHS +_{nsw} C if C >= 0
9160 // LHS s<= LHS | C if C >= 0
9161 if (match(V: RHS, P: m_NSWAdd(L: m_Specific(V: LHS), R: m_APInt(Res&: C))) ||
9162 match(V: RHS, P: m_Or(L: m_Specific(V: LHS), R: m_APInt(Res&: C))))
9163 return !C->isNegative();
9164
9165 // LHS s<= smax(LHS, V) for any V
9166 if (match(V: RHS, P: m_c_SMax(L: m_Specific(V: LHS), R: m_Value())))
9167 return true;
9168
9169 // smin(RHS, V) s<= RHS for any V
9170 if (match(V: LHS, P: m_c_SMin(L: m_Specific(V: RHS), R: m_Value())))
9171 return true;
9172
9173 // Match A to (X +_{nsw} CA) and B to (X +_{nsw} CB)
9174 const Value *X;
9175 const APInt *CLHS, *CRHS;
9176 if (match(V: LHS, P: m_NSWAddLike(L: m_Value(V&: X), R: m_APInt(Res&: CLHS))) &&
9177 match(V: RHS, P: m_NSWAddLike(L: m_Specific(V: X), R: m_APInt(Res&: CRHS))))
9178 return CLHS->sle(RHS: *CRHS);
9179
9180 return false;
9181 }
9182
9183 case CmpInst::ICMP_ULE: {
9184 // LHS u<= LHS +_{nuw} V for any V
9185 if (match(V: RHS, P: m_c_Add(L: m_Specific(V: LHS), R: m_Value())) &&
9186 cast<OverflowingBinaryOperator>(Val: RHS)->hasNoUnsignedWrap())
9187 return true;
9188
9189 // LHS u<= LHS | V for any V
9190 if (match(V: RHS, P: m_c_Or(L: m_Specific(V: LHS), R: m_Value())))
9191 return true;
9192
9193 // LHS u<= umax(LHS, V) for any V
9194 if (match(V: RHS, P: m_c_UMax(L: m_Specific(V: LHS), R: m_Value())))
9195 return true;
9196
9197 // RHS >> V u<= RHS for any V
9198 if (match(V: LHS, P: m_LShr(L: m_Specific(V: RHS), R: m_Value())))
9199 return true;
9200
9201 // RHS u/ C_ugt_1 u<= RHS
9202 const APInt *C;
9203 if (match(V: LHS, P: m_UDiv(L: m_Specific(V: RHS), R: m_APInt(Res&: C))) && C->ugt(RHS: 1))
9204 return true;
9205
9206 // RHS & V u<= RHS for any V
9207 if (match(V: LHS, P: m_c_And(L: m_Specific(V: RHS), R: m_Value())))
9208 return true;
9209
9210 // umin(RHS, V) u<= RHS for any V
9211 if (match(V: LHS, P: m_c_UMin(L: m_Specific(V: RHS), R: m_Value())))
9212 return true;
9213
9214 // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB)
9215 const Value *X;
9216 const APInt *CLHS, *CRHS;
9217 if (match(V: LHS, P: m_NUWAddLike(L: m_Value(V&: X), R: m_APInt(Res&: CLHS))) &&
9218 match(V: RHS, P: m_NUWAddLike(L: m_Specific(V: X), R: m_APInt(Res&: CRHS))))
9219 return CLHS->ule(RHS: *CRHS);
9220
9221 return false;
9222 }
9223 }
9224}
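
// Illustrative examples (not from upstream) of facts isTruePredicate() proves
// unconditionally:
//   (lshr %x, %n) u<= %x     -- shifting right never increases a value
//   %x u<= (or %x, %y)       -- setting bits never decreases a value
//   (and %x, %y) u<= %x      -- clearing bits never increases a value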
9225
9226/// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred
9227/// ALHS ARHS" is true. Otherwise, return std::nullopt.
9228static std::optional<bool>
9229isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS,
9230 const Value *ARHS, const Value *BLHS, const Value *BRHS) {
9231 switch (Pred) {
9232 default:
9233 return std::nullopt;
9234
9235 case CmpInst::ICMP_SLT:
9236 case CmpInst::ICMP_SLE:
9237 if (isTruePredicate(Pred: CmpInst::ICMP_SLE, LHS: BLHS, RHS: ALHS) &&
9238 isTruePredicate(Pred: CmpInst::ICMP_SLE, LHS: ARHS, RHS: BRHS))
9239 return true;
9240 return std::nullopt;
9241
9242 case CmpInst::ICMP_SGT:
9243 case CmpInst::ICMP_SGE:
9244 if (isTruePredicate(Pred: CmpInst::ICMP_SLE, LHS: ALHS, RHS: BLHS) &&
9245 isTruePredicate(Pred: CmpInst::ICMP_SLE, LHS: BRHS, RHS: ARHS))
9246 return true;
9247 return std::nullopt;
9248
9249 case CmpInst::ICMP_ULT:
9250 case CmpInst::ICMP_ULE:
9251 if (isTruePredicate(Pred: CmpInst::ICMP_ULE, LHS: BLHS, RHS: ALHS) &&
9252 isTruePredicate(Pred: CmpInst::ICMP_ULE, LHS: ARHS, RHS: BRHS))
9253 return true;
9254 return std::nullopt;
9255
9256 case CmpInst::ICMP_UGT:
9257 case CmpInst::ICMP_UGE:
9258 if (isTruePredicate(Pred: CmpInst::ICMP_ULE, LHS: ALHS, RHS: BLHS) &&
9259 isTruePredicate(Pred: CmpInst::ICMP_ULE, LHS: BRHS, RHS: ARHS))
9260 return true;
9261 return std::nullopt;
9262 }
9263}
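
// Illustrative example (not from upstream): given A: (%x s<= %y) and
// B: (%x s<= (add nsw %y, 1)), isImpliedCondOperands(ICMP_SLE, %x, %y, %x,
// %y + 1) returns true: %x s<= %x holds trivially, and
// %y s<= (add nsw %y, 1) follows from the non-negative nsw increment.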
9264
9265/// Return true if "icmp LPred X, LCR" implies "icmp RPred X, RCR" is true.
9266/// Return false if "icmp LPred X, LCR" implies "icmp RPred X, RCR" is false.
9267/// Otherwise, return std::nullopt if we can't infer anything.
9268static std::optional<bool>
9269isImpliedCondCommonOperandWithCR(CmpPredicate LPred, const ConstantRange &LCR,
9270 CmpPredicate RPred, const ConstantRange &RCR) {
9271 auto CRImpliesPred = [&](ConstantRange CR,
9272 CmpInst::Predicate Pred) -> std::optional<bool> {
9273    // If Pred holds between all values of CR and RCR, lhs implies rhs.
9274 if (CR.icmp(Pred, Other: RCR))
9275 return true;
9276
9277 // If there is no overlap, lhs implies not rhs
9278 if (CR.icmp(Pred: CmpInst::getInversePredicate(pred: Pred), Other: RCR))
9279 return false;
9280
9281 return std::nullopt;
9282 };
9283 if (auto Res = CRImpliesPred(ConstantRange::makeAllowedICmpRegion(Pred: LPred, Other: LCR),
9284 RPred))
9285 return Res;
9286 if (LPred.hasSameSign() ^ RPred.hasSameSign()) {
9287 LPred = LPred.hasSameSign() ? ICmpInst::getFlippedSignednessPredicate(Pred: LPred)
9288 : static_cast<CmpInst::Predicate>(LPred);
9289 RPred = RPred.hasSameSign() ? ICmpInst::getFlippedSignednessPredicate(Pred: RPred)
9290 : static_cast<CmpInst::Predicate>(RPred);
9291 return CRImpliesPred(ConstantRange::makeAllowedICmpRegion(Pred: LPred, Other: LCR),
9292 RPred);
9293 }
9294 return std::nullopt;
9295}
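
// Illustrative example (not from upstream): for a common operand %x,
// (%x u< 4) implies (%x u< 10) is true: makeAllowedICmpRegion(ULT, [4,5))
// yields [0,4), and every value in [0,4) is u< 10. Likewise, (%x u< 4)
// implies that (%x u> 20) is false, since no value in [0,4) is u> 20.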
9296
9297/// Return true if LHS implies RHS (expanded to its components as "R0 RPred R1")
9298/// is true. Return false if LHS implies RHS is false. Otherwise, return
9299/// std::nullopt if we can't infer anything.
9300static std::optional<bool>
9301isImpliedCondICmps(CmpPredicate LPred, const Value *L0, const Value *L1,
9302 CmpPredicate RPred, const Value *R0, const Value *R1,
9303 const DataLayout &DL, bool LHSIsTrue) {
9304 // The rest of the logic assumes the LHS condition is true. If that's not the
9305 // case, invert the predicate to make it so.
9306 if (!LHSIsTrue)
9307 LPred = ICmpInst::getInverseCmpPredicate(Pred: LPred);
9308
9309 // We can have non-canonical operands, so try to normalize any common operand
9310 // to L0/R0.
9311 if (L0 == R1) {
9312 std::swap(a&: R0, b&: R1);
9313 RPred = ICmpInst::getSwappedCmpPredicate(Pred: RPred);
9314 }
9315 if (R0 == L1) {
9316 std::swap(a&: L0, b&: L1);
9317 LPred = ICmpInst::getSwappedCmpPredicate(Pred: LPred);
9318 }
9319 if (L1 == R1) {
9320 // If we have L0 == R0 and L1 == R1, then make L1/R1 the constants.
9321 if (L0 != R0 || match(V: L0, P: m_ImmConstant())) {
9322 std::swap(a&: L0, b&: L1);
9323 LPred = ICmpInst::getSwappedCmpPredicate(Pred: LPred);
9324 std::swap(a&: R0, b&: R1);
9325 RPred = ICmpInst::getSwappedCmpPredicate(Pred: RPred);
9326 }
9327 }
9328
9329 // See if we can infer anything if operand-0 matches and we have at least one
9330 // constant.
9331 const APInt *Unused;
9332 if (L0 == R0 && (match(V: L1, P: m_APInt(Res&: Unused)) || match(V: R1, P: m_APInt(Res&: Unused)))) {
9333    // Potential TODO: We could also use the constant range of L0/R0 to further
9334    // constrain the constant ranges. At the moment this leads to
9335 // several regressions related to not transforming `multi_use(A + C0) eq/ne
9336 // C1` (see discussion: D58633).
9337 ConstantRange LCR = computeConstantRange(
9338 V: L1, ForSigned: ICmpInst::isSigned(predicate: LPred), /* UseInstrInfo=*/true, /*AC=*/nullptr,
9339 /*CxtI=*/CtxI: nullptr, /*DT=*/nullptr, Depth: MaxAnalysisRecursionDepth - 1);
9340 ConstantRange RCR = computeConstantRange(
9341 V: R1, ForSigned: ICmpInst::isSigned(predicate: RPred), /* UseInstrInfo=*/true, /*AC=*/nullptr,
9342 /*CxtI=*/CtxI: nullptr, /*DT=*/nullptr, Depth: MaxAnalysisRecursionDepth - 1);
9343 // Even if L1/R1 are not both constant, we can still sometimes deduce
9344 // relationship from a single constant. For example X u> Y implies X != 0.
9345 if (auto R = isImpliedCondCommonOperandWithCR(LPred, LCR, RPred, RCR))
9346 return R;
9347 // If both L1/R1 were exact constant ranges and we didn't get anything
9348 // here, we won't be able to deduce this.
9349 if (match(V: L1, P: m_APInt(Res&: Unused)) && match(V: R1, P: m_APInt(Res&: Unused)))
9350 return std::nullopt;
9351 }
9352
9353 // Can we infer anything when the two compares have matching operands?
9354 if (L0 == R0 && L1 == R1)
9355 return ICmpInst::isImpliedByMatchingCmp(Pred1: LPred, Pred2: RPred);
9356
9357  // This only really makes sense for signed comparisons: "X - Y must be
9358  // non-negative if X >= Y and the subtraction does not overflow".
9359 // Take SGT as an example: L0:x > L1:y and C >= 0
9360 // ==> R0:(x -nsw y) < R1:(-C) is false
9361 CmpInst::Predicate SignedLPred = LPred.getPreferredSignedPredicate();
9362 if ((SignedLPred == ICmpInst::ICMP_SGT ||
9363 SignedLPred == ICmpInst::ICMP_SGE) &&
9364 match(V: R0, P: m_NSWSub(L: m_Specific(V: L0), R: m_Specific(V: L1)))) {
9365 if (match(V: R1, P: m_NonPositive()) &&
9366 ICmpInst::isImpliedByMatchingCmp(Pred1: SignedLPred, Pred2: RPred) == false)
9367 return false;
9368 }
9369
9370 // Take SLT as an example: L0:x < L1:y and C <= 0
9371 // ==> R0:(x -nsw y) < R1:(-C) is true
9372 if ((SignedLPred == ICmpInst::ICMP_SLT ||
9373 SignedLPred == ICmpInst::ICMP_SLE) &&
9374 match(V: R0, P: m_NSWSub(L: m_Specific(V: L0), R: m_Specific(V: L1)))) {
9375 if (match(V: R1, P: m_NonNegative()) &&
9376 ICmpInst::isImpliedByMatchingCmp(Pred1: SignedLPred, Pred2: RPred) == true)
9377 return true;
9378 }
9379
9380 // L0 = R0 = L1 + R1, L0 >=u L1 implies R0 >=u R1, L0 <u L1 implies R0 <u R1
9381 if (L0 == R0 &&
9382 (LPred == ICmpInst::ICMP_ULT || LPred == ICmpInst::ICMP_UGE) &&
9383 (RPred == ICmpInst::ICMP_ULT || RPred == ICmpInst::ICMP_UGE) &&
9384 match(V: L0, P: m_c_Add(L: m_Specific(V: L1), R: m_Specific(V: R1))))
9385 return CmpPredicate::getMatching(A: LPred, B: RPred).has_value();
9386
9387 if (auto P = CmpPredicate::getMatching(A: LPred, B: RPred))
9388 return isImpliedCondOperands(Pred: *P, ALHS: L0, ARHS: L1, BLHS: R0, BRHS: R1);
9389
9390 return std::nullopt;
9391}
9392
9393/// Return true if LHS implies RHS is true. Return false if LHS implies RHS is
9394/// false. Otherwise, return std::nullopt if we can't infer anything. We
9395/// expect the RHS to be an icmp and the LHS to be an 'and', 'or', or a 'select'
9396/// instruction.
9397static std::optional<bool>
9398isImpliedCondAndOr(const Instruction *LHS, CmpPredicate RHSPred,
9399 const Value *RHSOp0, const Value *RHSOp1,
9400 const DataLayout &DL, bool LHSIsTrue, unsigned Depth) {
9401 // The LHS must be an 'or', 'and', or a 'select' instruction.
9402 assert((LHS->getOpcode() == Instruction::And ||
9403 LHS->getOpcode() == Instruction::Or ||
9404 LHS->getOpcode() == Instruction::Select) &&
9405 "Expected LHS to be 'and', 'or', or 'select'.");
9406
9407 assert(Depth <= MaxAnalysisRecursionDepth && "Hit recursion limit");
9408
9409 // If the result of an 'or' is false, then we know both legs of the 'or' are
9410 // false. Similarly, if the result of an 'and' is true, then we know both
9411 // legs of the 'and' are true.
9412 const Value *ALHS, *ARHS;
9413 if ((!LHSIsTrue && match(V: LHS, P: m_LogicalOr(L: m_Value(V&: ALHS), R: m_Value(V&: ARHS)))) ||
9414 (LHSIsTrue && match(V: LHS, P: m_LogicalAnd(L: m_Value(V&: ALHS), R: m_Value(V&: ARHS))))) {
9415    // FIXME: Make this non-recursive.
9416 if (std::optional<bool> Implication = isImpliedCondition(
9417 LHS: ALHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth: Depth + 1))
9418 return Implication;
9419 if (std::optional<bool> Implication = isImpliedCondition(
9420 LHS: ARHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth: Depth + 1))
9421 return Implication;
9422 return std::nullopt;
9423 }
9424 return std::nullopt;
9425}
9426
9427std::optional<bool>
9428llvm::isImpliedCondition(const Value *LHS, CmpPredicate RHSPred,
9429 const Value *RHSOp0, const Value *RHSOp1,
9430 const DataLayout &DL, bool LHSIsTrue, unsigned Depth) {
9431 // Bail out when we hit the limit.
9432 if (Depth == MaxAnalysisRecursionDepth)
9433 return std::nullopt;
9434
9435 // A mismatch occurs when we compare a scalar cmp to a vector cmp, for
9436 // example.
9437 if (RHSOp0->getType()->isVectorTy() != LHS->getType()->isVectorTy())
9438 return std::nullopt;
9439
9440 assert(LHS->getType()->isIntOrIntVectorTy(1) &&
9441 "Expected integer type only!");
9442
9443 // Match not
9444 if (match(V: LHS, P: m_Not(V: m_Value(V&: LHS))))
9445 LHSIsTrue = !LHSIsTrue;
9446
9447 // Both LHS and RHS are icmps.
9448 if (const auto *LHSCmp = dyn_cast<ICmpInst>(Val: LHS))
9449 return isImpliedCondICmps(LPred: LHSCmp->getCmpPredicate(), L0: LHSCmp->getOperand(i_nocapture: 0),
9450 L1: LHSCmp->getOperand(i_nocapture: 1), RPred: RHSPred, R0: RHSOp0, R1: RHSOp1,
9451 DL, LHSIsTrue);
9452 const Value *V;
9453 if (match(V: LHS, P: m_NUWTrunc(Op: m_Value(V))))
9454 return isImpliedCondICmps(LPred: CmpInst::ICMP_NE, L0: V,
9455 L1: ConstantInt::get(Ty: V->getType(), V: 0), RPred: RHSPred,
9456 R0: RHSOp0, R1: RHSOp1, DL, LHSIsTrue);
9457
9458 /// The LHS should be an 'or', 'and', or a 'select' instruction. We expect
9459 /// the RHS to be an icmp.
9460 /// FIXME: Add support for and/or/select on the RHS.
9461 if (const Instruction *LHSI = dyn_cast<Instruction>(Val: LHS)) {
9462 if ((LHSI->getOpcode() == Instruction::And ||
9463 LHSI->getOpcode() == Instruction::Or ||
9464 LHSI->getOpcode() == Instruction::Select))
9465 return isImpliedCondAndOr(LHS: LHSI, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue,
9466 Depth);
9467 }
9468 return std::nullopt;
9469}
9470
9471std::optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
9472 const DataLayout &DL,
9473 bool LHSIsTrue, unsigned Depth) {
9474 // LHS ==> RHS by definition
9475 if (LHS == RHS)
9476 return LHSIsTrue;
9477
9478 // Match not
9479 bool InvertRHS = false;
9480 if (match(V: RHS, P: m_Not(V: m_Value(V&: RHS)))) {
9481 if (LHS == RHS)
9482 return !LHSIsTrue;
9483 InvertRHS = true;
9484 }
9485
9486 if (const ICmpInst *RHSCmp = dyn_cast<ICmpInst>(Val: RHS)) {
9487 if (auto Implied = isImpliedCondition(
9488 LHS, RHSPred: RHSCmp->getCmpPredicate(), RHSOp0: RHSCmp->getOperand(i_nocapture: 0),
9489 RHSOp1: RHSCmp->getOperand(i_nocapture: 1), DL, LHSIsTrue, Depth))
9490 return InvertRHS ? !*Implied : *Implied;
9491 return std::nullopt;
9492 }
9493
9494 const Value *V;
9495 if (match(V: RHS, P: m_NUWTrunc(Op: m_Value(V)))) {
9496 if (auto Implied = isImpliedCondition(LHS, RHSPred: CmpInst::ICMP_NE, RHSOp0: V,
9497 RHSOp1: ConstantInt::get(Ty: V->getType(), V: 0), DL,
9498 LHSIsTrue, Depth))
9499 return InvertRHS ? !*Implied : *Implied;
9500 return std::nullopt;
9501 }
9502
9503 if (Depth == MaxAnalysisRecursionDepth)
9504 return std::nullopt;
9505
9506 // LHS ==> (RHS1 || RHS2) if LHS ==> RHS1 or LHS ==> RHS2
9507 // LHS ==> !(RHS1 && RHS2) if LHS ==> !RHS1 or LHS ==> !RHS2
9508 const Value *RHS1, *RHS2;
9509 if (match(V: RHS, P: m_LogicalOr(L: m_Value(V&: RHS1), R: m_Value(V&: RHS2)))) {
9510 if (std::optional<bool> Imp =
9511 isImpliedCondition(LHS, RHS: RHS1, DL, LHSIsTrue, Depth: Depth + 1))
9512 if (*Imp == true)
9513 return !InvertRHS;
9514 if (std::optional<bool> Imp =
9515 isImpliedCondition(LHS, RHS: RHS2, DL, LHSIsTrue, Depth: Depth + 1))
9516 if (*Imp == true)
9517 return !InvertRHS;
9518 }
9519 if (match(V: RHS, P: m_LogicalAnd(L: m_Value(V&: RHS1), R: m_Value(V&: RHS2)))) {
9520 if (std::optional<bool> Imp =
9521 isImpliedCondition(LHS, RHS: RHS1, DL, LHSIsTrue, Depth: Depth + 1))
9522 if (*Imp == false)
9523 return InvertRHS;
9524 if (std::optional<bool> Imp =
9525 isImpliedCondition(LHS, RHS: RHS2, DL, LHSIsTrue, Depth: Depth + 1))
9526 if (*Imp == false)
9527 return InvertRHS;
9528 }
9529
9530 return std::nullopt;
9531}
9532
9533// Returns a pair (Condition, ConditionIsTrue), where Condition is a branch
9534// condition dominating ContextI or nullptr, if no condition is found.
9535static std::pair<Value *, bool>
9536getDomPredecessorCondition(const Instruction *ContextI) {
9537 if (!ContextI || !ContextI->getParent())
9538 return {nullptr, false};
9539
9540 // TODO: This is a poor/cheap way to determine dominance. Should we use a
9541 // dominator tree (eg, from a SimplifyQuery) instead?
9542 const BasicBlock *ContextBB = ContextI->getParent();
9543 const BasicBlock *PredBB = ContextBB->getSinglePredecessor();
9544 if (!PredBB)
9545 return {nullptr, false};
9546
9547 // We need a conditional branch in the predecessor.
9548 Value *PredCond;
9549 BasicBlock *TrueBB, *FalseBB;
9550 if (!match(V: PredBB->getTerminator(), P: m_Br(C: m_Value(V&: PredCond), T&: TrueBB, F&: FalseBB)))
9551 return {nullptr, false};
9552
9553 // The branch should get simplified. Don't bother simplifying this condition.
9554 if (TrueBB == FalseBB)
9555 return {nullptr, false};
9556
9557 assert((TrueBB == ContextBB || FalseBB == ContextBB) &&
9558 "Predecessor block does not point to successor?");
9559
9560 // Is this condition implied by the predecessor condition?
9561 return {PredCond, TrueBB == ContextBB};
9562}
9563
9564std::optional<bool> llvm::isImpliedByDomCondition(const Value *Cond,
9565 const Instruction *ContextI,
9566 const DataLayout &DL) {
9567 assert(Cond->getType()->isIntOrIntVectorTy(1) && "Condition must be bool");
9568 auto PredCond = getDomPredecessorCondition(ContextI);
9569 if (PredCond.first)
9570 return isImpliedCondition(LHS: PredCond.first, RHS: Cond, DL, LHSIsTrue: PredCond.second);
9571 return std::nullopt;
9572}
9573
9574std::optional<bool> llvm::isImpliedByDomCondition(CmpPredicate Pred,
9575 const Value *LHS,
9576 const Value *RHS,
9577 const Instruction *ContextI,
9578 const DataLayout &DL) {
9579 auto PredCond = getDomPredecessorCondition(ContextI);
9580 if (PredCond.first)
9581 return isImpliedCondition(LHS: PredCond.first, RHSPred: Pred, RHSOp0: LHS, RHSOp1: RHS, DL,
9582 LHSIsTrue: PredCond.second);
9583 return std::nullopt;
9584}
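
// Illustrative example (not from upstream): given
//   entry:
//     %c = icmp ult i32 %x, 8
//     br i1 %c, label %then, label %else
//   then:
//     %c2 = icmp ult i32 %x, 16
// isImpliedByDomCondition(%c2, <any instruction in %then>, DL) returns true:
// %then's single predecessor branches on %c, and %x u< 8 implies %x u< 16.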
9585
9586static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
9587 APInt &Upper, const InstrInfoQuery &IIQ,
9588 bool PreferSignedRange) {
9589 unsigned Width = Lower.getBitWidth();
9590 const APInt *C;
9591 switch (BO.getOpcode()) {
9592 case Instruction::Sub:
9593 if (match(V: BO.getOperand(i_nocapture: 0), P: m_APInt(Res&: C))) {
9594 bool HasNSW = IIQ.hasNoSignedWrap(Op: &BO);
9595 bool HasNUW = IIQ.hasNoUnsignedWrap(Op: &BO);
9596
9597 // If the caller expects a signed compare, then try to use a signed range.
9598 // Otherwise if both no-wraps are set, use the unsigned range because it
9599 // is never larger than the signed range. Example:
9600 // "sub nuw nsw i8 -2, x" is unsigned [0, 254] vs. signed [-128, 126].
9601 // "sub nuw nsw i8 2, x" is unsigned [0, 2] vs. signed [-125, 127].
9602 if (PreferSignedRange && HasNSW && HasNUW)
9603 HasNUW = false;
9604
9605 if (HasNUW) {
9606 // 'sub nuw c, x' produces [0, C].
9607 Upper = *C + 1;
9608 } else if (HasNSW) {
9609 if (C->isNegative()) {
9610 // 'sub nsw -C, x' produces [SINT_MIN, -C - SINT_MIN].
9611 Lower = APInt::getSignedMinValue(numBits: Width);
9612 Upper = *C - APInt::getSignedMaxValue(numBits: Width);
9613 } else {
9614        // Note that sub 0, INT_MIN is not NSW; it technically is a signed wrap.
9615 // 'sub nsw C, x' produces [C - SINT_MAX, SINT_MAX].
9616 Lower = *C - APInt::getSignedMaxValue(numBits: Width);
9617 Upper = APInt::getSignedMinValue(numBits: Width);
9618 }
9619 }
9620 }
9621 break;
9622 case Instruction::Add:
9623 if (match(V: BO.getOperand(i_nocapture: 1), P: m_APInt(Res&: C)) && !C->isZero()) {
9624 bool HasNSW = IIQ.hasNoSignedWrap(Op: &BO);
9625 bool HasNUW = IIQ.hasNoUnsignedWrap(Op: &BO);
9626
9627 // If the caller expects a signed compare, then try to use a signed
9628 // range. Otherwise if both no-wraps are set, use the unsigned range
9629 // because it is never larger than the signed range. Example: "add nuw
9630 // nsw i8 X, -2" is unsigned [254,255] vs. signed [-128, 125].
9631 if (PreferSignedRange && HasNSW && HasNUW)
9632 HasNUW = false;
9633
9634 if (HasNUW) {
9635 // 'add nuw x, C' produces [C, UINT_MAX].
9636 Lower = *C;
9637 } else if (HasNSW) {
9638 if (C->isNegative()) {
9639 // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
9640 Lower = APInt::getSignedMinValue(numBits: Width);
9641 Upper = APInt::getSignedMaxValue(numBits: Width) + *C + 1;
9642 } else {
9643 // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX].
9644 Lower = APInt::getSignedMinValue(numBits: Width) + *C;
9645 Upper = APInt::getSignedMaxValue(numBits: Width) + 1;
9646 }
9647 }
9648 }
9649 break;
9650
9651 case Instruction::And:
9652 if (match(V: BO.getOperand(i_nocapture: 1), P: m_APInt(Res&: C)))
9653 // 'and x, C' produces [0, C].
9654 Upper = *C + 1;
9655 // X & -X is a power of two or zero. So we can cap the value at max power of
9656 // two.
9657 if (match(V: BO.getOperand(i_nocapture: 0), P: m_Neg(V: m_Specific(V: BO.getOperand(i_nocapture: 1)))) ||
9658 match(V: BO.getOperand(i_nocapture: 1), P: m_Neg(V: m_Specific(V: BO.getOperand(i_nocapture: 0)))))
9659 Upper = APInt::getSignedMinValue(numBits: Width) + 1;
9660 break;
9661
9662 case Instruction::Or:
9663 if (match(V: BO.getOperand(i_nocapture: 1), P: m_APInt(Res&: C)))
9664 // 'or x, C' produces [C, UINT_MAX].
9665 Lower = *C;
9666 break;
9667
9668 case Instruction::AShr:
9669 if (match(V: BO.getOperand(i_nocapture: 1), P: m_APInt(Res&: C)) && C->ult(RHS: Width)) {
9670 // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C].
9671 Lower = APInt::getSignedMinValue(numBits: Width).ashr(ShiftAmt: *C);
9672 Upper = APInt::getSignedMaxValue(numBits: Width).ashr(ShiftAmt: *C) + 1;
9673 } else if (match(V: BO.getOperand(i_nocapture: 0), P: m_APInt(Res&: C))) {
9674 unsigned ShiftAmount = Width - 1;
9675 if (!C->isZero() && IIQ.isExact(Op: &BO))
9676 ShiftAmount = C->countr_zero();
9677 if (C->isNegative()) {
9678 // 'ashr C, x' produces [C, C >> (Width-1)]
9679 Lower = *C;
9680 Upper = C->ashr(ShiftAmt: ShiftAmount) + 1;
9681 } else {
9682 // 'ashr C, x' produces [C >> (Width-1), C]
9683 Lower = C->ashr(ShiftAmt: ShiftAmount);
9684 Upper = *C + 1;
9685 }
9686 }
9687 break;
9688
9689 case Instruction::LShr:
9690 if (match(V: BO.getOperand(i_nocapture: 1), P: m_APInt(Res&: C)) && C->ult(RHS: Width)) {
9691 // 'lshr x, C' produces [0, UINT_MAX >> C].
9692 Upper = APInt::getAllOnes(numBits: Width).lshr(ShiftAmt: *C) + 1;
9693 } else if (match(V: BO.getOperand(i_nocapture: 0), P: m_APInt(Res&: C))) {
9694 // 'lshr C, x' produces [C >> (Width-1), C].
9695 unsigned ShiftAmount = Width - 1;
9696 if (!C->isZero() && IIQ.isExact(Op: &BO))
9697 ShiftAmount = C->countr_zero();
9698 Lower = C->lshr(shiftAmt: ShiftAmount);
9699 Upper = *C + 1;
9700 }
9701 break;
9702
9703 case Instruction::Shl:
9704 if (match(V: BO.getOperand(i_nocapture: 0), P: m_APInt(Res&: C))) {
9705 if (IIQ.hasNoUnsignedWrap(Op: &BO)) {
9706 // 'shl nuw C, x' produces [C, C << CLZ(C)]
9707 Lower = *C;
9708 Upper = Lower.shl(shiftAmt: Lower.countl_zero()) + 1;
9709 } else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw?
9710 if (C->isNegative()) {
9711 // 'shl nsw C, x' produces [C << CLO(C)-1, C]
9712 unsigned ShiftAmount = C->countl_one() - 1;
9713 Lower = C->shl(shiftAmt: ShiftAmount);
9714 Upper = *C + 1;
9715 } else {
9716 // 'shl nsw C, x' produces [C, C << CLZ(C)-1]
9717 unsigned ShiftAmount = C->countl_zero() - 1;
9718 Lower = *C;
9719 Upper = C->shl(shiftAmt: ShiftAmount) + 1;
9720 }
9721 } else {
9722 // If lowbit is set, value can never be zero.
9723 if ((*C)[0])
9724 Lower = APInt::getOneBitSet(numBits: Width, BitNo: 0);
9725        // If we are shifting a constant, the largest result occurs when the
9726        // longest sequence of consecutive ones is shifted into the high bits
9727        // (breaking ties for which sequence is higher). At the moment we take a
9728        // liberal upper bound on this by just popcounting the constant.
9729        // TODO: There may be a bitwise trick for the longest/highest
9730        // consecutive sequence of ones (the naive method is an O(Width) loop).
9731 Upper = APInt::getHighBitsSet(numBits: Width, hiBitsSet: C->popcount()) + 1;
9732 }
9733 } else if (match(V: BO.getOperand(i_nocapture: 1), P: m_APInt(Res&: C)) && C->ult(RHS: Width)) {
9734 Upper = APInt::getBitsSetFrom(numBits: Width, loBit: C->getZExtValue()) + 1;
9735 }
9736 break;
9737
9738 case Instruction::SDiv:
9739 if (match(V: BO.getOperand(i_nocapture: 1), P: m_APInt(Res&: C))) {
9740 APInt IntMin = APInt::getSignedMinValue(numBits: Width);
9741 APInt IntMax = APInt::getSignedMaxValue(numBits: Width);
9742 if (C->isAllOnes()) {
9743        // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX], because
9744        // INT_MIN / -1 overflows and is undefined.
9745 Lower = IntMin + 1;
9746 Upper = IntMax + 1;
9747 } else if (C->countl_zero() < Width - 1) {
9748 // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C]
9749 // where C != -1 and C != 0 and C != 1
9750 Lower = IntMin.sdiv(RHS: *C);
9751 Upper = IntMax.sdiv(RHS: *C);
9752 if (Lower.sgt(RHS: Upper))
9753 std::swap(a&: Lower, b&: Upper);
9754 Upper = Upper + 1;
9755 assert(Upper != Lower && "Upper part of range has wrapped!");
9756 }
9757 } else if (match(V: BO.getOperand(i_nocapture: 0), P: m_APInt(Res&: C))) {
9758 if (C->isMinSignedValue()) {
9759 // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
9760 Lower = *C;
9761 Upper = Lower.lshr(shiftAmt: 1) + 1;
9762 } else {
9763 // 'sdiv C, x' produces [-|C|, |C|].
9764 Upper = C->abs() + 1;
9765 Lower = (-Upper) + 1;
9766 }
9767 }
9768 break;
9769
9770 case Instruction::UDiv:
9771 if (match(V: BO.getOperand(i_nocapture: 1), P: m_APInt(Res&: C)) && !C->isZero()) {
9772 // 'udiv x, C' produces [0, UINT_MAX / C].
9773 Upper = APInt::getMaxValue(numBits: Width).udiv(RHS: *C) + 1;
9774 } else if (match(V: BO.getOperand(i_nocapture: 0), P: m_APInt(Res&: C))) {
9775 // 'udiv C, x' produces [0, C].
9776 Upper = *C + 1;
9777 }
9778 break;
9779
9780 case Instruction::SRem:
9781 if (match(V: BO.getOperand(i_nocapture: 1), P: m_APInt(Res&: C))) {
9782 // 'srem x, C' produces (-|C|, |C|).
9783 Upper = C->abs();
9784 Lower = (-Upper) + 1;
9785 } else if (match(V: BO.getOperand(i_nocapture: 0), P: m_APInt(Res&: C))) {
9786 if (C->isNegative()) {
9787 // 'srem -|C|, x' produces [-|C|, 0].
9788 Upper = 1;
9789 Lower = *C;
9790 } else {
9791 // 'srem |C|, x' produces [0, |C|].
9792 Upper = *C + 1;
9793 }
9794 }
9795 break;
9796
9797 case Instruction::URem:
9798 if (match(V: BO.getOperand(i_nocapture: 1), P: m_APInt(Res&: C)))
9799 // 'urem x, C' produces [0, C).
9800 Upper = *C;
9801 else if (match(V: BO.getOperand(i_nocapture: 0), P: m_APInt(Res&: C)))
9802 // 'urem C, x' produces [0, C].
9803 Upper = *C + 1;
9804 break;
9805
9806 default:
9807 break;
9808 }
9809}
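
// Illustrative example (not from upstream): for 'add nuw i8 %x, 16' the Add
// case above sets Lower = 16 and leaves Upper = 0, which the caller turns
// into the wrapped range ConstantRange::getNonEmpty(16, 0) == [16, 255].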
9810
9811static ConstantRange getRangeForIntrinsic(const IntrinsicInst &II,
9812 bool UseInstrInfo) {
9813 unsigned Width = II.getType()->getScalarSizeInBits();
9814 const APInt *C;
9815 switch (II.getIntrinsicID()) {
9816 case Intrinsic::ctlz:
9817 case Intrinsic::cttz: {
9818 APInt Upper(Width, Width);
9819 if (!UseInstrInfo || !match(V: II.getArgOperand(i: 1), P: m_One()))
9820 Upper += 1;
9821 // Maximum of set/clear bits is the bit width.
9822 return ConstantRange::getNonEmpty(Lower: APInt::getZero(numBits: Width), Upper);
9823 }
9824 case Intrinsic::ctpop:
9825 // Maximum of set/clear bits is the bit width.
9826 return ConstantRange::getNonEmpty(Lower: APInt::getZero(numBits: Width),
9827 Upper: APInt(Width, Width) + 1);
9828 case Intrinsic::uadd_sat:
9829 // uadd.sat(x, C) produces [C, UINT_MAX].
9830 if (match(V: II.getOperand(i_nocapture: 0), P: m_APInt(Res&: C)) ||
9831 match(V: II.getOperand(i_nocapture: 1), P: m_APInt(Res&: C)))
9832 return ConstantRange::getNonEmpty(Lower: *C, Upper: APInt::getZero(numBits: Width));
9833 break;
9834 case Intrinsic::sadd_sat:
9835 if (match(V: II.getOperand(i_nocapture: 0), P: m_APInt(Res&: C)) ||
9836 match(V: II.getOperand(i_nocapture: 1), P: m_APInt(Res&: C))) {
9837 if (C->isNegative())
9838 // sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)].
9839 return ConstantRange::getNonEmpty(Lower: APInt::getSignedMinValue(numBits: Width),
9840 Upper: APInt::getSignedMaxValue(numBits: Width) + *C +
9841 1);
9842
9843 // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX].
9844 return ConstantRange::getNonEmpty(Lower: APInt::getSignedMinValue(numBits: Width) + *C,
9845 Upper: APInt::getSignedMaxValue(numBits: Width) + 1);
9846 }
9847 break;
9848 case Intrinsic::usub_sat:
9849 // usub.sat(C, x) produces [0, C].
9850 if (match(V: II.getOperand(i_nocapture: 0), P: m_APInt(Res&: C)))
9851 return ConstantRange::getNonEmpty(Lower: APInt::getZero(numBits: Width), Upper: *C + 1);
9852
9853 // usub.sat(x, C) produces [0, UINT_MAX - C].
9854 if (match(V: II.getOperand(i_nocapture: 1), P: m_APInt(Res&: C)))
9855 return ConstantRange::getNonEmpty(Lower: APInt::getZero(numBits: Width),
9856 Upper: APInt::getMaxValue(numBits: Width) - *C + 1);
9857 break;
9858 case Intrinsic::ssub_sat:
9859 if (match(V: II.getOperand(i_nocapture: 0), P: m_APInt(Res&: C))) {
9860 if (C->isNegative())
9861 // ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)].
9862 return ConstantRange::getNonEmpty(Lower: APInt::getSignedMinValue(numBits: Width),
9863 Upper: *C - APInt::getSignedMinValue(numBits: Width) +
9864 1);
9865
9866 // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX].
9867 return ConstantRange::getNonEmpty(Lower: *C - APInt::getSignedMaxValue(numBits: Width),
9868 Upper: APInt::getSignedMaxValue(numBits: Width) + 1);
9869 } else if (match(V: II.getOperand(i_nocapture: 1), P: m_APInt(Res&: C))) {
9870 if (C->isNegative())
9871 // ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX]:
9872 return ConstantRange::getNonEmpty(Lower: APInt::getSignedMinValue(numBits: Width) - *C,
9873 Upper: APInt::getSignedMaxValue(numBits: Width) + 1);
9874
9875 // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C].
9876 return ConstantRange::getNonEmpty(Lower: APInt::getSignedMinValue(numBits: Width),
9877 Upper: APInt::getSignedMaxValue(numBits: Width) - *C +
9878 1);
9879 }
9880 break;
9881 case Intrinsic::umin:
9882 case Intrinsic::umax:
9883 case Intrinsic::smin:
9884 case Intrinsic::smax:
9885 if (!match(V: II.getOperand(i_nocapture: 0), P: m_APInt(Res&: C)) &&
9886 !match(V: II.getOperand(i_nocapture: 1), P: m_APInt(Res&: C)))
9887 break;
9888
9889 switch (II.getIntrinsicID()) {
9890 case Intrinsic::umin:
9891 return ConstantRange::getNonEmpty(Lower: APInt::getZero(numBits: Width), Upper: *C + 1);
9892 case Intrinsic::umax:
9893 return ConstantRange::getNonEmpty(Lower: *C, Upper: APInt::getZero(numBits: Width));
9894 case Intrinsic::smin:
9895 return ConstantRange::getNonEmpty(Lower: APInt::getSignedMinValue(numBits: Width),
9896 Upper: *C + 1);
9897 case Intrinsic::smax:
9898 return ConstantRange::getNonEmpty(Lower: *C,
9899 Upper: APInt::getSignedMaxValue(numBits: Width) + 1);
9900 default:
9901 llvm_unreachable("Must be min/max intrinsic");
9902 }
9903 break;
9904 case Intrinsic::abs:
9905 // If abs of SIGNED_MIN is poison, then the result is [0..SIGNED_MAX],
9906 // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
9907 if (match(V: II.getOperand(i_nocapture: 1), P: m_One()))
9908 return ConstantRange::getNonEmpty(Lower: APInt::getZero(numBits: Width),
9909 Upper: APInt::getSignedMaxValue(numBits: Width) + 1);
9910
9911 return ConstantRange::getNonEmpty(Lower: APInt::getZero(numBits: Width),
9912 Upper: APInt::getSignedMinValue(numBits: Width) + 1);
9913 case Intrinsic::vscale:
9914 if (!II.getParent() || !II.getFunction())
9915 break;
9916 return getVScaleRange(F: II.getFunction(), BitWidth: Width);
9917 default:
9918 break;
9919 }
9920
9921 return ConstantRange::getFull(BitWidth: Width);
9922}
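
// Illustrative example (not from upstream): for
//   %r = call i8 @llvm.uadd.sat.i8(i8 %x, i8 200)
// the uadd_sat case above returns [200, 0), i.e. {200, ..., 255}: a
// saturating unsigned add with a constant operand can never produce a value
// below that constant.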
9923
9924static ConstantRange getRangeForSelectPattern(const SelectInst &SI,
9925 const InstrInfoQuery &IIQ) {
9926 unsigned BitWidth = SI.getType()->getScalarSizeInBits();
9927 const Value *LHS = nullptr, *RHS = nullptr;
9928 SelectPatternResult R = matchSelectPattern(V: &SI, LHS, RHS);
9929 if (R.Flavor == SPF_UNKNOWN)
9930 return ConstantRange::getFull(BitWidth);
9931
9932 if (R.Flavor == SelectPatternFlavor::SPF_ABS) {
9933 // If the negation part of the abs (in RHS) has the NSW flag,
9934 // then the result of abs(X) is [0..SIGNED_MAX],
9935 // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
9936 if (match(V: RHS, P: m_Neg(V: m_Specific(V: LHS))) &&
9937 IIQ.hasNoSignedWrap(Op: cast<Instruction>(Val: RHS)))
9938 return ConstantRange::getNonEmpty(Lower: APInt::getZero(numBits: BitWidth),
9939 Upper: APInt::getSignedMaxValue(numBits: BitWidth) + 1);
9940
9941 return ConstantRange::getNonEmpty(Lower: APInt::getZero(numBits: BitWidth),
9942 Upper: APInt::getSignedMinValue(numBits: BitWidth) + 1);
9943 }
9944
9945 if (R.Flavor == SelectPatternFlavor::SPF_NABS) {
9946 // The result of -abs(X) is <= 0.
9947 return ConstantRange::getNonEmpty(Lower: APInt::getSignedMinValue(numBits: BitWidth),
9948 Upper: APInt(BitWidth, 1));
9949 }
9950
9951 const APInt *C;
9952 if (!match(V: LHS, P: m_APInt(Res&: C)) && !match(V: RHS, P: m_APInt(Res&: C)))
9953 return ConstantRange::getFull(BitWidth);
9954
9955 switch (R.Flavor) {
9956 case SPF_UMIN:
9957 return ConstantRange::getNonEmpty(Lower: APInt::getZero(numBits: BitWidth), Upper: *C + 1);
9958 case SPF_UMAX:
9959 return ConstantRange::getNonEmpty(Lower: *C, Upper: APInt::getZero(numBits: BitWidth));
9960 case SPF_SMIN:
9961 return ConstantRange::getNonEmpty(Lower: APInt::getSignedMinValue(numBits: BitWidth),
9962 Upper: *C + 1);
9963 case SPF_SMAX:
9964 return ConstantRange::getNonEmpty(Lower: *C,
9965 Upper: APInt::getSignedMaxValue(numBits: BitWidth) + 1);
9966 default:
9967 return ConstantRange::getFull(BitWidth);
9968 }
9969}
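
// Illustrative example (not from upstream): for
//   %c = icmp slt i32 %x, 10
//   %s = select i1 %c, i32 %x, i32 10
// matchSelectPattern() reports SPF_SMIN with RHS = 10, so the function above
// returns [INT_MIN, 11), i.e. every value that is s<= 10.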
9970
9971static void setLimitForFPToI(const Instruction *I, APInt &Lower, APInt &Upper) {
9972  // The maximum representable value of a half is 65504. For floats, the
9973  // maximum value is about 3.4e38, which requires roughly 129 bits.
9974 unsigned BitWidth = I->getType()->getScalarSizeInBits();
9975 if (!I->getOperand(i: 0)->getType()->getScalarType()->isHalfTy())
9976 return;
9977 if (isa<FPToSIInst>(Val: I) && BitWidth >= 17) {
9978 Lower = APInt(BitWidth, -65504, true);
9979 Upper = APInt(BitWidth, 65505);
9980 }
9981
9982 if (isa<FPToUIInst>(Val: I) && BitWidth >= 16) {
9983 // For a fptoui the lower limit is left as 0.
9984 Upper = APInt(BitWidth, 65505);
9985 }
9986}
9987
9988ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
9989 bool UseInstrInfo, AssumptionCache *AC,
9990 const Instruction *CtxI,
9991 const DominatorTree *DT,
9992 unsigned Depth) {
9993 assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction");
9994
9995 if (Depth == MaxAnalysisRecursionDepth)
9996 return ConstantRange::getFull(BitWidth: V->getType()->getScalarSizeInBits());
9997
9998 if (auto *C = dyn_cast<Constant>(Val: V))
9999 return C->toConstantRange();
10000
10001 unsigned BitWidth = V->getType()->getScalarSizeInBits();
10002 InstrInfoQuery IIQ(UseInstrInfo);
10003 ConstantRange CR = ConstantRange::getFull(BitWidth);
10004 if (auto *BO = dyn_cast<BinaryOperator>(Val: V)) {
10005 APInt Lower = APInt(BitWidth, 0);
10006 APInt Upper = APInt(BitWidth, 0);
10007 // TODO: Return ConstantRange.
10008 setLimitsForBinOp(BO: *BO, Lower, Upper, IIQ, PreferSignedRange: ForSigned);
10009 CR = ConstantRange::getNonEmpty(Lower, Upper);
10010 } else if (auto *II = dyn_cast<IntrinsicInst>(Val: V))
10011 CR = getRangeForIntrinsic(II: *II, UseInstrInfo);
10012 else if (auto *SI = dyn_cast<SelectInst>(Val: V)) {
10013 ConstantRange CRTrue = computeConstantRange(
10014 V: SI->getTrueValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth: Depth + 1);
10015 ConstantRange CRFalse = computeConstantRange(
10016 V: SI->getFalseValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth: Depth + 1);
10017 CR = CRTrue.unionWith(CR: CRFalse);
10018 CR = CR.intersectWith(CR: getRangeForSelectPattern(SI: *SI, IIQ));
10019 } else if (isa<FPToUIInst>(Val: V) || isa<FPToSIInst>(Val: V)) {
10020 APInt Lower = APInt(BitWidth, 0);
10021 APInt Upper = APInt(BitWidth, 0);
10022 // TODO: Return ConstantRange.
10023 setLimitForFPToI(I: cast<Instruction>(Val: V), Lower, Upper);
10024 CR = ConstantRange::getNonEmpty(Lower, Upper);
10025 } else if (const auto *A = dyn_cast<Argument>(Val: V))
10026 if (std::optional<ConstantRange> Range = A->getRange())
10027 CR = *Range;
10028
10029 if (auto *I = dyn_cast<Instruction>(Val: V)) {
10030 if (auto *Range = IIQ.getMetadata(I, KindID: LLVMContext::MD_range))
10031 CR = CR.intersectWith(CR: getConstantRangeFromMetadata(RangeMD: *Range));
10032
10033 if (const auto *CB = dyn_cast<CallBase>(Val: V))
10034 if (std::optional<ConstantRange> Range = CB->getRange())
10035 CR = CR.intersectWith(CR: *Range);
10036 }
10037
10038 if (CtxI && AC) {
10039 // Try to restrict the range based on information from assumptions.
10040 for (auto &AssumeVH : AC->assumptionsFor(V)) {
10041 if (!AssumeVH)
10042 continue;
10043 CallInst *I = cast<CallInst>(Val&: AssumeVH);
10044 assert(I->getParent()->getParent() == CtxI->getParent()->getParent() &&
10045 "Got assumption for the wrong function!");
10046 assert(I->getIntrinsicID() == Intrinsic::assume &&
10047 "must be an assume intrinsic");
10048
10049 if (!isValidAssumeForContext(Inv: I, CxtI: CtxI, DT))
10050 continue;
10051 Value *Arg = I->getArgOperand(i: 0);
10052 ICmpInst *Cmp = dyn_cast<ICmpInst>(Val: Arg);
10053 // Currently we just use information from comparisons.
10054 if (!Cmp || Cmp->getOperand(i_nocapture: 0) != V)
10055 continue;
10056 // TODO: Set "ForSigned" parameter via Cmp->isSigned()?
10057 ConstantRange RHS =
10058 computeConstantRange(V: Cmp->getOperand(i_nocapture: 1), /* ForSigned */ false,
10059 UseInstrInfo, AC, CtxI: I, DT, Depth: Depth + 1);
10060 CR = CR.intersectWith(
10061 CR: ConstantRange::makeAllowedICmpRegion(Pred: Cmp->getPredicate(), Other: RHS));
10062 }
10063 }
10064
10065 return CR;
10066}
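
// Illustrative usage sketch (not part of upstream; assumes the default
// arguments declared in ValueTracking.h): a caller that wants to know whether
// V always fits in an unsigned byte could check
//   ConstantRange CR = computeConstantRange(V, /*ForSigned=*/false);
//   bool FitsInByte = CR.getUnsignedMax().ule(255);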
10067
10068static void
10069addValueAffectedByCondition(Value *V,
10070 function_ref<void(Value *)> InsertAffected) {
10071 assert(V != nullptr);
10072 if (isa<Argument>(Val: V) || isa<GlobalValue>(Val: V)) {
10073 InsertAffected(V);
10074 } else if (auto *I = dyn_cast<Instruction>(Val: V)) {
10075 InsertAffected(V);
10076
10077 // Peek through unary operators to find the source of the condition.
10078 Value *Op;
10079 if (match(V: I, P: m_CombineOr(L: m_PtrToInt(Op: m_Value(V&: Op)), R: m_Trunc(Op: m_Value(V&: Op))))) {
10080 if (isa<Instruction>(Val: Op) || isa<Argument>(Val: Op))
10081 InsertAffected(Op);
10082 }
10083 }
10084}
10085
10086void llvm::findValuesAffectedByCondition(
10087 Value *Cond, bool IsAssume, function_ref<void(Value *)> InsertAffected) {
10088 auto AddAffected = [&InsertAffected](Value *V) {
10089 addValueAffectedByCondition(V, InsertAffected);
10090 };
10091
10092 auto AddCmpOperands = [&AddAffected, IsAssume](Value *LHS, Value *RHS) {
10093 if (IsAssume) {
10094 AddAffected(LHS);
10095 AddAffected(RHS);
10096 } else if (match(V: RHS, P: m_Constant()))
10097 AddAffected(LHS);
10098 };
10099
10100 SmallVector<Value *, 8> Worklist;
10101 SmallPtrSet<Value *, 8> Visited;
10102 Worklist.push_back(Elt: Cond);
10103 while (!Worklist.empty()) {
10104 Value *V = Worklist.pop_back_val();
10105 if (!Visited.insert(Ptr: V).second)
10106 continue;
10107
10108 CmpPredicate Pred;
10109 Value *A, *B, *X;
10110
10111 if (IsAssume) {
10112 AddAffected(V);
10113 if (match(V, P: m_Not(V: m_Value(V&: X))))
10114 AddAffected(X);
10115 }
10116
10117 if (match(V, P: m_LogicalOp(L: m_Value(V&: A), R: m_Value(V&: B)))) {
10118 // assume(A && B) is split to -> assume(A); assume(B);
10119 // assume(!(A || B)) is split to -> assume(!A); assume(!B);
10120 // Finally, assume(A || B) / assume(!(A && B)) generally don't provide
10121 // enough information to be worth handling (intersection of information as
10122 // opposed to union).
10123 if (!IsAssume) {
10124 Worklist.push_back(Elt: A);
10125 Worklist.push_back(Elt: B);
10126 }
10127 } else if (match(V, P: m_ICmp(Pred, L: m_Value(V&: A), R: m_Value(V&: B)))) {
10128 bool HasRHSC = match(V: B, P: m_ConstantInt());
10129 if (ICmpInst::isEquality(P: Pred)) {
10130 AddAffected(A);
10131 if (IsAssume)
10132 AddAffected(B);
10133 if (HasRHSC) {
10134 Value *Y;
10135 // (X & C) or (X | C).
10136 // (X << C) or (X >>_s C) or (X >>_u C).
10137 if (match(V: A, P: m_Shift(L: m_Value(V&: X), R: m_ConstantInt())))
10138 AddAffected(X);
10139 else if (match(V: A, P: m_And(L: m_Value(V&: X), R: m_Value(V&: Y))) ||
10140 match(V: A, P: m_Or(L: m_Value(V&: X), R: m_Value(V&: Y)))) {
10141 AddAffected(X);
10142 AddAffected(Y);
10143 }
10144 }
10145 } else {
10146 AddCmpOperands(A, B);
10147 if (HasRHSC) {
10148 // Handle (A + C1) u< C2, which is the canonical form of
10149 // A > C3 && A < C4.
10150 if (match(V: A, P: m_AddLike(L: m_Value(V&: X), R: m_ConstantInt())))
10151 AddAffected(X);
10152
10153 if (ICmpInst::isUnsigned(predicate: Pred)) {
10154 Value *Y;
10155 // X & Y u> C -> X >u C && Y >u C
10156 // X | Y u< C -> X u< C && Y u< C
10157 // X nuw+ Y u< C -> X u< C && Y u< C
10158 if (match(V: A, P: m_And(L: m_Value(V&: X), R: m_Value(V&: Y))) ||
10159 match(V: A, P: m_Or(L: m_Value(V&: X), R: m_Value(V&: Y))) ||
10160 match(V: A, P: m_NUWAdd(L: m_Value(V&: X), R: m_Value(V&: Y)))) {
10161 AddAffected(X);
10162 AddAffected(Y);
10163 }
10164 // X nuw- Y u> C -> X u> C
10165 if (match(V: A, P: m_NUWSub(L: m_Value(V&: X), R: m_Value())))
10166 AddAffected(X);
10167 }
10168 }
10169
10170 // Handle icmp slt/sgt (bitcast X to int), 0/-1, which is supported
10171 // by computeKnownFPClass().
10172 if (match(V: A, P: m_ElementWiseBitCast(Op: m_Value(V&: X)))) {
10173 if (Pred == ICmpInst::ICMP_SLT && match(V: B, P: m_Zero()))
10174 InsertAffected(X);
10175 else if (Pred == ICmpInst::ICMP_SGT && match(V: B, P: m_AllOnes()))
10176 InsertAffected(X);
10177 }
10178 }
10179
10180 if (HasRHSC && match(V: A, P: m_Intrinsic<Intrinsic::ctpop>(Op0: m_Value(V&: X))))
10181 AddAffected(X);
10182 } else if (match(V, P: m_FCmp(Pred, L: m_Value(V&: A), R: m_Value(V&: B)))) {
10183 AddCmpOperands(A, B);
10184
10185 // fcmp fneg(x), y
10186 // fcmp fabs(x), y
10187 // fcmp fneg(fabs(x)), y
10188 if (match(V: A, P: m_FNeg(X: m_Value(V&: A))))
10189 AddAffected(A);
10190 if (match(V: A, P: m_FAbs(Op0: m_Value(V&: A))))
10191 AddAffected(A);
10192
10193 } else if (match(V, P: m_Intrinsic<Intrinsic::is_fpclass>(Op0: m_Value(V&: A),
10194 Op1: m_Value()))) {
10195 // Handle patterns that computeKnownFPClass() support.
10196 AddAffected(A);
10197 } else if (!IsAssume && match(V, P: m_Trunc(Op: m_Value(V&: X)))) {
10198      // IsAssume is checked here because, for assumes, X is already added
10199      // above via addValueAffectedByCondition (which peeks through trunc).
10200 AddAffected(X);
10201 } else if (!IsAssume && match(V, P: m_Not(V: m_Value(V&: X)))) {
10202      // IsAssume is checked here to avoid issues with ephemeral values.
10203 Worklist.push_back(Elt: X);
10204 }
10205 }
10206}
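
// Illustrative example (not from upstream): for a branch condition
//   %c = icmp ult i32 %a, 16
// findValuesAffectedByCondition(%c, /*IsAssume=*/false, Insert) reports %a
// (the non-constant compare operand), letting callers such as
// DomConditionCache record which values may have refined ranges in the
// branch's successors.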
10207
10208const Value *llvm::stripNullTest(const Value *V) {
10209  // Match (X >> C) or/add zext((X & mask(C)) != 0); the result is zero iff X is zero.
10210 if (const auto *BO = dyn_cast<BinaryOperator>(Val: V)) {
10211 if (BO->getOpcode() == Instruction::Add ||
10212 BO->getOpcode() == Instruction::Or) {
10213 const Value *X;
10214 const APInt *C1, *C2;
10215 if (match(V: BO, P: m_c_BinOp(L: m_LShr(L: m_Value(V&: X), R: m_APInt(Res&: C1)),
10216 R: m_ZExt(Op: m_SpecificICmp(
10217 MatchPred: ICmpInst::ICMP_NE,
10218 L: m_And(L: m_Deferred(V: X), R: m_LowBitMask(V&: C2)),
10219 R: m_Zero())))) &&
10220 C2->popcount() == C1->getZExtValue())
10221 return X;
10222 }
10223 }
10224 return nullptr;
10225}
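
// Illustrative example (not from upstream): for
//   %hi  = lshr i64 %x, 12
//   %rem = and i64 %x, 4095
//   %nz  = icmp ne i64 %rem, 0
//   %lo  = zext i1 %nz to i64
//   %v   = add i64 %hi, %lo
// stripNullTest(%v) returns %x: mask(12) has 12 set bits, matching the shift
// amount, so %v is zero exactly when %x is zero.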
10226
10227Value *llvm::stripNullTest(Value *V) {
10228 return const_cast<Value *>(stripNullTest(V: const_cast<const Value *>(V)));
10229}
10230