1//===- InstCombineSimplifyDemanded.cpp ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains logic for simplifying instructions based on information
10// about how they are used.
11//
12//===----------------------------------------------------------------------===//
13
14#include "InstCombineInternal.h"
15#include "llvm/Analysis/ValueTracking.h"
16#include "llvm/IR/GetElementPtrTypeIterator.h"
17#include "llvm/IR/IntrinsicInst.h"
18#include "llvm/IR/PatternMatch.h"
19#include "llvm/IR/ProfDataUtils.h"
20#include "llvm/Support/KnownBits.h"
21#include "llvm/Transforms/InstCombine/InstCombiner.h"
22
23using namespace llvm;
24using namespace llvm::PatternMatch;
25
26#define DEBUG_TYPE "instcombine"
27
28static cl::opt<bool>
29 VerifyKnownBits("instcombine-verify-known-bits",
30 cl::desc("Verify that computeKnownBits() and "
31 "SimplifyDemandedBits() are consistent"),
32 cl::Hidden, cl::init(Val: false));
33
34static cl::opt<unsigned> SimplifyDemandedVectorEltsDepthLimit(
35 "instcombine-simplify-vector-elts-depth",
36 cl::desc(
37 "Depth limit when simplifying vector instructions and their operands"),
38 cl::Hidden, cl::init(Val: 10));
39
40/// Check to see if the specified operand of the specified instruction is a
41/// constant integer. If so, check to see if there are any bits set in the
42/// constant that are not demanded. If so, shrink the constant and return true.
43static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
44 const APInt &Demanded) {
45 assert(I && "No instruction?");
46 assert(OpNo < I->getNumOperands() && "Operand index too large");
47
48 // The operand must be a constant integer or splat integer.
49 Value *Op = I->getOperand(i: OpNo);
50 const APInt *C;
51 if (!match(V: Op, P: m_APInt(Res&: C)))
52 return false;
53
54 // If there are no bits set that aren't demanded, nothing to do.
55 if (C->isSubsetOf(RHS: Demanded))
56 return false;
57
58 // This instruction is producing bits that are not demanded. Shrink the RHS.
59 I->setOperand(i: OpNo, Val: ConstantInt::get(Ty: Op->getType(), V: *C & Demanded));
60
61 return true;
62}
63
64/// Let N = 2 * M.
65/// Given an N-bit integer representing a pack of two M-bit integers,
66/// we can select one of the packed integers by right-shifting by either
67/// zero or M (which is the most straightforward to check if M is a power
68/// of 2), and then isolating the lower M bits. In this case, we can
69/// represent the shift as a select on whether the shr amount is nonzero.
70static Value *simplifyShiftSelectingPackedElement(Instruction *I,
71 const APInt &DemandedMask,
72 InstCombinerImpl &IC,
73 unsigned Depth) {
74 assert(I->getOpcode() == Instruction::LShr &&
75 "Only lshr instruction supported");
76
77 uint64_t ShlAmt;
78 Value *Upper, *Lower;
79 if (!match(V: I->getOperand(i: 0),
80 P: m_OneUse(SubPattern: m_c_DisjointOr(
81 L: m_OneUse(SubPattern: m_Shl(L: m_Value(V&: Upper), R: m_ConstantInt(V&: ShlAmt))),
82 R: m_Value(V&: Lower)))))
83 return nullptr;
84
85 if (!isPowerOf2_64(Value: ShlAmt))
86 return nullptr;
87
88 const uint64_t DemandedBitWidth = DemandedMask.getActiveBits();
89 if (DemandedBitWidth > ShlAmt)
90 return nullptr;
91
92 // Check that upper demanded bits are not lost from lshift.
93 if (Upper->getType()->getScalarSizeInBits() < ShlAmt + DemandedBitWidth)
94 return nullptr;
95
96 KnownBits KnownLowerBits = IC.computeKnownBits(V: Lower, CxtI: I, Depth);
97 if (!KnownLowerBits.getMaxValue().isIntN(N: ShlAmt))
98 return nullptr;
99
100 Value *ShrAmt = I->getOperand(i: 1);
101 KnownBits KnownShrBits = IC.computeKnownBits(V: ShrAmt, CxtI: I, Depth);
102
103 // Verify that ShrAmt is either exactly ShlAmt (which is a power of 2) or
104 // zero.
105 if (~KnownShrBits.Zero != ShlAmt)
106 return nullptr;
107
108 IRBuilderBase::InsertPointGuard Guard(IC.Builder);
109 IC.Builder.SetInsertPoint(I);
110 Value *ShrAmtZ =
111 IC.Builder.CreateICmpEQ(LHS: ShrAmt, RHS: Constant::getNullValue(Ty: ShrAmt->getType()),
112 Name: ShrAmt->getName() + ".z");
113 // There is no existing !prof metadata we can derive the !prof metadata for
114 // this select.
115 Value *Select = IC.Builder.CreateSelectWithUnknownProfile(C: ShrAmtZ, True: Lower,
116 False: Upper, DEBUG_TYPE);
117 Select->takeName(V: I);
118 return Select;
119}
120
121/// Returns the bitwidth of the given scalar or pointer type. For vector types,
122/// returns the element type's bitwidth.
123static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
124 if (unsigned BitWidth = Ty->getScalarSizeInBits())
125 return BitWidth;
126
127 return DL.getPointerTypeSizeInBits(Ty);
128}
129
130/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
131/// the instruction has any properties that allow us to simplify its operands.
132bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst,
133 KnownBits &Known) {
134 APInt DemandedMask(APInt::getAllOnes(numBits: Known.getBitWidth()));
135 Value *V = SimplifyDemandedUseBits(I: &Inst, DemandedMask, Known,
136 Q: SQ.getWithInstruction(I: &Inst));
137 if (!V) return false;
138 if (V == &Inst) return true;
139 replaceInstUsesWith(I&: Inst, V);
140 return true;
141}
142
143/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
144/// the instruction has any properties that allow us to simplify its operands.
145bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) {
146 KnownBits Known(getBitWidth(Ty: Inst.getType(), DL));
147 return SimplifyDemandedInstructionBits(Inst, Known);
148}
149
150bool InstCombinerImpl::SimplifyDemandedInstructionFPClass(Instruction &Inst) {
151 KnownFPClass Known;
152
153 Value *V = SimplifyDemandedUseFPClass(I: &Inst, DemandedMask: fcAllFlags, Known,
154 Q: SQ.getWithInstruction(I: &Inst));
155 if (!V)
156 return false;
157 if (V == &Inst)
158 return true;
159 replaceInstUsesWith(I&: Inst, V);
160 return true;
161}
162
163/// This form of SimplifyDemandedBits simplifies the specified instruction
164/// operand if possible, updating it in place. It returns true if it made any
165/// change and false otherwise.
166bool InstCombinerImpl::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
167 const APInt &DemandedMask,
168 KnownBits &Known,
169 const SimplifyQuery &Q,
170 unsigned Depth) {
171 Use &U = I->getOperandUse(i: OpNo);
172 Value *V = U.get();
173 if (isa<Constant>(Val: V)) {
174 llvm::computeKnownBits(V, Known, Q, Depth);
175 return false;
176 }
177
178 Known.resetAll();
179 if (DemandedMask.isZero()) {
180 // Not demanding any bits from V.
181 replaceUse(U, NewValue: UndefValue::get(T: V->getType()));
182 return true;
183 }
184
185 Instruction *VInst = dyn_cast<Instruction>(Val: V);
186 if (!VInst) {
187 llvm::computeKnownBits(V, Known, Q, Depth);
188 return false;
189 }
190
191 if (Depth == MaxAnalysisRecursionDepth)
192 return false;
193
194 Value *NewVal;
195 if (VInst->hasOneUse()) {
196 // If the instruction has one use, we can directly simplify it.
197 NewVal = SimplifyDemandedUseBits(I: VInst, DemandedMask, Known, Q, Depth);
198 } else {
199 // If there are multiple uses of this instruction, then we can simplify
200 // VInst to some other value, but not modify the instruction.
201 NewVal =
202 SimplifyMultipleUseDemandedBits(I: VInst, DemandedMask, Known, Q, Depth);
203 }
204 if (!NewVal) return false;
205 if (Instruction* OpInst = dyn_cast<Instruction>(Val&: U))
206 salvageDebugInfo(I&: *OpInst);
207
208 replaceUse(U, NewValue: NewVal);
209 return true;
210}
211
212/// This function attempts to replace V with a simpler value based on the
213/// demanded bits. When this function is called, it is known that only the bits
214/// set in DemandedMask of the result of V are ever used downstream.
215/// Consequently, depending on the mask and V, it may be possible to replace V
216/// with a constant or one of its operands. In such cases, this function does
217/// the replacement and returns true. In all other cases, it returns false after
218/// analyzing the expression and setting KnownOne and known to be one in the
219/// expression. Known.Zero contains all the bits that are known to be zero in
220/// the expression. These are provided to potentially allow the caller (which
221/// might recursively be SimplifyDemandedBits itself) to simplify the
222/// expression.
223/// Known.One and Known.Zero always follow the invariant that:
224/// Known.One & Known.Zero == 0.
225/// That is, a bit can't be both 1 and 0. The bits in Known.One and Known.Zero
226/// are accurate even for bits not in DemandedMask. Note
227/// also that the bitwidth of V, DemandedMask, Known.Zero and Known.One must all
228/// be the same.
229///
230/// This returns null if it did not change anything and it permits no
231/// simplification. This returns V itself if it did some simplification of V's
232/// operands based on the information about what bits are demanded. This returns
233/// some other non-null value if it found out that V is equal to another value
234/// in the context where the specified bits are demanded, but not for all users.
235Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I,
236 const APInt &DemandedMask,
237 KnownBits &Known,
238 const SimplifyQuery &Q,
239 unsigned Depth) {
240 assert(I != nullptr && "Null pointer of Value???");
241 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
242 uint32_t BitWidth = DemandedMask.getBitWidth();
243 Type *VTy = I->getType();
244 assert(
245 (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
246 Known.getBitWidth() == BitWidth &&
247 "Value *V, DemandedMask and Known must have same BitWidth");
248
249 KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth);
250
251 // Update flags after simplifying an operand based on the fact that some high
252 // order bits are not demanded.
253 auto disableWrapFlagsBasedOnUnusedHighBits = [](Instruction *I,
254 unsigned NLZ) {
255 if (NLZ > 0) {
256 // Disable the nsw and nuw flags here: We can no longer guarantee that
257 // we won't wrap after simplification. Removing the nsw/nuw flags is
258 // legal here because the top bit is not demanded.
259 I->setHasNoSignedWrap(false);
260 I->setHasNoUnsignedWrap(false);
261 }
262 return I;
263 };
264
265 // If the high-bits of an ADD/SUB/MUL are not demanded, then we do not care
266 // about the high bits of the operands.
267 auto simplifyOperandsBasedOnUnusedHighBits = [&](APInt &DemandedFromOps) {
268 unsigned NLZ = DemandedMask.countl_zero();
269 // Right fill the mask of bits for the operands to demand the most
270 // significant bit and all those below it.
271 DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ);
272 if (ShrinkDemandedConstant(I, OpNo: 0, Demanded: DemandedFromOps) ||
273 SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedFromOps, Known&: LHSKnown, Q, Depth: Depth + 1) ||
274 ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedFromOps) ||
275 SimplifyDemandedBits(I, OpNo: 1, DemandedMask: DemandedFromOps, Known&: RHSKnown, Q, Depth: Depth + 1)) {
276 disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);
277 return true;
278 }
279 return false;
280 };
281
282 switch (I->getOpcode()) {
283 default:
284 llvm::computeKnownBits(V: I, Known, Q, Depth);
285 break;
286 case Instruction::And: {
287 // If either the LHS or the RHS are Zero, the result is zero.
288 if (SimplifyDemandedBits(I, OpNo: 1, DemandedMask, Known&: RHSKnown, Q, Depth: Depth + 1) ||
289 SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMask & ~RHSKnown.Zero, Known&: LHSKnown, Q,
290 Depth: Depth + 1))
291 return I;
292
293 Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown,
294 SQ: Q, Depth);
295
296 // If the client is only demanding bits that we know, return the known
297 // constant.
298 if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One))
299 return Constant::getIntegerValue(Ty: VTy, V: Known.One);
300
301 // If all of the demanded bits are known 1 on one side, return the other.
302 // These bits cannot contribute to the result of the 'and'.
303 if (DemandedMask.isSubsetOf(RHS: LHSKnown.Zero | RHSKnown.One))
304 return I->getOperand(i: 0);
305 if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero | LHSKnown.One))
306 return I->getOperand(i: 1);
307
308 // If the RHS is a constant, see if we can simplify it.
309 if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedMask & ~LHSKnown.Zero))
310 return I;
311
312 break;
313 }
314 case Instruction::Or: {
315 // If either the LHS or the RHS are One, the result is One.
316 if (SimplifyDemandedBits(I, OpNo: 1, DemandedMask, Known&: RHSKnown, Q, Depth: Depth + 1) ||
317 SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMask & ~RHSKnown.One, Known&: LHSKnown, Q,
318 Depth: Depth + 1)) {
319 // Disjoint flag may not longer hold.
320 I->dropPoisonGeneratingFlags();
321 return I;
322 }
323
324 Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown,
325 SQ: Q, Depth);
326
327 // If the client is only demanding bits that we know, return the known
328 // constant.
329 if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One))
330 return Constant::getIntegerValue(Ty: VTy, V: Known.One);
331
332 // If all of the demanded bits are known zero on one side, return the other.
333 // These bits cannot contribute to the result of the 'or'.
334 if (DemandedMask.isSubsetOf(RHS: LHSKnown.One | RHSKnown.Zero))
335 return I->getOperand(i: 0);
336 if (DemandedMask.isSubsetOf(RHS: RHSKnown.One | LHSKnown.Zero))
337 return I->getOperand(i: 1);
338
339 // If the RHS is a constant, see if we can simplify it.
340 if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedMask))
341 return I;
342
343 // Infer disjoint flag if no common bits are set.
344 if (!cast<PossiblyDisjointInst>(Val: I)->isDisjoint()) {
345 WithCache<const Value *> LHSCache(I->getOperand(i: 0), LHSKnown),
346 RHSCache(I->getOperand(i: 1), RHSKnown);
347 if (haveNoCommonBitsSet(LHSCache, RHSCache, SQ: Q)) {
348 cast<PossiblyDisjointInst>(Val: I)->setIsDisjoint(true);
349 return I;
350 }
351 }
352
353 break;
354 }
355 case Instruction::Xor: {
356 if (SimplifyDemandedBits(I, OpNo: 1, DemandedMask, Known&: RHSKnown, Q, Depth: Depth + 1) ||
357 SimplifyDemandedBits(I, OpNo: 0, DemandedMask, Known&: LHSKnown, Q, Depth: Depth + 1))
358 return I;
359 Value *LHS, *RHS;
360 if (DemandedMask == 1 && match(V: I->getOperand(i: 0), P: m_Ctpop(Op0: m_Value(V&: LHS))) &&
361 match(V: I->getOperand(i: 1), P: m_Ctpop(Op0: m_Value(V&: RHS)))) {
362 // (ctpop(X) ^ ctpop(Y)) & 1 --> ctpop(X^Y) & 1
363 IRBuilderBase::InsertPointGuard Guard(Builder);
364 Builder.SetInsertPoint(I);
365 auto *Xor = Builder.CreateXor(LHS, RHS);
366 return Builder.CreateUnaryIntrinsic(ID: Intrinsic::ctpop, Op: Xor);
367 }
368
369 Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown,
370 SQ: Q, Depth);
371
372 // If the client is only demanding bits that we know, return the known
373 // constant.
374 if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One))
375 return Constant::getIntegerValue(Ty: VTy, V: Known.One);
376
377 // If all of the demanded bits are known zero on one side, return the other.
378 // These bits cannot contribute to the result of the 'xor'.
379 if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero))
380 return I->getOperand(i: 0);
381 if (DemandedMask.isSubsetOf(RHS: LHSKnown.Zero))
382 return I->getOperand(i: 1);
383
384 // If all of the demanded bits are known to be zero on one side or the
385 // other, turn this into an *inclusive* or.
386 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
387 if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero | LHSKnown.Zero)) {
388 Instruction *Or =
389 BinaryOperator::CreateOr(V1: I->getOperand(i: 0), V2: I->getOperand(i: 1));
390 if (DemandedMask.isAllOnes())
391 cast<PossiblyDisjointInst>(Val: Or)->setIsDisjoint(true);
392 Or->takeName(V: I);
393 return InsertNewInstWith(New: Or, Old: I->getIterator());
394 }
395
396 // If all of the demanded bits on one side are known, and all of the set
397 // bits on that side are also known to be set on the other side, turn this
398 // into an AND, as we know the bits will be cleared.
399 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
400 if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero|RHSKnown.One) &&
401 RHSKnown.One.isSubsetOf(RHS: LHSKnown.One)) {
402 Constant *AndC = Constant::getIntegerValue(Ty: VTy,
403 V: ~RHSKnown.One & DemandedMask);
404 Instruction *And = BinaryOperator::CreateAnd(V1: I->getOperand(i: 0), V2: AndC);
405 return InsertNewInstWith(New: And, Old: I->getIterator());
406 }
407
408 // If the RHS is a constant, see if we can change it. Don't alter a -1
409 // constant because that's a canonical 'not' op, and that is better for
410 // combining, SCEV, and codegen.
411 const APInt *C;
412 if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: C)) && !C->isAllOnes()) {
413 if ((*C | ~DemandedMask).isAllOnes()) {
414 // Force bits to 1 to create a 'not' op.
415 I->setOperand(i: 1, Val: ConstantInt::getAllOnesValue(Ty: VTy));
416 return I;
417 }
418 // If we can't turn this into a 'not', try to shrink the constant.
419 if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedMask))
420 return I;
421 }
422
423 // If our LHS is an 'and' and if it has one use, and if any of the bits we
424 // are flipping are known to be set, then the xor is just resetting those
425 // bits to zero. We can just knock out bits from the 'and' and the 'xor',
426 // simplifying both of them.
427 if (Instruction *LHSInst = dyn_cast<Instruction>(Val: I->getOperand(i: 0))) {
428 ConstantInt *AndRHS, *XorRHS;
429 if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
430 match(V: I->getOperand(i: 1), P: m_ConstantInt(CI&: XorRHS)) &&
431 match(V: LHSInst->getOperand(i: 1), P: m_ConstantInt(CI&: AndRHS)) &&
432 (LHSKnown.One & RHSKnown.One & DemandedMask) != 0) {
433 APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask);
434
435 Constant *AndC = ConstantInt::get(Ty: VTy, V: NewMask & AndRHS->getValue());
436 Instruction *NewAnd = BinaryOperator::CreateAnd(V1: I->getOperand(i: 0), V2: AndC);
437 InsertNewInstWith(New: NewAnd, Old: I->getIterator());
438
439 Constant *XorC = ConstantInt::get(Ty: VTy, V: NewMask & XorRHS->getValue());
440 Instruction *NewXor = BinaryOperator::CreateXor(V1: NewAnd, V2: XorC);
441 return InsertNewInstWith(New: NewXor, Old: I->getIterator());
442 }
443 }
444 break;
445 }
446 case Instruction::Select: {
447 if (SimplifyDemandedBits(I, OpNo: 2, DemandedMask, Known&: RHSKnown, Q, Depth: Depth + 1) ||
448 SimplifyDemandedBits(I, OpNo: 1, DemandedMask, Known&: LHSKnown, Q, Depth: Depth + 1))
449 return I;
450
451 // If the operands are constants, see if we can simplify them.
452 // This is similar to ShrinkDemandedConstant, but for a select we want to
453 // try to keep the selected constants the same as icmp value constants, if
454 // we can. This helps not break apart (or helps put back together)
455 // canonical patterns like min and max.
456 auto CanonicalizeSelectConstant = [](Instruction *I, unsigned OpNo,
457 const APInt &DemandedMask) {
458 const APInt *SelC;
459 if (!match(V: I->getOperand(i: OpNo), P: m_APInt(Res&: SelC)))
460 return false;
461
462 // Get the constant out of the ICmp, if there is one.
463 // Only try this when exactly 1 operand is a constant (if both operands
464 // are constant, the icmp should eventually simplify). Otherwise, we may
465 // invert the transform that reduces set bits and infinite-loop.
466 Value *X;
467 const APInt *CmpC;
468 if (!match(V: I->getOperand(i: 0), P: m_ICmp(L: m_Value(V&: X), R: m_APInt(Res&: CmpC))) ||
469 isa<Constant>(Val: X) || CmpC->getBitWidth() != SelC->getBitWidth())
470 return ShrinkDemandedConstant(I, OpNo, Demanded: DemandedMask);
471
472 // If the constant is already the same as the ICmp, leave it as-is.
473 if (*CmpC == *SelC)
474 return false;
475 // If the constants are not already the same, but can be with the demand
476 // mask, use the constant value from the ICmp.
477 if ((*CmpC & DemandedMask) == (*SelC & DemandedMask)) {
478 I->setOperand(i: OpNo, Val: ConstantInt::get(Ty: I->getType(), V: *CmpC));
479 return true;
480 }
481 return ShrinkDemandedConstant(I, OpNo, Demanded: DemandedMask);
482 };
483 if (CanonicalizeSelectConstant(I, 1, DemandedMask) ||
484 CanonicalizeSelectConstant(I, 2, DemandedMask))
485 return I;
486
487 // Only known if known in both the LHS and RHS.
488 adjustKnownBitsForSelectArm(Known&: LHSKnown, Cond: I->getOperand(i: 0), Arm: I->getOperand(i: 1),
489 /*Invert=*/false, Q, Depth);
490 adjustKnownBitsForSelectArm(Known&: RHSKnown, Cond: I->getOperand(i: 0), Arm: I->getOperand(i: 2),
491 /*Invert=*/true, Q, Depth);
492 Known = LHSKnown.intersectWith(RHS: RHSKnown);
493 break;
494 }
495 case Instruction::Trunc: {
496 // If we do not demand the high bits of a right-shifted and truncated value,
497 // then we may be able to truncate it before the shift.
498 Value *X;
499 const APInt *C;
500 if (match(V: I->getOperand(i: 0), P: m_OneUse(SubPattern: m_LShr(L: m_Value(V&: X), R: m_APInt(Res&: C))))) {
501 // The shift amount must be valid (not poison) in the narrow type, and
502 // it must not be greater than the high bits demanded of the result.
503 if (C->ult(RHS: VTy->getScalarSizeInBits()) &&
504 C->ule(RHS: DemandedMask.countl_zero())) {
505 // trunc (lshr X, C) --> lshr (trunc X), C
506 IRBuilderBase::InsertPointGuard Guard(Builder);
507 Builder.SetInsertPoint(I);
508 Value *Trunc = Builder.CreateTrunc(V: X, DestTy: VTy);
509 return Builder.CreateLShr(LHS: Trunc, RHS: C->getZExtValue());
510 }
511 }
512 }
513 [[fallthrough]];
514 case Instruction::ZExt: {
515 unsigned SrcBitWidth = I->getOperand(i: 0)->getType()->getScalarSizeInBits();
516
517 APInt InputDemandedMask = DemandedMask.zextOrTrunc(width: SrcBitWidth);
518 KnownBits InputKnown(SrcBitWidth);
519 if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: InputDemandedMask, Known&: InputKnown, Q,
520 Depth: Depth + 1)) {
521 // For zext nneg, we may have dropped the instruction which made the
522 // input non-negative.
523 I->dropPoisonGeneratingFlags();
524 return I;
525 }
526 assert(InputKnown.getBitWidth() == SrcBitWidth && "Src width changed?");
527 if (I->getOpcode() == Instruction::ZExt && I->hasNonNeg() &&
528 !InputKnown.isNegative())
529 InputKnown.makeNonNegative();
530 Known = InputKnown.zextOrTrunc(BitWidth);
531
532 break;
533 }
534 case Instruction::SExt: {
535 // Compute the bits in the result that are not present in the input.
536 unsigned SrcBitWidth = I->getOperand(i: 0)->getType()->getScalarSizeInBits();
537
538 APInt InputDemandedBits = DemandedMask.trunc(width: SrcBitWidth);
539
540 // If any of the sign extended bits are demanded, we know that the sign
541 // bit is demanded.
542 if (DemandedMask.getActiveBits() > SrcBitWidth)
543 InputDemandedBits.setBit(SrcBitWidth-1);
544
545 KnownBits InputKnown(SrcBitWidth);
546 if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: InputDemandedBits, Known&: InputKnown, Q, Depth: Depth + 1))
547 return I;
548
549 // If the input sign bit is known zero, or if the NewBits are not demanded
550 // convert this into a zero extension.
551 if (InputKnown.isNonNegative() ||
552 DemandedMask.getActiveBits() <= SrcBitWidth) {
553 // Convert to ZExt cast.
554 CastInst *NewCast = new ZExtInst(I->getOperand(i: 0), VTy);
555 NewCast->takeName(V: I);
556 return InsertNewInstWith(New: NewCast, Old: I->getIterator());
557 }
558
559 // If the sign bit of the input is known set or clear, then we know the
560 // top bits of the result.
561 Known = InputKnown.sext(BitWidth);
562 break;
563 }
564 case Instruction::Add: {
565 if ((DemandedMask & 1) == 0) {
566 // If we do not need the low bit, try to convert bool math to logic:
567 // add iN (zext i1 X), (sext i1 Y) --> sext (~X & Y) to iN
568 Value *X, *Y;
569 if (match(V: I, P: m_c_Add(L: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X))),
570 R: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: Y))))) &&
571 X->getType()->isIntOrIntVectorTy(BitWidth: 1) && X->getType() == Y->getType()) {
572 // Truth table for inputs and output signbits:
573 // X:0 | X:1
574 // ----------
575 // Y:0 | 0 | 0 |
576 // Y:1 | -1 | 0 |
577 // ----------
578 IRBuilderBase::InsertPointGuard Guard(Builder);
579 Builder.SetInsertPoint(I);
580 Value *AndNot = Builder.CreateAnd(LHS: Builder.CreateNot(V: X), RHS: Y);
581 return Builder.CreateSExt(V: AndNot, DestTy: VTy);
582 }
583
584 // add iN (sext i1 X), (sext i1 Y) --> sext (X | Y) to iN
585 if (match(V: I, P: m_Add(L: m_SExt(Op: m_Value(V&: X)), R: m_SExt(Op: m_Value(V&: Y)))) &&
586 X->getType()->isIntOrIntVectorTy(BitWidth: 1) && X->getType() == Y->getType() &&
587 (I->getOperand(i: 0)->hasOneUse() || I->getOperand(i: 1)->hasOneUse())) {
588
589 // Truth table for inputs and output signbits:
590 // X:0 | X:1
591 // -----------
592 // Y:0 | 0 | -1 |
593 // Y:1 | -1 | -1 |
594 // -----------
595 IRBuilderBase::InsertPointGuard Guard(Builder);
596 Builder.SetInsertPoint(I);
597 Value *Or = Builder.CreateOr(LHS: X, RHS: Y);
598 return Builder.CreateSExt(V: Or, DestTy: VTy);
599 }
600 }
601
602 // Right fill the mask of bits for the operands to demand the most
603 // significant bit and all those below it.
604 unsigned NLZ = DemandedMask.countl_zero();
605 APInt DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ);
606 if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedFromOps) ||
607 SimplifyDemandedBits(I, OpNo: 1, DemandedMask: DemandedFromOps, Known&: RHSKnown, Q, Depth: Depth + 1))
608 return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);
609
610 // If low order bits are not demanded and known to be zero in one operand,
611 // then we don't need to demand them from the other operand, since they
612 // can't cause overflow into any bits that are demanded in the result.
613 unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countr_one();
614 APInt DemandedFromLHS = DemandedFromOps;
615 DemandedFromLHS.clearLowBits(loBits: NTZ);
616 if (ShrinkDemandedConstant(I, OpNo: 0, Demanded: DemandedFromLHS) ||
617 SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedFromLHS, Known&: LHSKnown, Q, Depth: Depth + 1))
618 return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);
619
620 unsigned NtzLHS = (~DemandedMask & LHSKnown.Zero).countr_one();
621 APInt DemandedFromRHS = DemandedFromOps;
622 DemandedFromRHS.clearLowBits(loBits: NtzLHS);
623 if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedFromRHS))
624 return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);
625
626 // If we are known to be adding zeros to every bit below
627 // the highest demanded bit, we just return the other side.
628 if (DemandedFromOps.isSubsetOf(RHS: RHSKnown.Zero))
629 return I->getOperand(i: 0);
630 if (DemandedFromOps.isSubsetOf(RHS: LHSKnown.Zero))
631 return I->getOperand(i: 1);
632
633 // (add X, C) --> (xor X, C) IFF C is equal to the top bit of the DemandMask
634 {
635 const APInt *C;
636 if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: C)) &&
637 C->isOneBitSet(BitNo: DemandedMask.getActiveBits() - 1)) {
638 IRBuilderBase::InsertPointGuard Guard(Builder);
639 Builder.SetInsertPoint(I);
640 return Builder.CreateXor(LHS: I->getOperand(i: 0), RHS: ConstantInt::get(Ty: VTy, V: *C));
641 }
642 }
643
644 // Otherwise just compute the known bits of the result.
645 bool NSW = cast<OverflowingBinaryOperator>(Val: I)->hasNoSignedWrap();
646 bool NUW = cast<OverflowingBinaryOperator>(Val: I)->hasNoUnsignedWrap();
647 Known = KnownBits::add(LHS: LHSKnown, RHS: RHSKnown, NSW, NUW);
648 break;
649 }
650 case Instruction::Sub: {
651 // Right fill the mask of bits for the operands to demand the most
652 // significant bit and all those below it.
653 unsigned NLZ = DemandedMask.countl_zero();
654 APInt DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ);
655 if (ShrinkDemandedConstant(I, OpNo: 1, Demanded: DemandedFromOps) ||
656 SimplifyDemandedBits(I, OpNo: 1, DemandedMask: DemandedFromOps, Known&: RHSKnown, Q, Depth: Depth + 1))
657 return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);
658
659 // If low order bits are not demanded and are known to be zero in RHS,
660 // then we don't need to demand them from LHS, since they can't cause a
661 // borrow from any bits that are demanded in the result.
662 unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countr_one();
663 APInt DemandedFromLHS = DemandedFromOps;
664 DemandedFromLHS.clearLowBits(loBits: NTZ);
665 if (ShrinkDemandedConstant(I, OpNo: 0, Demanded: DemandedFromLHS) ||
666 SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedFromLHS, Known&: LHSKnown, Q, Depth: Depth + 1))
667 return disableWrapFlagsBasedOnUnusedHighBits(I, NLZ);
668
669 // If we are known to be subtracting zeros from every bit below
670 // the highest demanded bit, we just return the other side.
671 if (DemandedFromOps.isSubsetOf(RHS: RHSKnown.Zero))
672 return I->getOperand(i: 0);
673 // We can't do this with the LHS for subtraction, unless we are only
674 // demanding the LSB.
675 if (DemandedFromOps.isOne() && DemandedFromOps.isSubsetOf(RHS: LHSKnown.Zero))
676 return I->getOperand(i: 1);
677
678 // Canonicalize sub mask, X -> ~X
679 const APInt *LHSC;
680 if (match(V: I->getOperand(i: 0), P: m_LowBitMask(V&: LHSC)) &&
681 DemandedFromOps.isSubsetOf(RHS: *LHSC)) {
682 IRBuilderBase::InsertPointGuard Guard(Builder);
683 Builder.SetInsertPoint(I);
684 return Builder.CreateNot(V: I->getOperand(i: 1));
685 }
686
687 // Otherwise just compute the known bits of the result.
688 bool NSW = cast<OverflowingBinaryOperator>(Val: I)->hasNoSignedWrap();
689 bool NUW = cast<OverflowingBinaryOperator>(Val: I)->hasNoUnsignedWrap();
690 Known = KnownBits::sub(LHS: LHSKnown, RHS: RHSKnown, NSW, NUW);
691 break;
692 }
693 case Instruction::Mul: {
694 APInt DemandedFromOps;
695 if (simplifyOperandsBasedOnUnusedHighBits(DemandedFromOps))
696 return I;
697
698 if (DemandedMask.isPowerOf2()) {
699 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
700 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
701 // odd (has LSB set), then the left-shifted low bit of X is the answer.
702 unsigned CTZ = DemandedMask.countr_zero();
703 const APInt *C;
704 if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: C)) && C->countr_zero() == CTZ) {
705 Constant *ShiftC = ConstantInt::get(Ty: VTy, V: CTZ);
706 Instruction *Shl = BinaryOperator::CreateShl(V1: I->getOperand(i: 0), V2: ShiftC);
707 return InsertNewInstWith(New: Shl, Old: I->getIterator());
708 }
709 }
710 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
711 // X * X is odd iff X is odd.
712 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
713 if (I->getOperand(i: 0) == I->getOperand(i: 1) && DemandedMask.ult(RHS: 4)) {
714 Constant *One = ConstantInt::get(Ty: VTy, V: 1);
715 Instruction *And1 = BinaryOperator::CreateAnd(V1: I->getOperand(i: 0), V2: One);
716 return InsertNewInstWith(New: And1, Old: I->getIterator());
717 }
718
719 llvm::computeKnownBits(V: I, Known, Q, Depth);
720 break;
721 }
722 case Instruction::Shl: {
723 const APInt *SA;
724 if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: SA))) {
725 const APInt *ShrAmt;
726 if (match(V: I->getOperand(i: 0), P: m_Shr(L: m_Value(), R: m_APInt(Res&: ShrAmt))))
727 if (Instruction *Shr = dyn_cast<Instruction>(Val: I->getOperand(i: 0)))
728 if (Value *R = simplifyShrShlDemandedBits(Shr, ShrOp1: *ShrAmt, Shl: I, ShlOp1: *SA,
729 DemandedMask, Known))
730 return R;
731
732 // Do not simplify if shl is part of funnel-shift pattern
733 if (I->hasOneUse()) {
734 Instruction *Inst = I->user_back();
735 if (Inst->getOpcode() == BinaryOperator::Or) {
736 if (auto Opt = convertOrOfShiftsToFunnelShift(Or&: *Inst)) {
737 auto [IID, FShiftArgs] = *Opt;
738 if ((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
739 FShiftArgs[0] == FShiftArgs[1]) {
740 llvm::computeKnownBits(V: I, Known, Q, Depth);
741 break;
742 }
743 }
744 }
745 }
746
747 // We only want bits that already match the signbit then we don't
748 // need to shift.
749 uint64_t ShiftAmt = SA->getLimitedValue(Limit: BitWidth - 1);
750 if (DemandedMask.countr_zero() >= ShiftAmt) {
751 if (I->hasNoSignedWrap()) {
752 unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero();
753 unsigned SignBits =
754 ComputeNumSignBits(Op: I->getOperand(i: 0), CxtI: Q.CxtI, Depth: Depth + 1);
755 if (SignBits > ShiftAmt && SignBits - ShiftAmt >= NumHiDemandedBits)
756 return I->getOperand(i: 0);
757 }
758
759 // If we can pre-shift a right-shifted constant to the left without
760 // losing any high bits and we don't demand the low bits, then eliminate
761 // the left-shift:
762 // (C >> X) << LeftShiftAmtC --> (C << LeftShiftAmtC) >> X
763 Value *X;
764 Constant *C;
765 if (match(V: I->getOperand(i: 0), P: m_LShr(L: m_ImmConstant(C), R: m_Value(V&: X)))) {
766 Constant *LeftShiftAmtC = ConstantInt::get(Ty: VTy, V: ShiftAmt);
767 Constant *NewC = ConstantFoldBinaryOpOperands(Opcode: Instruction::Shl, LHS: C,
768 RHS: LeftShiftAmtC, DL);
769 if (ConstantFoldBinaryOpOperands(Opcode: Instruction::LShr, LHS: NewC,
770 RHS: LeftShiftAmtC, DL) == C) {
771 Instruction *Lshr = BinaryOperator::CreateLShr(V1: NewC, V2: X);
772 return InsertNewInstWith(New: Lshr, Old: I->getIterator());
773 }
774 }
775 }
776
777 APInt DemandedMaskIn(DemandedMask.lshr(shiftAmt: ShiftAmt));
778
779 // If the shift is NUW/NSW, then it does demand the high bits.
780 ShlOperator *IOp = cast<ShlOperator>(Val: I);
781 if (IOp->hasNoSignedWrap())
782 DemandedMaskIn.setHighBits(ShiftAmt+1);
783 else if (IOp->hasNoUnsignedWrap())
784 DemandedMaskIn.setHighBits(ShiftAmt);
785
786 if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskIn, Known, Q, Depth: Depth + 1))
787 return I;
788
789 Known = KnownBits::shl(LHS: Known,
790 RHS: KnownBits::makeConstant(C: APInt(BitWidth, ShiftAmt)),
791 /* NUW */ IOp->hasNoUnsignedWrap(),
792 /* NSW */ IOp->hasNoSignedWrap());
793 } else {
794 // This is a variable shift, so we can't shift the demand mask by a known
795 // amount. But if we are not demanding high bits, then we are not
796 // demanding those bits from the pre-shifted operand either.
797 if (unsigned CTLZ = DemandedMask.countl_zero()) {
798 APInt DemandedFromOp(APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - CTLZ));
799 if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedFromOp, Known, Q, Depth: Depth + 1)) {
800 // We can't guarantee that nsw/nuw hold after simplifying the operand.
801 I->dropPoisonGeneratingFlags();
802 return I;
803 }
804 }
805 llvm::computeKnownBits(V: I, Known, Q, Depth);
806 }
807 break;
808 }
809 case Instruction::LShr: {
810 const APInt *SA;
811 if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: SA))) {
812 uint64_t ShiftAmt = SA->getLimitedValue(Limit: BitWidth-1);
813
814 // Do not simplify if lshr is part of funnel-shift pattern
815 if (I->hasOneUse()) {
816 Instruction *Inst = I->user_back();
817 if (Inst->getOpcode() == BinaryOperator::Or) {
818 if (auto Opt = convertOrOfShiftsToFunnelShift(Or&: *Inst)) {
819 auto [IID, FShiftArgs] = *Opt;
820 if ((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
821 FShiftArgs[0] == FShiftArgs[1]) {
822 llvm::computeKnownBits(V: I, Known, Q, Depth);
823 break;
824 }
825 }
826 }
827 }
828
829 // If we are just demanding the shifted sign bit and below, then this can
830 // be treated as an ASHR in disguise.
831 if (DemandedMask.countl_zero() >= ShiftAmt) {
832 // If we only want bits that already match the signbit then we don't
833 // need to shift.
834 unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero();
835 unsigned SignBits =
836 ComputeNumSignBits(Op: I->getOperand(i: 0), CxtI: Q.CxtI, Depth: Depth + 1);
837 if (SignBits >= NumHiDemandedBits)
838 return I->getOperand(i: 0);
839
840 // If we can pre-shift a left-shifted constant to the right without
841 // losing any low bits (we already know we don't demand the high bits),
842 // then eliminate the right-shift:
843 // (C << X) >> RightShiftAmtC --> (C >> RightShiftAmtC) << X
844 Value *X;
845 Constant *C;
846 if (match(V: I->getOperand(i: 0), P: m_Shl(L: m_ImmConstant(C), R: m_Value(V&: X)))) {
847 Constant *RightShiftAmtC = ConstantInt::get(Ty: VTy, V: ShiftAmt);
848 Constant *NewC = ConstantFoldBinaryOpOperands(Opcode: Instruction::LShr, LHS: C,
849 RHS: RightShiftAmtC, DL);
850 if (ConstantFoldBinaryOpOperands(Opcode: Instruction::Shl, LHS: NewC,
851 RHS: RightShiftAmtC, DL) == C) {
852 Instruction *Shl = BinaryOperator::CreateShl(V1: NewC, V2: X);
853 return InsertNewInstWith(New: Shl, Old: I->getIterator());
854 }
855 }
856
857 const APInt *Factor;
858 if (match(V: I->getOperand(i: 0),
859 P: m_OneUse(SubPattern: m_Mul(L: m_Value(V&: X), R: m_APInt(Res&: Factor)))) &&
860 Factor->countr_zero() >= ShiftAmt) {
861 BinaryOperator *Mul = BinaryOperator::CreateMul(
862 V1: X, V2: ConstantInt::get(Ty: X->getType(), V: Factor->lshr(shiftAmt: ShiftAmt)));
863 return InsertNewInstWith(New: Mul, Old: I->getIterator());
864 }
865 }
866
867 // Unsigned shift right.
868 APInt DemandedMaskIn(DemandedMask.shl(shiftAmt: ShiftAmt));
869 if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskIn, Known, Q, Depth: Depth + 1)) {
870 // exact flag may not longer hold.
871 I->dropPoisonGeneratingFlags();
872 return I;
873 }
874 Known >>= ShiftAmt;
875 if (ShiftAmt)
876 Known.Zero.setHighBits(ShiftAmt); // high bits known zero.
877 break;
878 }
879 if (Value *V =
880 simplifyShiftSelectingPackedElement(I, DemandedMask, IC&: *this, Depth))
881 return V;
882
883 llvm::computeKnownBits(V: I, Known, Q, Depth);
884 break;
885 }
886 case Instruction::AShr: {
887 unsigned SignBits = ComputeNumSignBits(Op: I->getOperand(i: 0), CxtI: Q.CxtI, Depth: Depth + 1);
888
889 // If we only want bits that already match the signbit then we don't need
890 // to shift.
891 unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero();
892 if (SignBits >= NumHiDemandedBits)
893 return I->getOperand(i: 0);
894
895 // If this is an arithmetic shift right and only the low-bit is set, we can
896 // always convert this into a logical shr, even if the shift amount is
897 // variable. The low bit of the shift cannot be an input sign bit unless
898 // the shift amount is >= the size of the datatype, which is undefined.
899 if (DemandedMask.isOne()) {
900 // Perform the logical shift right.
901 Instruction *NewVal = BinaryOperator::CreateLShr(
902 V1: I->getOperand(i: 0), V2: I->getOperand(i: 1), Name: I->getName());
903 return InsertNewInstWith(New: NewVal, Old: I->getIterator());
904 }
905
906 const APInt *SA;
907 if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: SA))) {
908 uint32_t ShiftAmt = SA->getLimitedValue(Limit: BitWidth-1);
909
910 // Signed shift right.
911 APInt DemandedMaskIn(DemandedMask.shl(shiftAmt: ShiftAmt));
912 // If any of the bits being shifted in are demanded, then we should set
913 // the sign bit as demanded.
914 bool ShiftedInBitsDemanded = DemandedMask.countl_zero() < ShiftAmt;
915 if (ShiftedInBitsDemanded)
916 DemandedMaskIn.setSignBit();
917 if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskIn, Known, Q, Depth: Depth + 1)) {
918 // exact flag may not longer hold.
919 I->dropPoisonGeneratingFlags();
920 return I;
921 }
922
923 // If the input sign bit is known to be zero, or if none of the shifted in
924 // bits are demanded, turn this into an unsigned shift right.
925 if (Known.Zero[BitWidth - 1] || !ShiftedInBitsDemanded) {
926 BinaryOperator *LShr = BinaryOperator::CreateLShr(V1: I->getOperand(i: 0),
927 V2: I->getOperand(i: 1));
928 LShr->setIsExact(cast<BinaryOperator>(Val: I)->isExact());
929 LShr->takeName(V: I);
930 return InsertNewInstWith(New: LShr, Old: I->getIterator());
931 }
932
933 Known = KnownBits::ashr(
934 LHS: Known, RHS: KnownBits::makeConstant(C: APInt(BitWidth, ShiftAmt)),
935 ShAmtNonZero: ShiftAmt != 0, Exact: I->isExact());
936 } else {
937 llvm::computeKnownBits(V: I, Known, Q, Depth);
938 }
939 break;
940 }
941 case Instruction::UDiv: {
942 // UDiv doesn't demand low bits that are zero in the divisor.
943 const APInt *SA;
944 if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: SA))) {
945 // TODO: Take the demanded mask of the result into account.
946 unsigned RHSTrailingZeros = SA->countr_zero();
947 APInt DemandedMaskIn =
948 APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth - RHSTrailingZeros);
949 if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskIn, Known&: LHSKnown, Q, Depth: Depth + 1)) {
950 // We can't guarantee that "exact" is still true after changing the
951 // the dividend.
952 I->dropPoisonGeneratingFlags();
953 return I;
954 }
955
956 Known = KnownBits::udiv(LHS: LHSKnown, RHS: KnownBits::makeConstant(C: *SA),
957 Exact: cast<BinaryOperator>(Val: I)->isExact());
958 } else {
959 llvm::computeKnownBits(V: I, Known, Q, Depth);
960 }
961 break;
962 }
963 case Instruction::SRem: {
964 const APInt *Rem;
965 if (match(V: I->getOperand(i: 1), P: m_APInt(Res&: Rem)) && Rem->isPowerOf2()) {
966 if (DemandedMask.ult(RHS: *Rem)) // srem won't affect demanded bits
967 return I->getOperand(i: 0);
968
969 APInt LowBits = *Rem - 1;
970 APInt Mask2 = LowBits | APInt::getSignMask(BitWidth);
971 if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: Mask2, Known&: LHSKnown, Q, Depth: Depth + 1))
972 return I;
973 Known = KnownBits::srem(LHS: LHSKnown, RHS: KnownBits::makeConstant(C: *Rem));
974 break;
975 }
976
977 llvm::computeKnownBits(V: I, Known, Q, Depth);
978 break;
979 }
980 case Instruction::Call: {
981 bool KnownBitsComputed = false;
982 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
983 switch (II->getIntrinsicID()) {
984 case Intrinsic::abs: {
985 if (DemandedMask == 1)
986 return II->getArgOperand(i: 0);
987 break;
988 }
989 case Intrinsic::ctpop: {
990 // Checking if the number of clear bits is odd (parity)? If the type has
991 // an even number of bits, that's the same as checking if the number of
992 // set bits is odd, so we can eliminate the 'not' op.
993 Value *X;
994 if (DemandedMask == 1 && VTy->getScalarSizeInBits() % 2 == 0 &&
995 match(V: II->getArgOperand(i: 0), P: m_Not(V: m_Value(V&: X)))) {
996 Function *Ctpop = Intrinsic::getOrInsertDeclaration(
997 M: II->getModule(), id: Intrinsic::ctpop, OverloadTys: VTy);
998 return InsertNewInstWith(New: CallInst::Create(Func: Ctpop, Args: {X}), Old: I->getIterator());
999 }
1000 break;
1001 }
1002 case Intrinsic::bswap: {
1003 // If the only bits demanded come from one byte of the bswap result,
1004 // just shift the input byte into position to eliminate the bswap.
1005 unsigned NLZ = DemandedMask.countl_zero();
1006 unsigned NTZ = DemandedMask.countr_zero();
1007
1008 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
1009 // we need all the bits down to bit 8. Likewise, round NLZ. If we
1010 // have 14 leading zeros, round to 8.
1011 NLZ = alignDown(Value: NLZ, Align: 8);
1012 NTZ = alignDown(Value: NTZ, Align: 8);
1013 // If we need exactly one byte, we can do this transformation.
1014 if (BitWidth - NLZ - NTZ == 8) {
1015 // Replace this with either a left or right shift to get the byte into
1016 // the right place.
1017 Instruction *NewVal;
1018 if (NLZ > NTZ)
1019 NewVal = BinaryOperator::CreateLShr(
1020 V1: II->getArgOperand(i: 0), V2: ConstantInt::get(Ty: VTy, V: NLZ - NTZ));
1021 else
1022 NewVal = BinaryOperator::CreateShl(
1023 V1: II->getArgOperand(i: 0), V2: ConstantInt::get(Ty: VTy, V: NTZ - NLZ));
1024 NewVal->takeName(V: I);
1025 return InsertNewInstWith(New: NewVal, Old: I->getIterator());
1026 }
1027 break;
1028 }
1029 case Intrinsic::ptrmask: {
1030 unsigned MaskWidth = I->getOperand(i: 1)->getType()->getScalarSizeInBits();
1031 RHSKnown = KnownBits(MaskWidth);
1032 // If either the LHS or the RHS are Zero, the result is zero.
1033 if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask, Known&: LHSKnown, Q, Depth: Depth + 1) ||
1034 SimplifyDemandedBits(
1035 I, OpNo: 1, DemandedMask: (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(width: MaskWidth),
1036 Known&: RHSKnown, Q, Depth: Depth + 1))
1037 return I;
1038
1039 // TODO: Should be 1-extend
1040 RHSKnown = RHSKnown.anyextOrTrunc(BitWidth);
1041
1042 Known = LHSKnown & RHSKnown;
1043 KnownBitsComputed = true;
1044
1045 // If the client is only demanding bits we know to be zero, return
1046 // `llvm.ptrmask(p, 0)`. We can't return `null` here due to pointer
1047 // provenance, but making the mask zero will be easily optimizable in
1048 // the backend.
1049 if (DemandedMask.isSubsetOf(RHS: Known.Zero) &&
1050 !match(V: I->getOperand(i: 1), P: m_Zero()))
1051 return replaceOperand(
1052 I&: *I, OpNum: 1, V: Constant::getNullValue(Ty: I->getOperand(i: 1)->getType()));
1053
1054 // Mask in demanded space does nothing.
1055 // NOTE: We may have attributes associated with the return value of the
1056 // llvm.ptrmask intrinsic that will be lost when we just return the
1057 // operand. We should try to preserve them.
1058 if (DemandedMask.isSubsetOf(RHS: RHSKnown.One | LHSKnown.Zero))
1059 return I->getOperand(i: 0);
1060
1061 // If the RHS is a constant, see if we can simplify it.
1062 if (ShrinkDemandedConstant(
1063 I, OpNo: 1, Demanded: (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(width: MaskWidth)))
1064 return I;
1065
1066 // Combine:
1067 // (ptrmask (getelementptr i8, ptr p, imm i), imm mask)
1068 // -> (ptrmask (getelementptr i8, ptr p, imm (i & mask)), imm mask)
1069 // where only the low bits known to be zero in the pointer are changed
1070 Value *InnerPtr;
1071 uint64_t GEPIndex;
1072 uint64_t PtrMaskImmediate;
1073 if (match(V: I, P: m_Intrinsic<Intrinsic::ptrmask>(
1074 Op0: m_PtrAdd(PointerOp: m_Value(V&: InnerPtr), OffsetOp: m_ConstantInt(V&: GEPIndex)),
1075 Op1: m_ConstantInt(V&: PtrMaskImmediate)))) {
1076
1077 LHSKnown = computeKnownBits(V: InnerPtr, CxtI: I, Depth: Depth + 1);
1078 if (!LHSKnown.isZero()) {
1079 const unsigned trailingZeros = LHSKnown.countMinTrailingZeros();
1080 uint64_t PointerAlignBits = (uint64_t(1) << trailingZeros) - 1;
1081
1082 uint64_t HighBitsGEPIndex = GEPIndex & ~PointerAlignBits;
1083 uint64_t MaskedLowBitsGEPIndex =
1084 GEPIndex & PointerAlignBits & PtrMaskImmediate;
1085
1086 uint64_t MaskedGEPIndex = HighBitsGEPIndex | MaskedLowBitsGEPIndex;
1087
1088 if (MaskedGEPIndex != GEPIndex) {
1089 auto *GEP = cast<GEPOperator>(Val: II->getArgOperand(i: 0));
1090 Builder.SetInsertPoint(I);
1091 Type *GEPIndexType =
1092 DL.getIndexType(PtrTy: GEP->getPointerOperand()->getType());
1093 Value *MaskedGEP = Builder.CreateGEP(
1094 Ty: GEP->getSourceElementType(), Ptr: InnerPtr,
1095 IdxList: ConstantInt::get(Ty: GEPIndexType, V: MaskedGEPIndex),
1096 Name: GEP->getName(), NW: GEP->isInBounds());
1097
1098 replaceOperand(I&: *I, OpNum: 0, V: MaskedGEP);
1099 return I;
1100 }
1101 }
1102 }
1103
1104 break;
1105 }
1106
1107 case Intrinsic::fshr:
1108 case Intrinsic::fshl: {
1109 const APInt *SA;
1110 if (!match(V: I->getOperand(i: 2), P: m_APInt(Res&: SA)))
1111 break;
1112
1113 // Normalize to funnel shift left. APInt shifts of BitWidth are well-
1114 // defined, so no need to special-case zero shifts here.
1115 uint64_t ShiftAmt = SA->urem(RHS: BitWidth);
1116 if (II->getIntrinsicID() == Intrinsic::fshr)
1117 ShiftAmt = BitWidth - ShiftAmt;
1118
1119 APInt DemandedMaskLHS(DemandedMask.lshr(shiftAmt: ShiftAmt));
1120 APInt DemandedMaskRHS(DemandedMask.shl(shiftAmt: BitWidth - ShiftAmt));
1121 if (I->getOperand(i: 0) != I->getOperand(i: 1)) {
1122 if (SimplifyDemandedBits(I, OpNo: 0, DemandedMask: DemandedMaskLHS, Known&: LHSKnown, Q,
1123 Depth: Depth + 1) ||
1124 SimplifyDemandedBits(I, OpNo: 1, DemandedMask: DemandedMaskRHS, Known&: RHSKnown, Q,
1125 Depth: Depth + 1)) {
1126 // Range attribute or metadata may no longer hold.
1127 I->dropPoisonGeneratingAnnotations();
1128 return I;
1129 }
1130 } else { // fshl is a rotate
1131 // Avoid converting rotate into funnel shift.
1132 // Only simplify if one operand is constant.
1133 LHSKnown = computeKnownBits(V: I->getOperand(i: 0), CxtI: I, Depth: Depth + 1);
1134 if (DemandedMaskLHS.isSubsetOf(RHS: LHSKnown.Zero | LHSKnown.One) &&
1135 !match(V: I->getOperand(i: 0), P: m_SpecificInt(V: LHSKnown.One))) {
1136 replaceOperand(I&: *I, OpNum: 0, V: Constant::getIntegerValue(Ty: VTy, V: LHSKnown.One));
1137 return I;
1138 }
1139
1140 RHSKnown = computeKnownBits(V: I->getOperand(i: 1), CxtI: I, Depth: Depth + 1);
1141 if (DemandedMaskRHS.isSubsetOf(RHS: RHSKnown.Zero | RHSKnown.One) &&
1142 !match(V: I->getOperand(i: 1), P: m_SpecificInt(V: RHSKnown.One))) {
1143 replaceOperand(I&: *I, OpNum: 1, V: Constant::getIntegerValue(Ty: VTy, V: RHSKnown.One));
1144 return I;
1145 }
1146 }
1147
1148 LHSKnown <<= ShiftAmt;
1149 RHSKnown >>= BitWidth - ShiftAmt;
1150 Known = LHSKnown.unionWith(RHS: RHSKnown);
1151 KnownBitsComputed = true;
1152 break;
1153 }
1154 case Intrinsic::umax: {
1155 // UMax(A, C) == A if ...
1156 // The lowest non-zero bit of DemandMask is higher than the highest
1157 // non-zero bit of C.
1158 const APInt *C;
1159 unsigned CTZ = DemandedMask.countr_zero();
1160 if (match(V: II->getArgOperand(i: 1), P: m_APInt(Res&: C)) &&
1161 CTZ >= C->getActiveBits())
1162 return II->getArgOperand(i: 0);
1163 break;
1164 }
1165 case Intrinsic::umin: {
1166 // UMin(A, C) == A if ...
1167 // The lowest non-zero bit of DemandMask is higher than the highest
1168 // non-one bit of C.
1169 // This comes from using DeMorgans on the above umax example.
1170 const APInt *C;
1171 unsigned CTZ = DemandedMask.countr_zero();
1172 if (match(V: II->getArgOperand(i: 1), P: m_APInt(Res&: C)) &&
1173 CTZ >= C->getBitWidth() - C->countl_one())
1174 return II->getArgOperand(i: 0);
1175 break;
1176 }
1177 default: {
1178 // Handle target specific intrinsics
1179 std::optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic(
1180 II&: *II, DemandedMask, Known, KnownBitsComputed);
1181 if (V)
1182 return *V;
1183 break;
1184 }
1185 }
1186 }
1187
1188 if (!KnownBitsComputed)
1189 llvm::computeKnownBits(V: I, Known, Q, Depth);
1190 break;
1191 }
1192 }
1193
1194 if (I->getType()->isPointerTy()) {
1195 Align Alignment = I->getPointerAlignment(DL);
1196 Known.Zero.setLowBits(Log2(A: Alignment));
1197 }
1198
1199 // If the client is only demanding bits that we know, return the known
1200 // constant. We can't directly simplify pointers as a constant because of
1201 // pointer provenance.
1202 // TODO: We could return `(inttoptr const)` for pointers.
1203 if (!I->getType()->isPointerTy() &&
1204 DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One))
1205 return Constant::getIntegerValue(Ty: VTy, V: Known.One);
1206
1207 if (VerifyKnownBits) {
1208 KnownBits ReferenceKnown = llvm::computeKnownBits(V: I, Q, Depth);
1209 if (Known != ReferenceKnown) {
1210 errs() << "Mismatched known bits for " << *I << " in "
1211 << I->getFunction()->getName() << "\n";
1212 errs() << "computeKnownBits(): " << ReferenceKnown << "\n";
1213 errs() << "SimplifyDemandedBits(): " << Known << "\n";
1214 std::abort();
1215 }
1216 }
1217
1218 return nullptr;
1219}
1220
1221/// Helper routine of SimplifyDemandedUseBits. It computes Known
1222/// bits. It also tries to handle simplifications that can be done based on
1223/// DemandedMask, but without modifying the Instruction.
1224Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits(
1225 Instruction *I, const APInt &DemandedMask, KnownBits &Known,
1226 const SimplifyQuery &Q, unsigned Depth) {
1227 unsigned BitWidth = DemandedMask.getBitWidth();
1228 Type *ITy = I->getType();
1229
1230 KnownBits LHSKnown(BitWidth);
1231 KnownBits RHSKnown(BitWidth);
1232
1233 // Despite the fact that we can't simplify this instruction in all User's
1234 // context, we can at least compute the known bits, and we can
1235 // do simplifications that apply to *just* the one user if we know that
1236 // this instruction has a simpler value in that context.
1237 switch (I->getOpcode()) {
1238 case Instruction::And: {
1239 llvm::computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Q, Depth: Depth + 1);
1240 llvm::computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Q, Depth: Depth + 1);
1241 Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown,
1242 SQ: Q, Depth);
1243 computeKnownBitsFromContext(V: I, Known, Q, Depth);
1244
1245 // If the client is only demanding bits that we know, return the known
1246 // constant.
1247 if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One))
1248 return Constant::getIntegerValue(Ty: ITy, V: Known.One);
1249
1250 // If all of the demanded bits are known 1 on one side, return the other.
1251 // These bits cannot contribute to the result of the 'and' in this context.
1252 if (DemandedMask.isSubsetOf(RHS: LHSKnown.Zero | RHSKnown.One))
1253 return I->getOperand(i: 0);
1254 if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero | LHSKnown.One))
1255 return I->getOperand(i: 1);
1256
1257 break;
1258 }
1259 case Instruction::Or: {
1260 llvm::computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Q, Depth: Depth + 1);
1261 llvm::computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Q, Depth: Depth + 1);
1262 Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown,
1263 SQ: Q, Depth);
1264 computeKnownBitsFromContext(V: I, Known, Q, Depth);
1265
1266 // If the client is only demanding bits that we know, return the known
1267 // constant.
1268 if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One))
1269 return Constant::getIntegerValue(Ty: ITy, V: Known.One);
1270
1271 // We can simplify (X|Y) -> X or Y in the user's context if we know that
1272 // only bits from X or Y are demanded.
1273 // If all of the demanded bits are known zero on one side, return the other.
1274 // These bits cannot contribute to the result of the 'or' in this context.
1275 if (DemandedMask.isSubsetOf(RHS: LHSKnown.One | RHSKnown.Zero))
1276 return I->getOperand(i: 0);
1277 if (DemandedMask.isSubsetOf(RHS: RHSKnown.One | LHSKnown.Zero))
1278 return I->getOperand(i: 1);
1279
1280 break;
1281 }
1282 case Instruction::Xor: {
1283 llvm::computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Q, Depth: Depth + 1);
1284 llvm::computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Q, Depth: Depth + 1);
1285 Known = analyzeKnownBitsFromAndXorOr(I: cast<Operator>(Val: I), KnownLHS: LHSKnown, KnownRHS: RHSKnown,
1286 SQ: Q, Depth);
1287 computeKnownBitsFromContext(V: I, Known, Q, Depth);
1288
1289 // If the client is only demanding bits that we know, return the known
1290 // constant.
1291 if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One))
1292 return Constant::getIntegerValue(Ty: ITy, V: Known.One);
1293
1294 // We can simplify (X^Y) -> X or Y in the user's context if we know that
1295 // only bits from X or Y are demanded.
1296 // If all of the demanded bits are known zero on one side, return the other.
1297 if (DemandedMask.isSubsetOf(RHS: RHSKnown.Zero))
1298 return I->getOperand(i: 0);
1299 if (DemandedMask.isSubsetOf(RHS: LHSKnown.Zero))
1300 return I->getOperand(i: 1);
1301
1302 break;
1303 }
1304 case Instruction::Add: {
1305 unsigned NLZ = DemandedMask.countl_zero();
1306 APInt DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ);
1307
1308 // If an operand adds zeros to every bit below the highest demanded bit,
1309 // that operand doesn't change the result. Return the other side.
1310 llvm::computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Q, Depth: Depth + 1);
1311 if (DemandedFromOps.isSubsetOf(RHS: RHSKnown.Zero))
1312 return I->getOperand(i: 0);
1313
1314 llvm::computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Q, Depth: Depth + 1);
1315 if (DemandedFromOps.isSubsetOf(RHS: LHSKnown.Zero))
1316 return I->getOperand(i: 1);
1317
1318 bool NSW = cast<OverflowingBinaryOperator>(Val: I)->hasNoSignedWrap();
1319 bool NUW = cast<OverflowingBinaryOperator>(Val: I)->hasNoUnsignedWrap();
1320 Known = KnownBits::add(LHS: LHSKnown, RHS: RHSKnown, NSW, NUW);
1321 computeKnownBitsFromContext(V: I, Known, Q, Depth);
1322 break;
1323 }
1324 case Instruction::Sub: {
1325 unsigned NLZ = DemandedMask.countl_zero();
1326 APInt DemandedFromOps = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - NLZ);
1327
1328 // If an operand subtracts zeros from every bit below the highest demanded
1329 // bit, that operand doesn't change the result. Return the other side.
1330 llvm::computeKnownBits(V: I->getOperand(i: 1), Known&: RHSKnown, Q, Depth: Depth + 1);
1331 if (DemandedFromOps.isSubsetOf(RHS: RHSKnown.Zero))
1332 return I->getOperand(i: 0);
1333
1334 bool NSW = cast<OverflowingBinaryOperator>(Val: I)->hasNoSignedWrap();
1335 bool NUW = cast<OverflowingBinaryOperator>(Val: I)->hasNoUnsignedWrap();
1336 llvm::computeKnownBits(V: I->getOperand(i: 0), Known&: LHSKnown, Q, Depth: Depth + 1);
1337 Known = KnownBits::sub(LHS: LHSKnown, RHS: RHSKnown, NSW, NUW);
1338 computeKnownBitsFromContext(V: I, Known, Q, Depth);
1339 break;
1340 }
1341 case Instruction::AShr: {
1342 // Compute the Known bits to simplify things downstream.
1343 llvm::computeKnownBits(V: I, Known, Q, Depth);
1344
1345 // If this user is only demanding bits that we know, return the known
1346 // constant.
1347 if (DemandedMask.isSubsetOf(RHS: Known.Zero | Known.One))
1348 return Constant::getIntegerValue(Ty: ITy, V: Known.One);
1349
1350 // If the right shift operand 0 is a result of a left shift by the same
1351 // amount, this is probably a zero/sign extension, which may be unnecessary,
1352 // if we do not demand any of the new sign bits. So, return the original
1353 // operand instead.
1354 const APInt *ShiftRC;
1355 const APInt *ShiftLC;
1356 Value *X;
1357 unsigned BitWidth = DemandedMask.getBitWidth();
1358 if (match(V: I,
1359 P: m_AShr(L: m_Shl(L: m_Value(V&: X), R: m_APInt(Res&: ShiftLC)), R: m_APInt(Res&: ShiftRC))) &&
1360 ShiftLC == ShiftRC && ShiftLC->ult(RHS: BitWidth) &&
1361 DemandedMask.isSubsetOf(RHS: APInt::getLowBitsSet(
1362 numBits: BitWidth, loBitsSet: BitWidth - ShiftRC->getZExtValue()))) {
1363 return X;
1364 }
1365
1366 break;
1367 }
1368 default:
1369 // Compute the Known bits to simplify things downstream.
1370 llvm::computeKnownBits(V: I, Known, Q, Depth);
1371
1372 // If this user is only demanding bits that we know, return the known
1373 // constant.
1374 if (DemandedMask.isSubsetOf(RHS: Known.Zero|Known.One))
1375 return Constant::getIntegerValue(Ty: ITy, V: Known.One);
1376
1377 break;
1378 }
1379
1380 return nullptr;
1381}
1382
1383/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
1384/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
1385/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
1386/// of "C2-C1".
1387///
1388/// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
1389/// ..., bn}, without considering the specific value X is holding.
1390/// This transformation is legal iff one of following conditions is hold:
1391/// 1) All the bit in S are 0, in this case E1 == E2.
1392/// 2) We don't care those bits in S, per the input DemandedMask.
1393/// 3) Combination of 1) and 2). Some bits in S are 0, and we don't care the
1394/// rest bits.
1395///
1396/// Currently we only test condition 2).
1397///
1398/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
1399/// not successful.
1400Value *InstCombinerImpl::simplifyShrShlDemandedBits(
1401 Instruction *Shr, const APInt &ShrOp1, Instruction *Shl,
1402 const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known) {
1403 if (!ShlOp1 || !ShrOp1)
1404 return nullptr; // No-op.
1405
1406 Value *VarX = Shr->getOperand(i: 0);
1407 Type *Ty = VarX->getType();
1408 unsigned BitWidth = Ty->getScalarSizeInBits();
1409 if (ShlOp1.uge(RHS: BitWidth) || ShrOp1.uge(RHS: BitWidth))
1410 return nullptr; // Undef.
1411
1412 unsigned ShlAmt = ShlOp1.getZExtValue();
1413 unsigned ShrAmt = ShrOp1.getZExtValue();
1414
1415 Known.One.clearAllBits();
1416 Known.Zero.setLowBits(ShlAmt - 1);
1417 Known.Zero &= DemandedMask;
1418
1419 APInt BitMask1(APInt::getAllOnes(numBits: BitWidth));
1420 APInt BitMask2(APInt::getAllOnes(numBits: BitWidth));
1421
1422 bool isLshr = (Shr->getOpcode() == Instruction::LShr);
1423 BitMask1 = isLshr ? (BitMask1.lshr(shiftAmt: ShrAmt) << ShlAmt) :
1424 (BitMask1.ashr(ShiftAmt: ShrAmt) << ShlAmt);
1425
1426 if (ShrAmt <= ShlAmt) {
1427 BitMask2 <<= (ShlAmt - ShrAmt);
1428 } else {
1429 BitMask2 = isLshr ? BitMask2.lshr(shiftAmt: ShrAmt - ShlAmt):
1430 BitMask2.ashr(ShiftAmt: ShrAmt - ShlAmt);
1431 }
1432
1433 // Check if condition-2 (see the comment to this function) is satified.
1434 if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
1435 if (ShrAmt == ShlAmt)
1436 return VarX;
1437
1438 if (!Shr->hasOneUse())
1439 return nullptr;
1440
1441 BinaryOperator *New;
1442 if (ShrAmt < ShlAmt) {
1443 Constant *Amt = ConstantInt::get(Ty: VarX->getType(), V: ShlAmt - ShrAmt);
1444 New = BinaryOperator::CreateShl(V1: VarX, V2: Amt);
1445 BinaryOperator *Orig = cast<BinaryOperator>(Val: Shl);
1446 New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
1447 New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
1448 } else {
1449 Constant *Amt = ConstantInt::get(Ty: VarX->getType(), V: ShrAmt - ShlAmt);
1450 New = isLshr ? BinaryOperator::CreateLShr(V1: VarX, V2: Amt) :
1451 BinaryOperator::CreateAShr(V1: VarX, V2: Amt);
1452 if (cast<BinaryOperator>(Val: Shr)->isExact())
1453 New->setIsExact(true);
1454 }
1455
1456 return InsertNewInstWith(New, Old: Shl->getIterator());
1457 }
1458
1459 return nullptr;
1460}
1461
1462/// The specified value produces a vector with any number of elements.
1463/// This method analyzes which elements of the operand are poison and
1464/// returns that information in PoisonElts.
1465///
1466/// DemandedElts contains the set of elements that are actually used by the
1467/// caller, and by default (AllowMultipleUsers equals false) the value is
1468/// simplified only if it has a single caller. If AllowMultipleUsers is set
1469/// to true, DemandedElts refers to the union of sets of elements that are
1470/// used by all callers.
1471///
1472/// If the information about demanded elements can be used to simplify the
1473/// operation, the operation is simplified, then the resultant value is
1474/// returned. This returns null if no change was made.
1475Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
1476 APInt DemandedElts,
1477 APInt &PoisonElts,
1478 unsigned Depth,
1479 bool AllowMultipleUsers) {
1480 // Cannot analyze scalable type. The number of vector elements is not a
1481 // compile-time constant.
1482 if (isa<ScalableVectorType>(Val: V->getType()))
1483 return nullptr;
1484
1485 unsigned VWidth = cast<FixedVectorType>(Val: V->getType())->getNumElements();
1486 APInt EltMask(APInt::getAllOnes(numBits: VWidth));
1487 assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
1488
1489 if (match(V, P: m_Poison())) {
1490 // If the entire vector is poison, just return this info.
1491 PoisonElts = EltMask;
1492 return nullptr;
1493 }
1494
1495 if (DemandedElts.isZero()) { // If nothing is demanded, provide poison.
1496 PoisonElts = EltMask;
1497 return PoisonValue::get(T: V->getType());
1498 }
1499
1500 PoisonElts = 0;
1501
1502 if (auto *C = dyn_cast<Constant>(Val: V)) {
1503 // Check if this is identity. If so, return 0 since we are not simplifying
1504 // anything.
1505 if (DemandedElts.isAllOnes())
1506 return nullptr;
1507
1508 Type *EltTy = cast<VectorType>(Val: V->getType())->getElementType();
1509 Constant *Poison = PoisonValue::get(T: EltTy);
1510 SmallVector<Constant*, 16> Elts;
1511 for (unsigned i = 0; i != VWidth; ++i) {
1512 if (!DemandedElts[i]) { // If not demanded, set to poison.
1513 Elts.push_back(Elt: Poison);
1514 PoisonElts.setBit(i);
1515 continue;
1516 }
1517
1518 Constant *Elt = C->getAggregateElement(Elt: i);
1519 if (!Elt) return nullptr;
1520
1521 Elts.push_back(Elt);
1522 if (isa<PoisonValue>(Val: Elt)) // Already poison.
1523 PoisonElts.setBit(i);
1524 }
1525
1526 // If we changed the constant, return it.
1527 Constant *NewCV = ConstantVector::get(V: Elts);
1528 return NewCV != C ? NewCV : nullptr;
1529 }
1530
1531 // Limit search depth.
1532 if (Depth == SimplifyDemandedVectorEltsDepthLimit)
1533 return nullptr;
1534
1535 if (!AllowMultipleUsers) {
1536 // If multiple users are using the root value, proceed with
1537 // simplification conservatively assuming that all elements
1538 // are needed.
1539 if (!V->hasOneUse()) {
1540 // Quit if we find multiple users of a non-root value though.
1541 // They'll be handled when it's their turn to be visited by
1542 // the main instcombine process.
1543 if (Depth != 0)
1544 // TODO: Just compute the PoisonElts information recursively.
1545 return nullptr;
1546
1547 // Conservatively assume that all elements are needed.
1548 DemandedElts = EltMask;
1549 }
1550 }
1551
1552 Instruction *I = dyn_cast<Instruction>(Val: V);
1553 if (!I) return nullptr; // Only analyze instructions.
1554
1555 bool MadeChange = false;
1556 auto simplifyAndSetOp = [&](Instruction *Inst, unsigned OpNum,
1557 APInt Demanded, APInt &Undef) {
1558 auto *II = dyn_cast<IntrinsicInst>(Val: Inst);
1559 Value *Op = II ? II->getArgOperand(i: OpNum) : Inst->getOperand(i: OpNum);
1560 if (Value *V = SimplifyDemandedVectorElts(V: Op, DemandedElts: Demanded, PoisonElts&: Undef, Depth: Depth + 1)) {
1561 replaceOperand(I&: *Inst, OpNum, V);
1562 MadeChange = true;
1563 }
1564 };
1565
1566 APInt PoisonElts2(VWidth, 0);
1567 APInt PoisonElts3(VWidth, 0);
1568 switch (I->getOpcode()) {
1569 default: break;
1570
1571 case Instruction::GetElementPtr: {
1572 // The LangRef requires that struct geps have all constant indices. As
1573 // such, we can't convert any operand to partial undef.
1574 auto mayIndexStructType = [](GetElementPtrInst &GEP) {
1575 for (auto I = gep_type_begin(GEP), E = gep_type_end(GEP);
1576 I != E; I++)
1577 if (I.isStruct())
1578 return true;
1579 return false;
1580 };
1581 if (mayIndexStructType(cast<GetElementPtrInst>(Val&: *I)))
1582 break;
1583
1584 // Conservatively track the demanded elements back through any vector
1585 // operands we may have. We know there must be at least one, or we
1586 // wouldn't have a vector result to get here. Note that we intentionally
1587 // merge the undef bits here since gepping with either an poison base or
1588 // index results in poison.
1589 for (unsigned i = 0; i < I->getNumOperands(); i++) {
1590 if (i == 0 ? match(V: I->getOperand(i), P: m_Undef())
1591 : match(V: I->getOperand(i), P: m_Poison())) {
1592 // If the entire vector is undefined, just return this info.
1593 PoisonElts = EltMask;
1594 return nullptr;
1595 }
1596 if (I->getOperand(i)->getType()->isVectorTy()) {
1597 APInt PoisonEltsOp(VWidth, 0);
1598 simplifyAndSetOp(I, i, DemandedElts, PoisonEltsOp);
1599 // gep(x, undef) is not undef, so skip considering idx ops here
1600 // Note that we could propagate poison, but we can't distinguish between
1601 // undef & poison bits ATM
1602 if (i == 0)
1603 PoisonElts |= PoisonEltsOp;
1604 }
1605 }
1606
1607 break;
1608 }
1609 case Instruction::InsertElement: {
1610 // If this is a variable index, we don't know which element it overwrites.
1611 // demand exactly the same input as we produce.
1612 ConstantInt *Idx = dyn_cast<ConstantInt>(Val: I->getOperand(i: 2));
1613 if (!Idx) {
1614 // Note that we can't propagate undef elt info, because we don't know
1615 // which elt is getting updated.
1616 simplifyAndSetOp(I, 0, DemandedElts, PoisonElts2);
1617 break;
1618 }
1619
1620 // The element inserted overwrites whatever was there, so the input demanded
1621 // set is simpler than the output set.
1622 unsigned IdxNo = Idx->getZExtValue();
1623 APInt PreInsertDemandedElts = DemandedElts;
1624 if (IdxNo < VWidth)
1625 PreInsertDemandedElts.clearBit(BitPosition: IdxNo);
1626
1627 // If we only demand the element that is being inserted and that element
1628 // was extracted from the same index in another vector with the same type,
1629 // replace this insert with that other vector.
1630 // Note: This is attempted before the call to simplifyAndSetOp because that
1631 // may change PoisonElts to a value that does not match with Vec.
1632 Value *Vec;
1633 if (PreInsertDemandedElts == 0 &&
1634 match(V: I->getOperand(i: 1),
1635 P: m_ExtractElt(Val: m_Value(V&: Vec), Idx: m_SpecificInt(V: IdxNo))) &&
1636 Vec->getType() == I->getType()) {
1637 return Vec;
1638 }
1639
1640 simplifyAndSetOp(I, 0, PreInsertDemandedElts, PoisonElts);
1641
1642 // If this is inserting an element that isn't demanded, remove this
1643 // insertelement.
1644 if (IdxNo >= VWidth || !DemandedElts[IdxNo]) {
1645 Worklist.push(I);
1646 return I->getOperand(i: 0);
1647 }
1648
1649 // The inserted element is defined.
1650 PoisonElts.clearBit(BitPosition: IdxNo);
1651 break;
1652 }
1653 case Instruction::ShuffleVector: {
1654 auto *Shuffle = cast<ShuffleVectorInst>(Val: I);
1655 assert(Shuffle->getOperand(0)->getType() ==
1656 Shuffle->getOperand(1)->getType() &&
1657 "Expected shuffle operands to have same type");
1658 unsigned OpWidth = cast<FixedVectorType>(Val: Shuffle->getOperand(i_nocapture: 0)->getType())
1659 ->getNumElements();
1660 // Handle trivial case of a splat. Only check the first element of LHS
1661 // operand.
1662 if (all_of(Range: Shuffle->getShuffleMask(), P: equal_to(Arg: 0)) &&
1663 DemandedElts.isAllOnes()) {
1664 if (!isa<PoisonValue>(Val: I->getOperand(i: 1))) {
1665 I->setOperand(i: 1, Val: PoisonValue::get(T: I->getOperand(i: 1)->getType()));
1666 MadeChange = true;
1667 }
1668 APInt LeftDemanded(OpWidth, 1);
1669 APInt LHSPoisonElts(OpWidth, 0);
1670 simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts);
1671 if (LHSPoisonElts[0])
1672 PoisonElts = EltMask;
1673 else
1674 PoisonElts.clearAllBits();
1675 break;
1676 }
1677
1678 APInt LeftDemanded(OpWidth, 0), RightDemanded(OpWidth, 0);
1679 for (unsigned i = 0; i < VWidth; i++) {
1680 if (DemandedElts[i]) {
1681 unsigned MaskVal = Shuffle->getMaskValue(Elt: i);
1682 if (MaskVal != -1u) {
1683 assert(MaskVal < OpWidth * 2 &&
1684 "shufflevector mask index out of range!");
1685 if (MaskVal < OpWidth)
1686 LeftDemanded.setBit(MaskVal);
1687 else
1688 RightDemanded.setBit(MaskVal - OpWidth);
1689 }
1690 }
1691 }
1692
1693 APInt LHSPoisonElts(OpWidth, 0);
1694 simplifyAndSetOp(I, 0, LeftDemanded, LHSPoisonElts);
1695
1696 APInt RHSPoisonElts(OpWidth, 0);
1697 simplifyAndSetOp(I, 1, RightDemanded, RHSPoisonElts);
1698
1699 // If this shuffle does not change the vector length and the elements
1700 // demanded by this shuffle are an identity mask, then this shuffle is
1701 // unnecessary.
1702 //
1703 // We are assuming canonical form for the mask, so the source vector is
1704 // operand 0 and operand 1 is not used.
1705 //
1706 // Note that if an element is demanded and this shuffle mask is undefined
1707 // for that element, then the shuffle is not considered an identity
1708 // operation. The shuffle prevents poison from the operand vector from
1709 // leaking to the result by replacing poison with an undefined value.
1710 if (VWidth == OpWidth) {
1711 bool IsIdentityShuffle = true;
1712 for (unsigned i = 0; i < VWidth; i++) {
1713 unsigned MaskVal = Shuffle->getMaskValue(Elt: i);
1714 if (DemandedElts[i] && i != MaskVal) {
1715 IsIdentityShuffle = false;
1716 break;
1717 }
1718 }
1719 if (IsIdentityShuffle)
1720 return Shuffle->getOperand(i_nocapture: 0);
1721 }
1722
1723 bool NewPoisonElts = false;
1724 unsigned LHSIdx = -1u, LHSValIdx = -1u;
1725 unsigned RHSIdx = -1u, RHSValIdx = -1u;
1726 bool LHSUniform = true;
1727 bool RHSUniform = true;
1728 for (unsigned i = 0; i < VWidth; i++) {
1729 unsigned MaskVal = Shuffle->getMaskValue(Elt: i);
1730 if (MaskVal == -1u) {
1731 PoisonElts.setBit(i);
1732 } else if (!DemandedElts[i]) {
1733 NewPoisonElts = true;
1734 PoisonElts.setBit(i);
1735 } else if (MaskVal < OpWidth) {
1736 if (LHSPoisonElts[MaskVal]) {
1737 NewPoisonElts = true;
1738 PoisonElts.setBit(i);
1739 } else {
1740 LHSIdx = LHSIdx == -1u ? i : OpWidth;
1741 LHSValIdx = LHSValIdx == -1u ? MaskVal : OpWidth;
1742 LHSUniform = LHSUniform && (MaskVal == i);
1743 }
1744 } else {
1745 if (RHSPoisonElts[MaskVal - OpWidth]) {
1746 NewPoisonElts = true;
1747 PoisonElts.setBit(i);
1748 } else {
1749 RHSIdx = RHSIdx == -1u ? i : OpWidth;
1750 RHSValIdx = RHSValIdx == -1u ? MaskVal - OpWidth : OpWidth;
1751 RHSUniform = RHSUniform && (MaskVal - OpWidth == i);
1752 }
1753 }
1754 }
1755
1756 // Try to transform shuffle with constant vector and single element from
1757 // this constant vector to single insertelement instruction.
1758 // shufflevector V, C, <v1, v2, .., ci, .., vm> ->
1759 // insertelement V, C[ci], ci-n
1760 if (OpWidth ==
1761 cast<FixedVectorType>(Val: Shuffle->getType())->getNumElements()) {
1762 Value *Op = nullptr;
1763 Constant *Value = nullptr;
1764 unsigned Idx = -1u;
1765
1766 // Find constant vector with the single element in shuffle (LHS or RHS).
1767 if (LHSIdx < OpWidth && RHSUniform) {
1768 if (auto *CV = dyn_cast<ConstantVector>(Val: Shuffle->getOperand(i_nocapture: 0))) {
1769 Op = Shuffle->getOperand(i_nocapture: 1);
1770 Value = CV->getOperand(i_nocapture: LHSValIdx);
1771 Idx = LHSIdx;
1772 }
1773 }
1774 if (RHSIdx < OpWidth && LHSUniform) {
1775 if (auto *CV = dyn_cast<ConstantVector>(Val: Shuffle->getOperand(i_nocapture: 1))) {
1776 Op = Shuffle->getOperand(i_nocapture: 0);
1777 Value = CV->getOperand(i_nocapture: RHSValIdx);
1778 Idx = RHSIdx;
1779 }
1780 }
1781 // Found constant vector with single element - convert to insertelement.
1782 if (Op && Value) {
1783 Instruction *New = InsertElementInst::Create(
1784 Vec: Op, NewElt: Value, Idx: ConstantInt::get(Ty: Type::getInt64Ty(C&: I->getContext()), V: Idx),
1785 NameStr: Shuffle->getName());
1786 InsertNewInstWith(New, Old: Shuffle->getIterator());
1787 return New;
1788 }
1789 }
1790 if (NewPoisonElts) {
1791 // Add additional discovered undefs.
1792 SmallVector<int, 16> Elts;
1793 for (unsigned i = 0; i < VWidth; ++i) {
1794 if (PoisonElts[i])
1795 Elts.push_back(Elt: PoisonMaskElem);
1796 else
1797 Elts.push_back(Elt: Shuffle->getMaskValue(Elt: i));
1798 }
1799 Shuffle->setShuffleMask(Elts);
1800 MadeChange = true;
1801 }
1802 break;
1803 }
1804 case Instruction::Select: {
1805 // If this is a vector select, try to transform the select condition based
1806 // on the current demanded elements.
1807 SelectInst *Sel = cast<SelectInst>(Val: I);
1808 if (Sel->getCondition()->getType()->isVectorTy()) {
1809 // TODO: We are not doing anything with PoisonElts based on this call.
1810 // It is overwritten below based on the other select operands. If an
1811 // element of the select condition is known undef, then we are free to
1812 // choose the output value from either arm of the select. If we know that
1813 // one of those values is undef, then the output can be undef.
1814 simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
1815 }
1816
1817 // Next, see if we can transform the arms of the select.
1818 APInt DemandedLHS(DemandedElts), DemandedRHS(DemandedElts);
1819 if (auto *CV = dyn_cast<ConstantVector>(Val: Sel->getCondition())) {
1820 for (unsigned i = 0; i < VWidth; i++) {
1821 Constant *CElt = CV->getAggregateElement(Elt: i);
1822
1823 // isNullValue() always returns false when called on a ConstantExpr.
1824 if (CElt->isNullValue())
1825 DemandedLHS.clearBit(BitPosition: i);
1826 else if (CElt->isOneValue())
1827 DemandedRHS.clearBit(BitPosition: i);
1828 }
1829 }
1830
1831 simplifyAndSetOp(I, 1, DemandedLHS, PoisonElts2);
1832 simplifyAndSetOp(I, 2, DemandedRHS, PoisonElts3);
1833
1834 // Output elements are undefined if the element from each arm is undefined.
1835 // TODO: This can be improved. See comment in select condition handling.
1836 PoisonElts = PoisonElts2 & PoisonElts3;
1837 break;
1838 }
1839 case Instruction::BitCast: {
1840 // Vector->vector casts only.
1841 VectorType *VTy = dyn_cast<VectorType>(Val: I->getOperand(i: 0)->getType());
1842 if (!VTy) break;
1843 unsigned InVWidth = cast<FixedVectorType>(Val: VTy)->getNumElements();
1844 APInt InputDemandedElts(InVWidth, 0);
1845 PoisonElts2 = APInt(InVWidth, 0);
1846 unsigned Ratio;
1847
1848 if (VWidth == InVWidth) {
1849 // If we are converting from <4 x i32> -> <4 x f32>, we demand the same
1850 // elements as are demanded of us.
1851 Ratio = 1;
1852 InputDemandedElts = DemandedElts;
1853 } else if ((VWidth % InVWidth) == 0) {
1854 // If the number of elements in the output is a multiple of the number of
1855 // elements in the input then an input element is live if any of the
1856 // corresponding output elements are live.
1857 Ratio = VWidth / InVWidth;
1858 for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
1859 if (DemandedElts[OutIdx])
1860 InputDemandedElts.setBit(OutIdx / Ratio);
1861 } else if ((InVWidth % VWidth) == 0) {
1862 // If the number of elements in the input is a multiple of the number of
1863 // elements in the output then an input element is live if the
1864 // corresponding output element is live.
1865 Ratio = InVWidth / VWidth;
1866 for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx)
1867 if (DemandedElts[InIdx / Ratio])
1868 InputDemandedElts.setBit(InIdx);
1869 } else {
1870 // Unsupported so far.
1871 break;
1872 }
1873
1874 simplifyAndSetOp(I, 0, InputDemandedElts, PoisonElts2);
1875
1876 if (VWidth == InVWidth) {
1877 PoisonElts = PoisonElts2;
1878 } else if ((VWidth % InVWidth) == 0) {
1879 // If the number of elements in the output is a multiple of the number of
1880 // elements in the input then an output element is undef if the
1881 // corresponding input element is undef.
1882 for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
1883 if (PoisonElts2[OutIdx / Ratio])
1884 PoisonElts.setBit(OutIdx);
1885 } else if ((InVWidth % VWidth) == 0) {
1886 // If the number of elements in the input is a multiple of the number of
1887 // elements in the output then an output element is undef if all of the
1888 // corresponding input elements are undef.
1889 for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
1890 APInt SubUndef = PoisonElts2.lshr(shiftAmt: OutIdx * Ratio).zextOrTrunc(width: Ratio);
1891 if (SubUndef.popcount() == Ratio)
1892 PoisonElts.setBit(OutIdx);
1893 }
1894 } else {
1895 llvm_unreachable("Unimp");
1896 }
1897 break;
1898 }
1899 case Instruction::FPTrunc:
1900 case Instruction::FPExt:
1901 simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
1902 break;
1903
1904 case Instruction::Call: {
1905 IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I);
1906 if (!II) break;
1907 switch (II->getIntrinsicID()) {
1908 case Intrinsic::masked_gather: // fallthrough
1909 case Intrinsic::masked_load: {
1910 // Subtlety: If we load from a pointer, the pointer must be valid
1911 // regardless of whether the element is demanded. Doing otherwise risks
1912 // segfaults which didn't exist in the original program.
1913 APInt DemandedPtrs(APInt::getAllOnes(numBits: VWidth)),
1914 DemandedPassThrough(DemandedElts);
1915 if (auto *CMask = dyn_cast<Constant>(Val: II->getOperand(i_nocapture: 1))) {
1916 for (unsigned i = 0; i < VWidth; i++) {
1917 if (Constant *CElt = CMask->getAggregateElement(Elt: i)) {
1918 if (CElt->isNullValue())
1919 DemandedPtrs.clearBit(BitPosition: i);
1920 else if (CElt->isAllOnesValue())
1921 DemandedPassThrough.clearBit(BitPosition: i);
1922 }
1923 }
1924 }
1925
1926 if (II->getIntrinsicID() == Intrinsic::masked_gather)
1927 simplifyAndSetOp(II, 0, DemandedPtrs, PoisonElts2);
1928 simplifyAndSetOp(II, 2, DemandedPassThrough, PoisonElts3);
1929
1930 // Output elements are undefined if the element from both sources are.
1931 // TODO: can strengthen via mask as well.
1932 PoisonElts = PoisonElts2 & PoisonElts3;
1933 break;
1934 }
1935 default: {
1936 // Handle target specific intrinsics
1937 std::optional<Value *> V = targetSimplifyDemandedVectorEltsIntrinsic(
1938 II&: *II, DemandedElts, UndefElts&: PoisonElts, UndefElts2&: PoisonElts2, UndefElts3&: PoisonElts3,
1939 SimplifyAndSetOp: simplifyAndSetOp);
1940 if (V)
1941 return *V;
1942 break;
1943 }
1944 } // switch on IntrinsicID
1945 break;
1946 } // case Call
1947 } // switch on Opcode
1948
1949 // TODO: We bail completely on integer div/rem and shifts because they have
1950 // UB/poison potential, but that should be refined.
1951 BinaryOperator *BO;
1952 if (match(V: I, P: m_BinOp(I&: BO)) && !BO->isIntDivRem() && !BO->isShift()) {
1953 Value *X = BO->getOperand(i_nocapture: 0);
1954 Value *Y = BO->getOperand(i_nocapture: 1);
1955
1956 // Look for an equivalent binop except that one operand has been shuffled.
1957 // If the demand for this binop only includes elements that are the same as
1958 // the other binop, then we may be able to replace this binop with a use of
1959 // the earlier one.
1960 //
1961 // Example:
1962 // %other_bo = bo (shuf X, {0}), Y
1963 // %this_extracted_bo = extelt (bo X, Y), 0
1964 // -->
1965 // %other_bo = bo (shuf X, {0}), Y
1966 // %this_extracted_bo = extelt %other_bo, 0
1967 //
1968 // TODO: Handle demand of an arbitrary single element or more than one
1969 // element instead of just element 0.
1970 // TODO: Unlike general demanded elements transforms, this should be safe
1971 // for any (div/rem/shift) opcode too.
1972 if (DemandedElts == 1 && !X->hasOneUse() && !Y->hasOneUse() &&
1973 BO->hasOneUse() ) {
1974
1975 auto findShufBO = [&](bool MatchShufAsOp0) -> User * {
1976 // Try to use shuffle-of-operand in place of an operand:
1977 // bo X, Y --> bo (shuf X), Y
1978 // bo X, Y --> bo X, (shuf Y)
1979
1980 Value *OtherOp = MatchShufAsOp0 ? Y : X;
1981 if (!OtherOp->hasUseList())
1982 return nullptr;
1983
1984 BinaryOperator::BinaryOps Opcode = BO->getOpcode();
1985 Value *ShufOp = MatchShufAsOp0 ? X : Y;
1986
1987 for (User *U : OtherOp->users()) {
1988 ArrayRef<int> Mask;
1989 auto Shuf = m_Shuffle(v1: m_Specific(V: ShufOp), v2: m_Value(), mask: m_Mask(Mask));
1990 if (BO->isCommutative()
1991 ? match(V: U, P: m_c_BinOp(Opcode, L: Shuf, R: m_Specific(V: OtherOp)))
1992 : MatchShufAsOp0
1993 ? match(V: U, P: m_BinOp(Opcode, L: Shuf, R: m_Specific(V: OtherOp)))
1994 : match(V: U, P: m_BinOp(Opcode, L: m_Specific(V: OtherOp), R: Shuf)))
1995 if (match(Mask, P: m_ZeroMask()) && Mask[0] != PoisonMaskElem)
1996 if (DT.dominates(Def: U, User: I))
1997 return U;
1998 }
1999 return nullptr;
2000 };
2001
2002 User *ShufBO = findShufBO(/* MatchShufAsOp0 */ true);
2003 if (!ShufBO)
2004 ShufBO = findShufBO(/* MatchShufAsOp0 */ false);
2005 if (ShufBO) {
2006 auto *ShufBOI = cast<Instruction>(Val: ShufBO);
2007 ShufBOI->andIRFlags(V: BO);
2008 Worklist.add(I: ShufBOI);
2009 return ShufBO;
2010 }
2011 }
2012
2013 simplifyAndSetOp(I, 0, DemandedElts, PoisonElts);
2014 simplifyAndSetOp(I, 1, DemandedElts, PoisonElts2);
2015
2016 // Output elements are undefined if both are undefined. Consider things
2017 // like undef & 0. The result is known zero, not undef.
2018 PoisonElts &= PoisonElts2;
2019 }
2020
2021 // If we've proven all of the lanes poison, return a poison value.
2022 // TODO: Intersect w/demanded lanes
2023 if (PoisonElts.isAllOnes())
2024 return PoisonValue::get(T: I->getType());
2025
2026 return MadeChange ? I : nullptr;
2027}
2028
2029/// For floating-point classes that resolve to a single bit pattern, return that
2030/// value.
2031static Constant *getFPClassConstant(Type *Ty, FPClassTest Mask,
2032 bool IsCanonicalizing = false) {
2033 if (Mask == fcNone)
2034 return PoisonValue::get(T: Ty);
2035
2036 if (Mask == fcPosZero)
2037 return Constant::getNullValue(Ty);
2038
2039 // TODO: Support aggregate types that are allowed by FPMathOperator.
2040 if (Ty->isAggregateType())
2041 return nullptr;
2042
2043 // Turn any possible snans into quiet if we can.
2044 if (Mask == fcNan && IsCanonicalizing)
2045 return ConstantFP::getQNaN(Ty);
2046
2047 switch (Mask) {
2048 case fcNegZero:
2049 return ConstantFP::getZero(Ty, Negative: true);
2050 case fcPosInf:
2051 return ConstantFP::getInfinity(Ty);
2052 case fcNegInf:
2053 return ConstantFP::getInfinity(Ty, Negative: true);
2054 case fcQNan:
2055 // Payload bits cannot be dropped for pure signbit operations.
2056 return IsCanonicalizing ? ConstantFP::getQNaN(Ty) : nullptr;
2057 default:
2058 return nullptr;
2059 }
2060}
2061
2062/// Perform multiple-use aware simplfications for fabs(\p Src). Returns a
2063/// replacement value if it's simplified, otherwise nullptr. Updates \p Known
2064/// with the known fpclass if not simplified.
2065static Value *simplifyDemandedFPClassFabs(KnownFPClass &Known, Value *Src,
2066 FPClassTest DemandedMask,
2067 KnownFPClass KnownSrc, bool NSZ) {
2068 if ((DemandedMask & fcNan) == fcNone)
2069 KnownSrc.knownNot(RuleOut: fcNan);
2070 if ((DemandedMask & fcInf) == fcNone)
2071 KnownSrc.knownNot(RuleOut: fcInf);
2072
2073 if (KnownSrc.SignBit == false ||
2074 ((DemandedMask & fcNan) == fcNone && KnownSrc.isKnownNever(Mask: fcNegative)))
2075 return Src;
2076
2077 // If the only sign bit difference is due to -0, ignore it with nsz
2078 if (NSZ &&
2079 KnownSrc.isKnownNever(Mask: KnownFPClass::OrderedLessThanZeroMask | fcNan))
2080 return Src;
2081
2082 Known = KnownFPClass::fabs(Src: KnownSrc);
2083 Known.knownNot(RuleOut: ~DemandedMask);
2084 return nullptr;
2085}
2086
2087/// Try to set an inferred no-nans or no-infs in \p FMF. \p ValidResults is a
2088/// mask of known valid results for the operator (already computed from the
2089/// result, and the known operand inputs in \p Known)
2090static FastMathFlags inferFastMathValueFlags(FastMathFlags FMF,
2091 FPClassTest ValidResults,
2092 ArrayRef<KnownFPClass> Known) {
2093 if (!FMF.noNaNs() && (ValidResults & fcNan) == fcNone) {
2094 if (all_of(Range&: Known, P: [](const KnownFPClass KnownSrc) {
2095 return KnownSrc.isKnownNeverNaN();
2096 }))
2097 FMF.setNoNaNs();
2098 }
2099
2100 if (!FMF.noInfs() && (ValidResults & fcInf) == fcNone) {
2101 if (all_of(Range&: Known, P: [](const KnownFPClass KnownSrc) {
2102 return KnownSrc.isKnownNeverInfinity();
2103 }))
2104 FMF.setNoInfs();
2105 }
2106
2107 return FMF;
2108}
2109
2110static FPClassTest adjustDemandedMaskFromFlags(FPClassTest DemandedMask,
2111 FastMathFlags FMF) {
2112 if (FMF.noNaNs())
2113 DemandedMask &= ~fcNan;
2114
2115 if (FMF.noInfs())
2116 DemandedMask &= ~fcInf;
2117 return DemandedMask;
2118}
2119
2120/// Apply epilog fixups to a floating-point intrinsic. See if the result can
2121/// fold to a constant, or apply fast math flags.
2122static Value *simplifyDemandedFPClassResult(Instruction *FPOp,
2123 FastMathFlags FMF,
2124 FPClassTest DemandedMask,
2125 KnownFPClass &Known,
2126 ArrayRef<KnownFPClass> KnownSrcs) {
2127 FPClassTest ValidResults = DemandedMask & Known.KnownFPClasses;
2128 Constant *SingleVal = getFPClassConstant(Ty: FPOp->getType(), Mask: ValidResults,
2129 /*IsCanonicalizing=*/true);
2130 if (SingleVal)
2131 return SingleVal;
2132
2133 FastMathFlags InferredFMF =
2134 inferFastMathValueFlags(FMF, ValidResults, Known: KnownSrcs);
2135 if (InferredFMF != FMF) {
2136 FPOp->dropUBImplyingAttrsAndMetadata();
2137 FPOp->setFastMathFlags(InferredFMF);
2138 return FPOp;
2139 }
2140
2141 return nullptr;
2142}
2143
2144/// Perform multiple-use aware simplfications for fneg(fabs(\p Src)). Returns a
2145/// replacement value if it's simplified, otherwise nullptr. Updates \p Known
2146/// with the known fpclass if not simplified.
2147static Value *simplifyDemandedFPClassFnegFabs(KnownFPClass &Known, Value *Src,
2148 FPClassTest DemandedMask,
2149 KnownFPClass KnownSrc, bool NSZ) {
2150 if ((DemandedMask & fcNan) == fcNone)
2151 KnownSrc.knownNot(RuleOut: fcNan);
2152 if ((DemandedMask & fcInf) == fcNone)
2153 KnownSrc.knownNot(RuleOut: fcInf);
2154
2155 // If the source value is known negative, we can directly fold to it.
2156 if (KnownSrc.SignBit == true)
2157 return Src;
2158
2159 // If the only sign bit difference is for 0, ignore it with nsz.
2160 if (NSZ &&
2161 KnownSrc.isKnownNever(Mask: KnownFPClass::OrderedGreaterThanZeroMask | fcNan))
2162 return Src;
2163
2164 Known = KnownFPClass::fneg(Src: KnownFPClass::fabs(Src: KnownSrc));
2165 Known.knownNot(RuleOut: ~DemandedMask);
2166 return nullptr;
2167}
2168
2169static Value *simplifyDemandedFPClassCopysignMag(Value *MagSrc,
2170 FPClassTest DemandedMask,
2171 KnownFPClass KnownSrc,
2172 bool NSZ) {
2173 if (NSZ) {
2174 constexpr FPClassTest NegOrZero = fcNegative | fcPosZero;
2175 constexpr FPClassTest PosOrZero = fcPositive | fcNegZero;
2176
2177 if ((DemandedMask & ~NegOrZero) == fcNone &&
2178 KnownSrc.isKnownAlways(Mask: NegOrZero))
2179 return MagSrc;
2180
2181 if ((DemandedMask & ~PosOrZero) == fcNone &&
2182 KnownSrc.isKnownAlways(Mask: PosOrZero))
2183 return MagSrc;
2184 } else {
2185 if ((DemandedMask & ~fcNegative) == fcNone && KnownSrc.SignBit == true)
2186 return MagSrc;
2187
2188 if ((DemandedMask & ~fcPositive) == fcNone && KnownSrc.SignBit == false)
2189 return MagSrc;
2190 }
2191
2192 return nullptr;
2193}
2194
2195static Value *
2196simplifyDemandedFPClassMinMax(KnownFPClass &Known, Intrinsic::ID IID,
2197 const CallInst *CI, FPClassTest DemandedMask,
2198 KnownFPClass KnownLHS, KnownFPClass KnownRHS,
2199 const Function &F, bool NSZ) {
2200 bool OrderedZeroSign = !NSZ;
2201
2202 KnownFPClass::MinMaxKind OpKind;
2203 switch (IID) {
2204 case Intrinsic::maximum: {
2205 OpKind = KnownFPClass::MinMaxKind::maximum;
2206
2207 // If one operand is known greater than the other, it must be that
2208 // operand unless the other is a nan.
2209 if (cannotOrderStrictlyLess(LHS: KnownLHS.KnownFPClasses,
2210 RHS: KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2211 KnownRHS.isKnownNever(Mask: fcNan))
2212 return CI->getArgOperand(i: 0);
2213
2214 if (cannotOrderStrictlyGreater(LHS: KnownLHS.KnownFPClasses,
2215 RHS: KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2216 KnownLHS.isKnownNever(Mask: fcNan))
2217 return CI->getArgOperand(i: 1);
2218
2219 break;
2220 }
2221 case Intrinsic::minimum: {
2222 OpKind = KnownFPClass::MinMaxKind::minimum;
2223
2224 // If one operand is known less than the other, it must be that operand
2225 // unless the other is a nan.
2226 if (cannotOrderStrictlyGreater(LHS: KnownLHS.KnownFPClasses,
2227 RHS: KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2228 KnownRHS.isKnownNever(Mask: fcNan))
2229 return CI->getArgOperand(i: 0);
2230
2231 if (cannotOrderStrictlyLess(LHS: KnownLHS.KnownFPClasses,
2232 RHS: KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2233 KnownLHS.isKnownNever(Mask: fcNan))
2234 return CI->getArgOperand(i: 1);
2235
2236 break;
2237 }
2238 case Intrinsic::maxnum:
2239 case Intrinsic::maximumnum: {
2240 OpKind = IID == Intrinsic::maxnum ? KnownFPClass::MinMaxKind::maxnum
2241 : KnownFPClass::MinMaxKind::maximumnum;
2242
2243 if (cannotOrderStrictlyLess(LHS: KnownLHS.KnownFPClasses,
2244 RHS: KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2245 KnownLHS.isKnownNever(Mask: fcNan))
2246 return CI->getArgOperand(i: 0);
2247
2248 if (cannotOrderStrictlyGreater(LHS: KnownLHS.KnownFPClasses,
2249 RHS: KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2250 KnownRHS.isKnownNever(Mask: fcNan))
2251 return CI->getArgOperand(i: 1);
2252
2253 break;
2254 }
2255 case Intrinsic::minnum:
2256 case Intrinsic::minimumnum: {
2257 OpKind = IID == Intrinsic::minnum ? KnownFPClass::MinMaxKind::minnum
2258 : KnownFPClass::MinMaxKind::minimumnum;
2259
2260 if (cannotOrderStrictlyGreater(LHS: KnownLHS.KnownFPClasses,
2261 RHS: KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2262 KnownLHS.isKnownNever(Mask: fcNan))
2263 return CI->getArgOperand(i: 0);
2264
2265 if (cannotOrderStrictlyLess(LHS: KnownLHS.KnownFPClasses,
2266 RHS: KnownRHS.KnownFPClasses, OrderedZeroSign) &&
2267 KnownRHS.isKnownNever(Mask: fcNan))
2268 return CI->getArgOperand(i: 1);
2269
2270 break;
2271 }
2272 default:
2273 llvm_unreachable("not a min/max intrinsic");
2274 }
2275
2276 Type *EltTy = CI->getType()->getScalarType();
2277 DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics());
2278 Known = KnownFPClass::minMaxLike(LHS: KnownLHS, RHS: KnownRHS, Kind: OpKind, DenormMode: Mode);
2279 Known.knownNot(RuleOut: ~DemandedMask);
2280
2281 return getFPClassConstant(Ty: CI->getType(), Mask: Known.KnownFPClasses,
2282 /*IsCanonicalizing=*/true);
2283}
2284
2285static Value *
2286simplifyDemandedUseFPClassFPTrunc(InstCombinerImpl &IC, Instruction &I,
2287 FastMathFlags FMF, FPClassTest DemandedMask,
2288 KnownFPClass &Known, const SimplifyQuery &SQ,
2289 unsigned Depth) {
2290
2291 FPClassTest SrcDemandedMask = DemandedMask;
2292 if (DemandedMask & fcNan)
2293 SrcDemandedMask |= fcNan;
2294
2295 // Zero results may have been rounded from subnormal or normal sources.
2296 if (DemandedMask & fcNegZero)
2297 SrcDemandedMask |= fcNegSubnormal | fcNegNormal;
2298 if (DemandedMask & fcPosZero)
2299 SrcDemandedMask |= fcPosSubnormal | fcPosNormal;
2300
2301 // Subnormal results may have been normal in the source type
2302 if (DemandedMask & fcNegSubnormal)
2303 SrcDemandedMask |= fcNegNormal;
2304 if (DemandedMask & fcPosSubnormal)
2305 SrcDemandedMask |= fcPosNormal;
2306
2307 if (DemandedMask & fcPosInf)
2308 SrcDemandedMask |= fcPosNormal;
2309 if (DemandedMask & fcNegInf)
2310 SrcDemandedMask |= fcNegNormal;
2311
2312 KnownFPClass KnownSrc;
2313 if (IC.SimplifyDemandedFPClass(I: &I, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownSrc, Q: SQ,
2314 Depth: Depth + 1))
2315 return &I;
2316
2317 Known = KnownFPClass::fptrunc(KnownSrc);
2318 Known.knownNot(RuleOut: ~DemandedMask);
2319
2320 return simplifyDemandedFPClassResult(FPOp: &I, FMF, DemandedMask, Known,
2321 KnownSrcs: {KnownSrc});
2322}
2323
2324Value *InstCombinerImpl::SimplifyDemandedUseFPClass(Instruction *I,
2325 FPClassTest DemandedMask,
2326 KnownFPClass &Known,
2327 const SimplifyQuery &SQ,
2328 unsigned Depth) {
2329 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
2330 assert(Known == KnownFPClass() && "expected uninitialized state");
2331
2332 Type *VTy = I->getType();
2333
2334 FastMathFlags FMF;
2335 if (auto *FPOp = dyn_cast<FPMathOperator>(Val: I)) {
2336 FMF = FPOp->getFastMathFlags();
2337 DemandedMask = adjustDemandedMaskFromFlags(DemandedMask, FMF);
2338 }
2339
2340 switch (I->getOpcode()) {
2341 case Instruction::FNeg: {
2342 // Special case fneg(fabs(x))
2343
2344 Value *FNegSrc = I->getOperand(i: 0);
2345 Value *FNegFAbsSrc;
2346 if (match(V: FNegSrc, P: m_OneUse(SubPattern: m_FAbs(Op0: m_Value(V&: FNegFAbsSrc))))) {
2347 KnownFPClass KnownSrc;
2348 if (SimplifyDemandedFPClass(I: cast<Instruction>(Val: FNegSrc), Op: 0,
2349 DemandedMask: llvm::unknown_sign(Mask: DemandedMask), Known&: KnownSrc,
2350 Q: SQ, Depth: Depth + 1))
2351 return I;
2352
2353 FastMathFlags FabsFMF = cast<FPMathOperator>(Val: FNegSrc)->getFastMathFlags();
2354 FPClassTest ThisDemandedMask =
2355 adjustDemandedMaskFromFlags(DemandedMask, FMF: FabsFMF);
2356
2357 bool IsNSZ = FMF.noSignedZeros() || FabsFMF.noSignedZeros();
2358 if (Value *Simplified = simplifyDemandedFPClassFnegFabs(
2359 Known, Src: FNegFAbsSrc, DemandedMask: ThisDemandedMask, KnownSrc, NSZ: IsNSZ))
2360 return Simplified;
2361
2362 if ((ThisDemandedMask & fcNan) == fcNone)
2363 KnownSrc.knownNot(RuleOut: fcNan);
2364 if ((ThisDemandedMask & fcInf) == fcNone)
2365 KnownSrc.knownNot(RuleOut: fcInf);
2366
2367 // fneg(fabs(x)) => fneg(x)
2368 if (KnownSrc.SignBit == false)
2369 return replaceOperand(I&: *I, OpNum: 0, V: FNegFAbsSrc);
2370
2371 // fneg(fabs(x)) => fneg(x), ignoring -0 if nsz.
2372 if (IsNSZ &&
2373 KnownSrc.isKnownNever(Mask: KnownFPClass::OrderedLessThanZeroMask | fcNan))
2374 return replaceOperand(I&: *I, OpNum: 0, V: FNegFAbsSrc);
2375
2376 break;
2377 }
2378
2379 if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: llvm::fneg(Mask: DemandedMask), Known, Q: SQ,
2380 Depth: Depth + 1))
2381 return I;
2382 Known.fneg();
2383 Known.knownNot(RuleOut: ~DemandedMask);
2384 break;
2385 }
2386 case Instruction::FAdd:
2387 case Instruction::FSub: {
2388 KnownFPClass KnownLHS, KnownRHS;
2389
2390 // fadd x, x can be handled more aggressively.
2391 if (I->getOperand(i: 0) == I->getOperand(i: 1) &&
2392 I->getOpcode() == Instruction::FAdd &&
2393 isGuaranteedNotToBeUndef(V: I->getOperand(i: 0), AC: SQ.AC, CtxI: SQ.CxtI, DT: SQ.DT,
2394 Depth: Depth + 1)) {
2395 Type *EltTy = VTy->getScalarType();
2396 DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics());
2397
2398 FPClassTest SrcDemandedMask = DemandedMask;
2399 if (DemandedMask & fcNan)
2400 SrcDemandedMask |= fcNan;
2401
2402 // Doubling a subnormal could have resulted in a normal value.
2403 if (DemandedMask & fcPosNormal)
2404 SrcDemandedMask |= fcPosSubnormal;
2405 if (DemandedMask & fcNegNormal)
2406 SrcDemandedMask |= fcNegSubnormal;
2407
2408 // Doubling a subnormal may produce 0 if FTZ/DAZ.
2409 if (Mode != DenormalMode::getIEEE()) {
2410 if (DemandedMask & fcPosZero) {
2411 SrcDemandedMask |= fcPosSubnormal;
2412
2413 if (Mode.inputsMayBePositiveZero() || Mode.outputsMayBePositiveZero())
2414 SrcDemandedMask |= fcNegSubnormal;
2415 }
2416
2417 if (DemandedMask & fcNegZero)
2418 SrcDemandedMask |= fcNegSubnormal;
2419 }
2420
2421 // Doubling a normal could have resulted in an infinity.
2422 if (DemandedMask & fcPosInf)
2423 SrcDemandedMask |= fcPosNormal;
2424 if (DemandedMask & fcNegInf)
2425 SrcDemandedMask |= fcNegNormal;
2426
2427 if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownLHS, Q: SQ,
2428 Depth: Depth + 1))
2429 return I;
2430
2431 Known = KnownFPClass::fadd_self(Src: KnownLHS, Mode);
2432 KnownRHS = KnownLHS;
2433 } else {
2434 FPClassTest SrcDemandedMask = fcFinite;
2435
2436 // inf + (-inf) = nan
2437 if (DemandedMask & fcNan)
2438 SrcDemandedMask |= fcNan | fcInf;
2439
2440 if (DemandedMask & fcInf)
2441 SrcDemandedMask |= fcInf;
2442
2443 if (SimplifyDemandedFPClass(I, Op: 1, DemandedMask: SrcDemandedMask, Known&: KnownRHS, Q: SQ,
2444 Depth: Depth + 1) ||
2445 SimplifyDemandedFPClass(I, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownLHS, Q: SQ,
2446 Depth: Depth + 1))
2447 return I;
2448
2449 Type *EltTy = VTy->getScalarType();
2450 DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics());
2451
2452 Known = I->getOpcode() == Instruction::FAdd
2453 ? KnownFPClass::fadd(LHS: KnownLHS, RHS: KnownRHS, Mode)
2454 : KnownFPClass::fsub(LHS: KnownLHS, RHS: KnownRHS, Mode);
2455 }
2456
2457 Known.knownNot(RuleOut: ~DemandedMask);
2458
2459 if (Constant *SingleVal = getFPClassConstant(Ty: VTy, Mask: Known.KnownFPClasses,
2460 /*IsCanonicalizing=*/true))
2461 return SingleVal;
2462
2463 // Propagate known result to simplify edge case checks.
2464 bool ResultNotNan = (DemandedMask & fcNan) == fcNone;
2465
2466 // With nnan: X + {+/-}Inf --> {+/-}Inf
2467 if (ResultNotNan && I->getOpcode() == Instruction::FAdd &&
2468 KnownRHS.isKnownAlways(Mask: fcInf | fcNan) && KnownLHS.isKnownNever(Mask: fcNan))
2469 return I->getOperand(i: 1);
2470
2471 // With nnan: {+/-}Inf + X --> {+/-}Inf
2472 // With nnan: {+/-}Inf - X --> {+/-}Inf
2473 if (ResultNotNan && KnownLHS.isKnownAlways(Mask: fcInf | fcNan) &&
2474 KnownRHS.isKnownNever(Mask: fcNan))
2475 return I->getOperand(i: 0);
2476
2477 FastMathFlags InferredFMF = inferFastMathValueFlags(
2478 FMF, ValidResults: Known.KnownFPClasses, Known: {KnownLHS, KnownRHS});
2479 if (InferredFMF != FMF) {
2480 I->setFastMathFlags(InferredFMF);
2481 return I;
2482 }
2483
2484 return nullptr;
2485 }
2486 case Instruction::FMul: {
2487 KnownFPClass KnownLHS, KnownRHS;
2488
2489 Value *X = I->getOperand(i: 0);
2490 Value *Y = I->getOperand(i: 1);
2491
2492 FPClassTest SrcDemandedMask =
2493 DemandedMask & (fcNan | fcZero | fcSubnormal | fcNormal);
2494
2495 if (DemandedMask & fcInf) {
2496 // mul x, inf = inf
2497 // mul large_x, large_y = inf
2498 SrcDemandedMask |= fcSubnormal | fcNormal | fcInf;
2499 }
2500
2501 if (DemandedMask & fcNan) {
2502 // mul +/-inf, 0 => nan
2503 SrcDemandedMask |= fcZero | fcInf | fcNan;
2504
2505 // TODO: Mode check
2506 // mul +/-inf, sub => nan if daz
2507 SrcDemandedMask |= fcSubnormal;
2508 }
2509
2510 // mul normal, subnormal = normal
2511 // Normal inputs may result in underflow.
2512 if (DemandedMask & (fcNormal | fcSubnormal))
2513 SrcDemandedMask |= fcNormal | fcSubnormal;
2514
2515 if (DemandedMask & fcZero)
2516 SrcDemandedMask |= fcNormal | fcSubnormal;
2517
2518 if (X == Y &&
2519 isGuaranteedNotToBeUndef(V: X, AC: SQ.AC, CtxI: SQ.CxtI, DT: SQ.DT, Depth: Depth + 1)) {
2520 if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownLHS, Q: SQ,
2521 Depth: Depth + 1))
2522 return I;
2523 Type *EltTy = VTy->getScalarType();
2524
2525 DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics());
2526 Known = KnownFPClass::square(Src: KnownLHS, Mode);
2527 Known.knownNot(RuleOut: ~DemandedMask);
2528
2529 if (Constant *Folded = getFPClassConstant(Ty: VTy, Mask: Known.KnownFPClasses,
2530 /*IsCanonicalizing=*/true))
2531 return Folded;
2532
2533 if (Known.isKnownAlways(Mask: fcPosZero | fcPosInf | fcNan) &&
2534 KnownLHS.isKnownNever(Mask: fcSubnormal | fcNormal)) {
2535 // We can skip the fabs if the source was already known positive.
2536 if (KnownLHS.isKnownAlways(Mask: fcPositive))
2537 return X;
2538
2539 // => fabs(x), in case this was a -inf or -0.
2540 // Note: Dropping canonicalize.
2541 IRBuilderBase::InsertPointGuard Guard(Builder);
2542 Builder.SetInsertPoint(I);
2543 Value *Fabs = Builder.CreateFAbs(V: X, FMFSource: FMF);
2544 Fabs->takeName(V: I);
2545 return Fabs;
2546 }
2547
2548 return nullptr;
2549 }
2550
2551 if (SimplifyDemandedFPClass(I, Op: 1, DemandedMask: SrcDemandedMask, Known&: KnownRHS, Q: SQ,
2552 Depth: Depth + 1) ||
2553 SimplifyDemandedFPClass(I, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownLHS, Q: SQ, Depth: Depth + 1))
2554 return I;
2555
2556 if (FMF.noInfs()) {
2557 // Flag implies inputs cannot be infinity.
2558 KnownLHS.knownNot(RuleOut: fcInf);
2559 KnownRHS.knownNot(RuleOut: fcInf);
2560 }
2561
2562 bool NonNanResult = (DemandedMask & fcNan) == fcNone;
2563
2564 // With no-nans/no-infs:
2565 // X * 0.0 --> copysign(0.0, X)
2566 // X * -0.0 --> copysign(0.0, -X)
2567 if ((NonNanResult || KnownLHS.isKnownNeverInfOrNaN()) &&
2568 KnownRHS.isKnownAlways(Mask: fcPosZero | fcNan)) {
2569 IRBuilderBase::InsertPointGuard Guard(Builder);
2570 Builder.SetInsertPoint(I);
2571
2572 // => copysign(+0, lhs)
2573 // Note: Dropping canonicalize
2574 Value *Copysign = Builder.CreateCopySign(LHS: Y, RHS: X, FMFSource: FMF);
2575 Copysign->takeName(V: I);
2576 return Copysign;
2577 }
2578
2579 if (KnownLHS.isKnownAlways(Mask: fcPosZero | fcNan) &&
2580 (NonNanResult || KnownRHS.isKnownNeverInfOrNaN())) {
2581 IRBuilderBase::InsertPointGuard Guard(Builder);
2582 Builder.SetInsertPoint(I);
2583
2584 // => copysign(+0, rhs)
2585 // Note: Dropping canonicalize
2586 Value *Copysign = Builder.CreateCopySign(LHS: X, RHS: Y, FMFSource: FMF);
2587 Copysign->takeName(V: I);
2588 return Copysign;
2589 }
2590
2591 if ((NonNanResult || KnownLHS.isKnownNeverInfOrNaN()) &&
2592 KnownRHS.isKnownAlways(Mask: fcNegZero | fcNan)) {
2593 IRBuilderBase::InsertPointGuard Guard(Builder);
2594 Builder.SetInsertPoint(I);
2595
2596 // => copysign(0, fneg(lhs))
2597 // Note: Dropping canonicalize
2598 Value *Copysign =
2599 Builder.CreateCopySign(LHS: Y, RHS: Builder.CreateFNegFMF(V: X, FMFSource: FMF), FMFSource: FMF);
2600 Copysign->takeName(V: I);
2601 return Copysign;
2602 }
2603
2604 if (KnownLHS.isKnownAlways(Mask: fcNegZero | fcNan) &&
2605 (NonNanResult || KnownRHS.isKnownNeverInfOrNaN())) {
2606 IRBuilderBase::InsertPointGuard Guard(Builder);
2607 Builder.SetInsertPoint(I);
2608
2609 // => copysign(+0, fneg(rhs))
2610 // Note: Dropping canonicalize
2611 Value *Copysign =
2612 Builder.CreateCopySign(LHS: X, RHS: Builder.CreateFNegFMF(V: Y, FMFSource: FMF), FMFSource: FMF);
2613 Copysign->takeName(V: I);
2614 return Copysign;
2615 }
2616
2617 Type *EltTy = VTy->getScalarType();
2618 DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics());
2619
2620 if (KnownLHS.isKnownAlways(Mask: fcInf | fcNan) &&
2621 (KnownRHS.isKnownNeverNaN() &&
2622 KnownRHS.cannotBeOrderedGreaterEqZero(Mode))) {
2623 IRBuilderBase::InsertPointGuard Guard(Builder);
2624 Builder.SetInsertPoint(I);
2625
2626 // Note: Dropping canonicalize
2627 Value *Neg = Builder.CreateFNegFMF(V: X, FMFSource: FMF);
2628 Neg->takeName(V: I);
2629 return Neg;
2630 }
2631
2632 if (KnownRHS.isKnownAlways(Mask: fcInf | fcNan) &&
2633 (KnownLHS.isKnownNeverNaN() &&
2634 KnownLHS.cannotBeOrderedGreaterEqZero(Mode))) {
2635 IRBuilderBase::InsertPointGuard Guard(Builder);
2636 Builder.SetInsertPoint(I);
2637
2638 // Note: Dropping canonicalize
2639 Value *Neg = Builder.CreateFNegFMF(V: Y, FMFSource: FMF);
2640 Neg->takeName(V: I);
2641 return Neg;
2642 }
2643
2644 Known = KnownFPClass::fmul(LHS: KnownLHS, RHS: KnownRHS, Mode);
2645 Known.knownNot(RuleOut: ~DemandedMask);
2646
2647 if (Constant *SingleVal = getFPClassConstant(Ty: VTy, Mask: Known.KnownFPClasses,
2648 /*IsCanonicalizing=*/true))
2649 return SingleVal;
2650
2651 FastMathFlags InferredFMF = inferFastMathValueFlags(
2652 FMF, ValidResults: Known.KnownFPClasses, Known: {KnownLHS, KnownRHS});
2653 if (InferredFMF != FMF) {
2654 I->setFastMathFlags(InferredFMF);
2655 return I;
2656 }
2657
2658 return nullptr;
2659 }
2660 case Instruction::FDiv: {
2661 Value *X = I->getOperand(i: 0);
2662 Value *Y = I->getOperand(i: 1);
2663 if (X == Y &&
2664 isGuaranteedNotToBeUndef(V: X, AC: SQ.AC, CtxI: SQ.CxtI, DT: SQ.DT, Depth: Depth + 1)) {
2665 // If the source is 0, inf or nan, the result is a nan
2666 IRBuilderBase::InsertPointGuard Guard(Builder);
2667 Builder.SetInsertPoint(I);
2668
2669 Value *IsZeroOrNan = Builder.CreateFCmpFMF(
2670 P: FCmpInst::FCMP_UEQ, LHS: I->getOperand(i: 0), RHS: ConstantFP::getZero(Ty: VTy), FMFSource: FMF);
2671
2672 Value *Fabs = Builder.CreateFAbs(V: I->getOperand(i: 0), FMFSource: FMF);
2673 Value *IsInfOrNan = Builder.CreateFCmpFMF(
2674 P: FCmpInst::FCMP_UEQ, LHS: Fabs, RHS: ConstantFP::getInfinity(Ty: VTy), FMFSource: FMF);
2675
2676 Value *IsInfOrZeroOrNan = Builder.CreateOr(LHS: IsInfOrNan, RHS: IsZeroOrNan);
2677
2678 return Builder.CreateSelectFMFWithUnknownProfile(
2679 C: IsInfOrZeroOrNan, True: ConstantFP::getQNaN(Ty: VTy),
2680 False: ConstantFP::get(
2681 Ty: VTy, V: APFloat::getOne(Sem: VTy->getScalarType()->getFltSemantics())),
2682 FMFSource: FMF, DEBUG_TYPE);
2683 }
2684
2685 Type *EltTy = VTy->getScalarType();
2686 DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics());
2687
2688 // Every output class could require denormal inputs (except for the
2689 // degenerate case of only-nan results, without DAZ).
2690 FPClassTest SrcDemandedMask = (DemandedMask & fcNan) | fcSubnormal;
2691
2692 // Normal inputs may result in underflow.
2693 // x / x = 1.0 for non0/inf/nan
2694 // -x = +y / -z
2695 // -x = -y / +z
2696 if (DemandedMask & (fcSubnormal | fcNormal))
2697 SrcDemandedMask |= fcNormal;
2698
2699 if (DemandedMask & fcNan) {
2700 // 0 / 0 = nan
2701 // inf / inf = nan
2702
2703 // Subnormal is added in case of DAZ, but this isn't strictly
2704 // necessary. Every other input class implies a possible subnormal source,
2705 // so this only could matter in the degenerate case of only-nan results.
2706 SrcDemandedMask |= fcZero | fcInf | fcNan;
2707 }
2708
2709 // Zero outputs may be the result of underflow.
2710 if (DemandedMask & fcZero)
2711 SrcDemandedMask |= fcNormal | fcSubnormal;
2712
2713 FPClassTest LHSDemandedMask = SrcDemandedMask;
2714 FPClassTest RHSDemandedMask = SrcDemandedMask;
2715
2716 // 0 / inf = 0
2717 if (DemandedMask & fcZero) {
2718 assert((LHSDemandedMask & fcSubnormal) &&
2719 "should not have to worry about daz here");
2720 LHSDemandedMask |= fcZero;
2721 RHSDemandedMask |= fcInf;
2722 }
2723
2724 // x / 0 = inf
2725 // large_normal / small_normal = inf
2726 // inf / 1 = inf
2727 // large_normal / subnormal = inf
2728 if (DemandedMask & fcInf) {
2729 LHSDemandedMask |= fcInf | fcNormal | fcSubnormal;
2730 RHSDemandedMask |= fcZero | fcSubnormal | fcNormal;
2731 }
2732
2733 KnownFPClass KnownLHS, KnownRHS;
2734 if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: LHSDemandedMask, Known&: KnownLHS, Q: SQ,
2735 Depth: Depth + 1) ||
2736 SimplifyDemandedFPClass(I, Op: 1, DemandedMask: RHSDemandedMask, Known&: KnownRHS, Q: SQ, Depth: Depth + 1))
2737 return I;
2738
2739 // nsz [+-]0 / x -> 0
2740 if (FMF.noSignedZeros() && KnownLHS.isKnownAlways(Mask: fcZero) &&
2741 KnownRHS.isKnownNeverNaN())
2742 return ConstantFP::getZero(Ty: VTy);
2743
2744 if (KnownLHS.isKnownAlways(Mask: fcPosZero) && KnownRHS.isKnownNeverNaN()) {
2745 IRBuilderBase::InsertPointGuard Guard(Builder);
2746 Builder.SetInsertPoint(I);
2747
2748 // nnan +0 / x -> copysign(0, rhs)
2749 // TODO: -0 / x => copysign(0, fneg(rhs))
2750 Value *Copysign = Builder.CreateCopySign(LHS: X, RHS: Y, FMFSource: FMF);
2751 Copysign->takeName(V: I);
2752 return Copysign;
2753 }
2754
2755 bool ResultNotNan = (DemandedMask & fcNan) == fcNone;
2756 bool ResultNotInf = (DemandedMask & fcInf) == fcNone;
2757
2758 if (!ResultNotInf &&
2759 ((ResultNotNan || (KnownLHS.isKnownNeverNaN() &&
2760 KnownLHS.isKnownNeverLogicalZero(Mode))) &&
2761 (KnownRHS.isKnownAlways(Mask: fcPosZero) ||
2762 (FMF.noSignedZeros() && KnownRHS.isKnownAlways(Mask: fcZero))))) {
2763 IRBuilderBase::InsertPointGuard Guard(Builder);
2764 Builder.SetInsertPoint(I);
2765
2766 // nnan x / 0 => copysign(inf, x);
2767 // nnan nsz x / -0 => copysign(inf, x);
2768 Value *Copysign =
2769 Builder.CreateCopySign(LHS: ConstantFP::getInfinity(Ty: VTy), RHS: X, FMFSource: FMF);
2770 Copysign->takeName(V: I);
2771 return Copysign;
2772 }
2773
2774 // nnan ninf X / [-]0.0 -> poison
2775 if (ResultNotNan && ResultNotInf && KnownRHS.isKnownAlways(Mask: fcZero))
2776 return PoisonValue::get(T: VTy);
2777
2778 Known = KnownFPClass::fdiv(LHS: KnownLHS, RHS: KnownRHS, Mode);
2779 Known.knownNot(RuleOut: ~DemandedMask);
2780
2781 if (Constant *SingleVal = getFPClassConstant(Ty: VTy, Mask: Known.KnownFPClasses,
2782 /*IsCanonicalizing=*/true))
2783 return SingleVal;
2784
2785 FastMathFlags InferredFMF = inferFastMathValueFlags(
2786 FMF, ValidResults: Known.KnownFPClasses, Known: {KnownLHS, KnownRHS});
2787 if (InferredFMF != FMF) {
2788 I->setFastMathFlags(InferredFMF);
2789 return I;
2790 }
2791
2792 return nullptr;
2793 }
2794 case Instruction::FPTrunc:
2795 return simplifyDemandedUseFPClassFPTrunc(IC&: *this, I&: *I, FMF, DemandedMask,
2796 Known, SQ, Depth);
2797 case Instruction::FPExt: {
2798 FPClassTest SrcDemandedMask = DemandedMask;
2799 if (DemandedMask & fcNan)
2800 SrcDemandedMask |= fcNan;
2801
2802 // No subnormal result does not imply not-subnormal in the source type.
2803 if ((DemandedMask & fcNegNormal) != fcNone)
2804 SrcDemandedMask |= fcNegSubnormal;
2805 if ((DemandedMask & fcPosNormal) != fcNone)
2806 SrcDemandedMask |= fcPosSubnormal;
2807
2808 KnownFPClass KnownSrc;
2809 if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownSrc, Q: SQ, Depth: Depth + 1))
2810 return I;
2811
2812 const fltSemantics &DstTy = VTy->getScalarType()->getFltSemantics();
2813 const fltSemantics &SrcTy =
2814 I->getOperand(i: 0)->getType()->getScalarType()->getFltSemantics();
2815
2816 Known = KnownFPClass::fpext(KnownSrc, DstTy, SrcTy);
2817 Known.knownNot(RuleOut: ~DemandedMask);
2818
2819 return simplifyDemandedFPClassResult(FPOp: I, FMF, DemandedMask, Known,
2820 KnownSrcs: {KnownSrc});
2821 }
2822 case Instruction::Call: {
2823 CallInst *CI = cast<CallInst>(Val: I);
2824 const Intrinsic::ID IID = CI->getIntrinsicID();
2825 switch (IID) {
2826 case Intrinsic::fabs: {
2827 KnownFPClass KnownSrc;
2828 if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: llvm::inverse_fabs(Mask: DemandedMask),
2829 Known&: KnownSrc, Q: SQ, Depth: Depth + 1))
2830 return I;
2831
2832 if (Value *Simplified = simplifyDemandedFPClassFabs(
2833 Known, Src: CI->getArgOperand(i: 0), DemandedMask, KnownSrc,
2834 NSZ: FMF.noSignedZeros()))
2835 return Simplified;
2836 break;
2837 }
2838 case Intrinsic::arithmetic_fence:
2839 if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask, Known, Q: SQ, Depth: Depth + 1))
2840 return I;
2841 break;
2842 case Intrinsic::copysign: {
2843 // Flip on more potentially demanded classes
2844 const FPClassTest DemandedMaskAnySign = llvm::unknown_sign(Mask: DemandedMask);
2845 KnownFPClass KnownMag;
2846 if (SimplifyDemandedFPClass(I: CI, Op: 0, DemandedMask: DemandedMaskAnySign, Known&: KnownMag, Q: SQ,
2847 Depth: Depth + 1))
2848 return I;
2849
2850 if ((DemandedMask & fcNegative) == DemandedMask) {
2851 // Roundabout way of replacing with fneg(fabs)
2852 CI->setOperand(i_nocapture: 1, Val_nocapture: ConstantFP::get(Ty: VTy, V: -1.0));
2853 return I;
2854 }
2855
2856 if ((DemandedMask & fcPositive) == DemandedMask) {
2857 // Roundabout way of replacing with fabs
2858 CI->setOperand(i_nocapture: 1, Val_nocapture: ConstantFP::getZero(Ty: VTy));
2859 return I;
2860 }
2861
2862 if (Value *Simplified = simplifyDemandedFPClassCopysignMag(
2863 MagSrc: CI->getArgOperand(i: 0), DemandedMask, KnownSrc: KnownMag,
2864 NSZ: FMF.noSignedZeros()))
2865 return Simplified;
2866
2867 KnownFPClass KnownSign =
2868 computeKnownFPClass(V: CI->getArgOperand(i: 1), InterestedClasses: fcAllFlags, SQ, Depth: Depth + 1);
2869 if (KnownMag.SignBit && KnownSign.SignBit &&
2870 *KnownMag.SignBit == *KnownSign.SignBit)
2871 return CI->getOperand(i_nocapture: 0);
2872
2873 // TODO: Call argument attribute not considered
2874 // Input implied not-nan from flag.
2875 if (FMF.noNaNs())
2876 KnownSign.knownNot(RuleOut: fcNan);
2877
2878 if (KnownSign.SignBit == false) {
2879 CI->dropUBImplyingAttrsAndMetadata();
2880 CI->setOperand(i_nocapture: 1, Val_nocapture: ConstantFP::getZero(Ty: VTy));
2881 return I;
2882 }
2883
2884 if (KnownSign.SignBit == true) {
2885 CI->dropUBImplyingAttrsAndMetadata();
2886 CI->setOperand(i_nocapture: 1, Val_nocapture: ConstantFP::get(Ty: VTy, V: -1.0));
2887 return I;
2888 }
2889
2890 Known = KnownFPClass::copysign(KnownMag, KnownSign);
2891 Known.knownNot(RuleOut: ~DemandedMask);
2892 break;
2893 }
2894 case Intrinsic::fma:
2895 case Intrinsic::fmuladd: {
2896 // We can't do any simplification on the source besides stripping out
2897 // unneeded nans.
2898 FPClassTest SrcDemandedMask = DemandedMask | ~fcNan;
2899 if (DemandedMask & fcNan)
2900 SrcDemandedMask |= fcNan;
2901
2902 KnownFPClass KnownSrc[3];
2903
2904 Type *EltTy = VTy->getScalarType();
2905 if (CI->getArgOperand(i: 0) == CI->getArgOperand(i: 1) &&
2906 isGuaranteedNotToBeUndef(V: CI->getArgOperand(i: 0), AC: SQ.AC, CtxI: SQ.CxtI, DT: SQ.DT,
2907 Depth: Depth + 1)) {
2908 if (SimplifyDemandedFPClass(I: CI, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownSrc[0], Q: SQ,
2909 Depth: Depth + 1) ||
2910 SimplifyDemandedFPClass(I: CI, Op: 2, DemandedMask: SrcDemandedMask, Known&: KnownSrc[2], Q: SQ,
2911 Depth: Depth + 1))
2912 return I;
2913
2914 KnownSrc[1] = KnownSrc[0];
2915 DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics());
2916 Known = KnownFPClass::fma_square(Squared: KnownSrc[0], Addend: KnownSrc[2], Mode);
2917 } else {
2918 for (int OpIdx = 0; OpIdx != 3; ++OpIdx) {
2919 if (SimplifyDemandedFPClass(I: CI, Op: OpIdx, DemandedMask: SrcDemandedMask,
2920 Known&: KnownSrc[OpIdx], Q: SQ, Depth: Depth + 1))
2921 return CI;
2922 }
2923
2924 DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics());
2925 Known = KnownFPClass::fma(LHS: KnownSrc[0], RHS: KnownSrc[1], Addend: KnownSrc[2], Mode);
2926 }
2927
2928 return simplifyDemandedFPClassResult(FPOp: CI, FMF, DemandedMask, Known,
2929 KnownSrcs: {KnownSrc});
2930 }
2931 case Intrinsic::maximum:
2932 case Intrinsic::minimum:
2933 case Intrinsic::maximumnum:
2934 case Intrinsic::minimumnum:
2935 case Intrinsic::maxnum:
2936 case Intrinsic::minnum: {
2937 const bool PropagateNaN =
2938 IID == Intrinsic::maximum || IID == Intrinsic::minimum;
2939
2940 // We can't tell much based on the demanded result without inspecting the
2941 // operands (e.g., a known-positive result could have been clamped), but
2942 // we can still prune known-nan inputs.
2943 FPClassTest SrcDemandedMask =
2944 PropagateNaN && ((DemandedMask & fcNan) == fcNone)
2945 ? DemandedMask | ~fcNan
2946 : fcAllFlags;
2947
2948 KnownFPClass KnownLHS, KnownRHS;
2949 if (SimplifyDemandedFPClass(I: CI, Op: 1, DemandedMask: SrcDemandedMask, Known&: KnownRHS, Q: SQ,
2950 Depth: Depth + 1) ||
2951 SimplifyDemandedFPClass(I: CI, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownLHS, Q: SQ,
2952 Depth: Depth + 1))
2953 return I;
2954
2955 Value *Simplified =
2956 simplifyDemandedFPClassMinMax(Known, IID, CI, DemandedMask, KnownLHS,
2957 KnownRHS, F, NSZ: FMF.noSignedZeros());
2958 if (Simplified)
2959 return Simplified;
2960
2961 auto *FPOp = cast<FPMathOperator>(Val: CI);
2962
2963 FPClassTest ValidResults = DemandedMask & Known.KnownFPClasses;
2964 FastMathFlags InferredFMF = FMF;
2965
2966 if (!FMF.noSignedZeros()) {
2967 // Add NSZ flag if we know the result will not be sensitive to the sign
2968 // of 0.
2969 FPClassTest ZeroMask = fcZero;
2970
2971 Type *EltTy = VTy->getScalarType();
2972 DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics());
2973 if (Mode != DenormalMode::getIEEE())
2974 ZeroMask |= fcSubnormal;
2975
2976 bool ResultNotLogical0 = (ValidResults & ZeroMask) == fcNone;
2977 if (ResultNotLogical0 || ((KnownLHS.isKnownNeverLogicalNegZero(Mode) ||
2978 KnownRHS.isKnownNeverLogicalPosZero(Mode)) &&
2979 (KnownLHS.isKnownNeverLogicalPosZero(Mode) ||
2980 KnownRHS.isKnownNeverLogicalNegZero(Mode))))
2981 InferredFMF.setNoSignedZeros(true);
2982 }
2983
2984 if (!FMF.noNaNs() &&
2985 ((PropagateNaN && (ValidResults & fcNan) == fcNone) ||
2986 (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN()))) {
2987 CI->dropUBImplyingAttrsAndMetadata();
2988 InferredFMF.setNoNaNs(true);
2989 }
2990
2991 if (InferredFMF != FMF) {
2992 CI->setFastMathFlags(InferredFMF);
2993 return FPOp;
2994 }
2995
2996 return nullptr;
2997 }
2998 case Intrinsic::exp:
2999 case Intrinsic::exp2:
3000 case Intrinsic::exp10: {
3001 if ((DemandedMask & fcPositive) == fcNone) {
3002 // Only returns positive values or nans.
3003 if ((DemandedMask & fcNan) == fcNone)
3004 return PoisonValue::get(T: VTy);
3005
3006 // Only need nan propagation.
3007 if ((DemandedMask & ~fcNan) == fcNone)
3008 return ConstantFP::getQNaN(Ty: VTy);
3009
3010 return CI->getArgOperand(i: 0);
3011 }
3012
3013 FPClassTest SrcDemandedMask = DemandedMask & fcNan;
3014 if (DemandedMask & fcNan)
3015 SrcDemandedMask |= fcNan;
3016
3017 if (DemandedMask & fcZero) {
3018 // exp(-infinity) = 0
3019 SrcDemandedMask |= fcNegInf;
3020
3021 // exp(-largest_normal) = 0
3022 //
3023 // Negative numbers of sufficiently large magnitude underflow to 0. No
3024 // subnormal input has a 0 result.
3025 SrcDemandedMask |= fcNegNormal;
3026 }
3027
3028 if (DemandedMask & fcPosSubnormal) {
3029 // Negative numbers of sufficiently large magnitude underflow to 0. No
3030 // subnormal input has a 0 result.
3031 SrcDemandedMask |= fcNegNormal;
3032 }
3033
3034 if (DemandedMask & fcPosNormal) {
3035 // exp(0) = 1
3036 // exp(+/- smallest_normal) = 1
3037 // exp(+/- largest_denormal) = 1
3038 // exp(+/- smallest_denormal) = 1
3039 // exp(-1) = pos normal
3040 SrcDemandedMask |= fcNormal | fcSubnormal | fcZero;
3041 }
3042
3043 // exp(inf), exp(largest_normal) = inf
3044 if (DemandedMask & fcPosInf)
3045 SrcDemandedMask |= fcPosInf | fcPosNormal;
3046
3047 KnownFPClass KnownSrc;
3048
3049 // TODO: This could really make use of KnownFPClass of specific value
3050 // range, (i.e., close enough to 1)
3051 if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownSrc, Q: SQ,
3052 Depth: Depth + 1))
3053 return I;
3054
3055 // exp(+/-0) = 1
3056 if (KnownSrc.isKnownAlways(Mask: fcZero))
3057 return ConstantFP::get(Ty: VTy, V: 1.0);
3058
3059 // Only perform nan propagation.
3060 // Note: Dropping canonicalize / quiet of signaling nan.
3061 if (KnownSrc.isKnownAlways(Mask: fcNan))
3062 return CI->getArgOperand(i: 0);
3063
3064 // exp(0 | nan) => x == 0.0 ? 1.0 : x
3065 if (KnownSrc.isKnownAlways(Mask: fcZero | fcNan)) {
3066 IRBuilderBase::InsertPointGuard Guard(Builder);
3067 Builder.SetInsertPoint(CI);
3068
3069 // fadd +/-0, 1.0 => 1.0
3070 // fadd nan, 1.0 => nan
3071 return Builder.CreateFAddFMF(L: CI->getArgOperand(i: 0),
3072 R: ConstantFP::get(Ty: VTy, V: 1.0), FMFSource: FMF);
3073 }
3074
3075 if (KnownSrc.isKnownAlways(Mask: fcInf | fcNan)) {
3076 // exp(-inf) = 0
3077 // exp(+inf) = +inf
3078 IRBuilderBase::InsertPointGuard Guard(Builder);
3079 Builder.SetInsertPoint(CI);
3080
3081 // Note: Dropping canonicalize / quiet of signaling nan.
3082 Value *X = CI->getArgOperand(i: 0);
3083 Value *IsPosInfOrNan = Builder.CreateFCmpFMF(
3084 P: FCmpInst::FCMP_UEQ, LHS: X, RHS: ConstantFP::getInfinity(Ty: VTy), FMFSource: FMF);
3085 // We do not know whether an infinity or a NaN is more likely here,
3086 // so mark the branch weights as unkown.
3087 Value *ZeroOrInf = Builder.CreateSelectFMFWithUnknownProfile(
3088 C: IsPosInfOrNan, True: X, False: ConstantFP::getZero(Ty: VTy), FMFSource: FMF, DEBUG_TYPE);
3089 return ZeroOrInf;
3090 }
3091
3092 Known = KnownFPClass::exp(Src: KnownSrc);
3093 Known.knownNot(RuleOut: ~DemandedMask);
3094
3095 return simplifyDemandedFPClassResult(FPOp: CI, FMF, DemandedMask, Known,
3096 KnownSrcs: KnownSrc);
3097 }
3098 case Intrinsic::log:
3099 case Intrinsic::log2:
3100 case Intrinsic::log10: {
3101 FPClassTest DemandedSrcMask = DemandedMask & (fcNan | fcPosInf);
3102 if (DemandedMask & fcNan)
3103 DemandedSrcMask |= fcNan;
3104
3105 Type *EltTy = VTy->getScalarType();
3106 DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics());
3107
3108 // log(x < 0) = nan
3109 if (DemandedMask & fcNan)
3110 DemandedSrcMask |= (fcNegative & ~fcNegZero);
3111
3112 // log(0) = -inf
3113 if (DemandedMask & fcNegInf) {
3114 DemandedSrcMask |= fcZero;
3115
3116 // No value produces subnormal result.
3117 if (Mode.inputsMayBeZero())
3118 DemandedSrcMask |= fcSubnormal;
3119 }
3120
3121 if (DemandedMask & fcNormal)
3122 DemandedSrcMask |= fcNormal | fcSubnormal;
3123
3124 // log(1) = 0
3125 if (DemandedMask & fcZero)
3126 DemandedSrcMask |= fcPosNormal;
3127
3128 KnownFPClass KnownSrc;
3129 if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: DemandedSrcMask, Known&: KnownSrc, Q: SQ,
3130 Depth: Depth + 1))
3131 return I;
3132
3133 Known = KnownFPClass::log(Src: KnownSrc, Mode);
3134 Known.knownNot(RuleOut: ~DemandedMask);
3135
3136 return simplifyDemandedFPClassResult(FPOp: CI, FMF, DemandedMask, Known,
3137 KnownSrcs: KnownSrc);
3138 }
3139 case Intrinsic::sqrt: {
3140 FPClassTest DemandedSrcMask =
3141 DemandedMask & (fcNegZero | fcPositive | fcNan);
3142
3143 if (DemandedMask & fcNan)
3144 DemandedSrcMask |= fcNan | (fcNegative & ~fcNegZero);
3145
3146 // sqrt(max_subnormal) is a normal value
3147 if (DemandedMask & fcPosNormal)
3148 DemandedSrcMask |= fcPosSubnormal;
3149
3150 KnownFPClass KnownSrc;
3151 if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: DemandedSrcMask, Known&: KnownSrc, Q: SQ,
3152 Depth: Depth + 1))
3153 return I;
3154
3155 // Infer the source cannot be negative if the result cannot be nan.
3156 if ((DemandedMask & fcNan) == fcNone)
3157 KnownSrc.knownNot(RuleOut: (fcNegative & ~fcNegZero) | fcNan);
3158
3159 // Infer the source cannot be +inf if the result is not +nf
3160 if ((DemandedMask & fcPosInf) == fcNone)
3161 KnownSrc.knownNot(RuleOut: fcPosInf);
3162
3163 Type *EltTy = VTy->getScalarType();
3164 DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics());
3165
3166 // sqrt(-x) = nan, but be careful of negative subnormals flushed to 0.
3167 if (KnownSrc.isKnownNever(Mask: fcPositive) &&
3168 KnownSrc.isKnownNeverLogicalZero(Mode))
3169 return ConstantFP::getQNaN(Ty: VTy);
3170
3171 Known = KnownFPClass::sqrt(Src: KnownSrc, Mode);
3172 Known.knownNot(RuleOut: ~DemandedMask);
3173
3174 if (Known.KnownFPClasses == fcZero) {
3175 if (FMF.noSignedZeros())
3176 return ConstantFP::getZero(Ty: VTy);
3177 IRBuilderBase::InsertPointGuard Guard(Builder);
3178 Builder.SetInsertPoint(CI);
3179
3180 Value *Copysign = Builder.CreateCopySign(LHS: ConstantFP::getZero(Ty: VTy),
3181 RHS: CI->getArgOperand(i: 0), FMFSource: FMF);
3182 Copysign->takeName(V: CI);
3183 return Copysign;
3184 }
3185
3186 return simplifyDemandedFPClassResult(FPOp: CI, FMF, DemandedMask, Known,
3187 KnownSrcs: {KnownSrc});
3188 }
3189 case Intrinsic::ldexp: {
3190 FPClassTest SrcDemandedMask = DemandedMask & fcInf;
3191 if (DemandedMask & fcNan)
3192 SrcDemandedMask |= fcNan;
3193
3194 if (DemandedMask & fcPosInf)
3195 SrcDemandedMask |= fcPosNormal | fcPosSubnormal;
3196 if (DemandedMask & fcNegInf)
3197 SrcDemandedMask |= fcNegNormal | fcNegSubnormal;
3198
3199 if (DemandedMask & (fcPosNormal | fcPosSubnormal))
3200 SrcDemandedMask |= fcPosNormal | fcPosSubnormal;
3201 if (DemandedMask & (fcNegNormal | fcNegSubnormal))
3202 SrcDemandedMask |= fcNegNormal | fcNegSubnormal;
3203
3204 if (DemandedMask & fcPosZero)
3205 SrcDemandedMask |= fcPosFinite;
3206 if (DemandedMask & fcNegZero)
3207 SrcDemandedMask |= fcNegFinite;
3208
3209 KnownFPClass KnownSrc;
3210 if (SimplifyDemandedFPClass(I: CI, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownSrc, Q: SQ,
3211 Depth: Depth + 1))
3212 return CI;
3213
3214 Type *EltTy = VTy->getScalarType();
3215 const fltSemantics &FltSem = EltTy->getFltSemantics();
3216 DenormalMode Mode = F.getDenormalMode(FPType: FltSem);
3217
3218 KnownBits KnownExpBits =
3219 ::computeKnownBits(V: CI->getArgOperand(i: 1), Q: SQ, Depth: Depth + 1);
3220
3221 Known = KnownFPClass::ldexp(Src: KnownSrc, N: KnownExpBits, Flt: FltSem, Mode);
3222 Known.knownNot(RuleOut: ~DemandedMask);
3223
3224 return simplifyDemandedFPClassResult(FPOp: CI, FMF, DemandedMask, Known,
3225 KnownSrcs: {KnownSrc});
3226 }
3227 case Intrinsic::trunc:
3228 case Intrinsic::floor:
3229 case Intrinsic::ceil:
3230 case Intrinsic::rint:
3231 case Intrinsic::nearbyint:
3232 case Intrinsic::round:
3233 case Intrinsic::roundeven: {
3234 FPClassTest DemandedSrcMask = DemandedMask;
3235 if (DemandedMask & fcNan)
3236 DemandedSrcMask |= fcNan;
3237
3238 // Zero results imply valid subnormal sources.
3239 if (DemandedMask & fcNegZero)
3240 DemandedSrcMask |= fcNegSubnormal | fcNegNormal;
3241
3242 if (DemandedMask & fcPosZero)
3243 DemandedSrcMask |= fcPosSubnormal | fcPosNormal;
3244
3245 KnownFPClass KnownSrc;
3246 if (SimplifyDemandedFPClass(I: CI, Op: 0, DemandedMask: DemandedSrcMask, Known&: KnownSrc, Q: SQ,
3247 Depth: Depth + 1))
3248 return I;
3249
3250 // Note: Possibly dropping snan quiet.
3251 if (KnownSrc.isKnownAlways(Mask: fcInf | fcNan | fcZero))
3252 return CI->getArgOperand(i: 0);
3253
3254 bool IsRoundNearestOrTrunc =
3255 IID == Intrinsic::round || IID == Intrinsic::roundeven ||
3256 IID == Intrinsic::nearbyint || IID == Intrinsic::rint ||
3257 IID == Intrinsic::trunc;
3258
3259 // Ignore denormals-as-zero, as canonicalization is not mandated.
3260 if ((IID == Intrinsic::floor || IsRoundNearestOrTrunc) &&
3261 KnownSrc.isKnownAlways(Mask: fcPosZero | fcPosSubnormal))
3262 return ConstantFP::getZero(Ty: VTy);
3263
3264 if ((IID == Intrinsic::ceil || IsRoundNearestOrTrunc) &&
3265 KnownSrc.isKnownAlways(Mask: fcNegZero | fcNegSubnormal))
3266 return ConstantFP::getZero(Ty: VTy, Negative: true);
3267
3268 if (IID == Intrinsic::floor && KnownSrc.isKnownAlways(Mask: fcNegSubnormal))
3269 return ConstantFP::get(Ty: VTy, V: -1.0);
3270
3271 if (IID == Intrinsic::ceil && KnownSrc.isKnownAlways(Mask: fcPosSubnormal))
3272 return ConstantFP::get(Ty: VTy, V: 1.0);
3273
3274 Known = KnownFPClass::roundToIntegral(
3275 Src: KnownSrc, IsTrunc: IID == Intrinsic::trunc,
3276 IsMultiUnitFPType: VTy->getScalarType()->isMultiUnitFPType());
3277
3278 Known.knownNot(RuleOut: ~DemandedMask);
3279
3280 if (Constant *SingleVal = getFPClassConstant(Ty: VTy, Mask: Known.KnownFPClasses,
3281 /*IsCanonicalizing=*/true))
3282 return SingleVal;
3283
3284 if ((IID == Intrinsic::trunc || IsRoundNearestOrTrunc) &&
3285 KnownSrc.isKnownAlways(Mask: fcZero | fcSubnormal)) {
3286 IRBuilderBase::InsertPointGuard Guard(Builder);
3287 Builder.SetInsertPoint(CI);
3288
3289 Value *Copysign = Builder.CreateCopySign(LHS: ConstantFP::getZero(Ty: VTy),
3290 RHS: CI->getArgOperand(i: 0));
3291 Copysign->takeName(V: CI);
3292 return Copysign;
3293 }
3294
3295 FastMathFlags InferredFMF =
3296 inferFastMathValueFlags(FMF, ValidResults: Known.KnownFPClasses, Known: KnownSrc);
3297 if (InferredFMF != FMF) {
3298 CI->dropUBImplyingAttrsAndMetadata();
3299 CI->setFastMathFlags(InferredFMF);
3300 return CI;
3301 }
3302
3303 return nullptr;
3304 }
3305 case Intrinsic::fptrunc_round:
3306 return simplifyDemandedUseFPClassFPTrunc(IC&: *this, I&: *CI, FMF, DemandedMask,
3307 Known, SQ, Depth);
3308 case Intrinsic::canonicalize: {
3309 Type *EltTy = VTy->getScalarType();
3310
3311 // TODO: This could have more refined support for PositiveZero denormal
3312 // mode.
3313 if (EltTy->isIEEELikeFPTy()) {
3314 DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics());
3315
3316 FPClassTest SrcDemandedMask = DemandedMask;
3317
3318 // A demanded quiet nan result may have come from a signaling nan, so we
3319 // need to expand the demanded mask.
3320 if ((DemandedMask & fcQNan) != fcNone)
3321 SrcDemandedMask |= fcSNan;
3322
3323 if (Mode != DenormalMode::getIEEE()) {
3324 // Any zero results may have come from flushed denormals.
3325 if (DemandedMask & fcPosZero)
3326 SrcDemandedMask |= fcPosSubnormal;
3327 if (DemandedMask & fcNegZero)
3328 SrcDemandedMask |= fcNegSubnormal;
3329 }
3330
3331 if (Mode == DenormalMode::getPreserveSign()) {
3332 // If a denormal input will be flushed, and we don't need zeros, we
3333 // don't need denormals either.
3334 if ((DemandedMask & fcPosZero) == fcNone)
3335 SrcDemandedMask &= ~fcPosSubnormal;
3336
3337 if ((DemandedMask & fcNegZero) == fcNone)
3338 SrcDemandedMask &= ~fcNegSubnormal;
3339 }
3340
3341 KnownFPClass KnownSrc;
3342
3343 // Simplify upstream operations before trying to simplify this call.
3344 if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownSrc, Q: SQ,
3345 Depth: Depth + 1))
3346 return I;
3347
3348 // Perform the canonicalization to see if this folded to a constant.
3349 Known = KnownFPClass::canonicalize(Src: KnownSrc, DenormMode: Mode);
3350 Known.knownNot(RuleOut: ~DemandedMask);
3351
3352 if (Constant *SingleVal = getFPClassConstant(Ty: VTy, Mask: Known.KnownFPClasses))
3353 return SingleVal;
3354
3355 // For IEEE handling, there is only a bit change for nan inputs, so we
3356 // can drop it if we do not demand nan results or we know the input
3357 // isn't a nan.
3358 // Otherwise, we also need to avoid denormal inputs to drop the
3359 // canonicalize.
3360 if (KnownSrc.isKnownNeverNaN() && (Mode == DenormalMode::getIEEE() ||
3361 KnownSrc.isKnownNeverSubnormal()))
3362 return CI->getArgOperand(i: 0);
3363
3364 FastMathFlags InferredFMF =
3365 inferFastMathValueFlags(FMF, ValidResults: Known.KnownFPClasses, Known: KnownSrc);
3366 if (InferredFMF != FMF) {
3367 CI->dropUBImplyingAttrsAndMetadata();
3368 CI->setFastMathFlags(InferredFMF);
3369 return CI;
3370 }
3371
3372 return nullptr;
3373 }
3374
3375 [[fallthrough]];
3376 }
3377 default:
3378 Known = computeKnownFPClass(V: I, InterestedClasses: DemandedMask, SQ, Depth: Depth + 1);
3379 Known.knownNot(RuleOut: ~DemandedMask);
3380 break;
3381 }
3382
3383 break;
3384 }
3385 case Instruction::Select: {
3386 KnownFPClass KnownLHS, KnownRHS;
3387 if (SimplifyDemandedFPClass(I, Op: 2, DemandedMask, Known&: KnownRHS, Q: SQ, Depth: Depth + 1) ||
3388 SimplifyDemandedFPClass(I, Op: 1, DemandedMask, Known&: KnownLHS, Q: SQ, Depth: Depth + 1))
3389 return I;
3390
3391 if (KnownLHS.isKnownNever(Mask: DemandedMask))
3392 return I->getOperand(i: 2);
3393 if (KnownRHS.isKnownNever(Mask: DemandedMask))
3394 return I->getOperand(i: 1);
3395
3396 adjustKnownFPClassForSelectArm(Known&: KnownLHS, Cond: I->getOperand(i: 0), Arm: I->getOperand(i: 1),
3397 /*Invert=*/false, Q: SQ, Depth);
3398 adjustKnownFPClassForSelectArm(Known&: KnownRHS, Cond: I->getOperand(i: 0), Arm: I->getOperand(i: 2),
3399 /*Invert=*/true, Q: SQ, Depth);
3400 Known = KnownLHS.intersectWith(RHS: KnownRHS);
3401 Known.knownNot(RuleOut: ~DemandedMask);
3402 break;
3403 }
3404 case Instruction::ExtractElement: {
3405 // TODO: Handle demanded element mask
3406 if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask, Known, Q: SQ, Depth: Depth + 1))
3407 return I;
3408 Known.knownNot(RuleOut: ~DemandedMask);
3409 break;
3410 }
3411 case Instruction::InsertElement: {
3412 KnownFPClass KnownInserted, KnownVec;
3413 if (SimplifyDemandedFPClass(I, Op: 1, DemandedMask, Known&: KnownInserted, Q: SQ,
3414 Depth: Depth + 1) ||
3415 SimplifyDemandedFPClass(I, Op: 0, DemandedMask, Known&: KnownVec, Q: SQ, Depth: Depth + 1))
3416 return I;
3417
3418 // TODO: Use demanded elements logic from computeKnownFPClass
3419 Known = KnownVec | KnownInserted;
3420 Known.knownNot(RuleOut: ~DemandedMask);
3421 break;
3422 }
3423 case Instruction::ShuffleVector: {
3424 KnownFPClass KnownLHS, KnownRHS;
3425 if (SimplifyDemandedFPClass(I, Op: 1, DemandedMask, Known&: KnownRHS, Q: SQ, Depth: Depth + 1) ||
3426 SimplifyDemandedFPClass(I, Op: 0, DemandedMask, Known&: KnownLHS, Q: SQ, Depth: Depth + 1))
3427 return I;
3428
3429 // TODO: This is overly conservative and should consider demanded elements,
3430 // and splats.
3431 Known = KnownLHS | KnownRHS;
3432 Known.knownNot(RuleOut: ~DemandedMask);
3433 break;
3434 }
3435 case Instruction::InsertValue: {
3436 KnownFPClass KnownAgg, KnownElt;
3437 if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask, Known&: KnownAgg, Q: SQ, Depth: Depth + 1) ||
3438 SimplifyDemandedFPClass(I, Op: 1, DemandedMask, Known&: KnownElt, Q: SQ, Depth: Depth + 1))
3439 return I;
3440
3441 Known = KnownAgg | KnownElt;
3442 break;
3443 }
3444 case Instruction::ExtractValue: {
3445 Value *ExtractSrc;
3446 if (match(V: I, P: m_ExtractValue<0>(V: m_OneUse(SubPattern: m_Value(V&: ExtractSrc))))) {
3447 if (auto *II = dyn_cast<IntrinsicInst>(Val: ExtractSrc)) {
3448 const Intrinsic::ID IID = II->getIntrinsicID();
3449 switch (IID) {
3450 case Intrinsic::frexp: {
3451 FPClassTest SrcDemandedMask = fcNone;
3452 if (DemandedMask & fcNan)
3453 SrcDemandedMask |= fcNan;
3454 if (DemandedMask & fcNegFinite)
3455 SrcDemandedMask |= fcNegFinite;
3456 if (DemandedMask & fcPosFinite)
3457 SrcDemandedMask |= fcPosFinite;
3458 if (DemandedMask & fcPosInf)
3459 SrcDemandedMask |= fcPosInf;
3460 if (DemandedMask & fcNegInf)
3461 SrcDemandedMask |= fcNegInf;
3462
3463 KnownFPClass KnownSrc;
3464 if (SimplifyDemandedFPClass(I: II, Op: 0, DemandedMask: SrcDemandedMask, Known&: KnownSrc, Q: SQ,
3465 Depth: Depth + 1))
3466 return I;
3467
3468 Type *EltTy = VTy->getScalarType();
3469 DenormalMode Mode = F.getDenormalMode(FPType: EltTy->getFltSemantics());
3470
3471 Known = KnownFPClass::frexp_mant(Src: KnownSrc, Mode);
3472 Known.KnownFPClasses &= DemandedMask;
3473
3474 if (Constant *SingleVal =
3475 getFPClassConstant(Ty: VTy, Mask: Known.KnownFPClasses,
3476 /*IsCanonicalizing=*/true))
3477 return SingleVal;
3478
3479 if (Known.isKnownAlways(Mask: fcInf | fcNan))
3480 return II->getArgOperand(i: 0);
3481
3482 return nullptr;
3483 }
3484 default:
3485 break;
3486 }
3487 }
3488 }
3489
3490 KnownFPClass KnownSrc;
3491 if (SimplifyDemandedFPClass(I, Op: 0, DemandedMask, Known&: KnownSrc, Q: SQ, Depth: Depth + 1))
3492 return I;
3493 Known = KnownSrc;
3494 break;
3495 }
3496 case Instruction::PHI: {
3497 const unsigned PhiRecursionLimit = MaxAnalysisRecursionDepth - 2;
3498 if (Depth >= PhiRecursionLimit)
3499 break;
3500
3501 PHINode *P = cast<PHINode>(Val: I);
3502 SimplifyQuery ContextSQ = SQ.getWithoutCondContext();
3503
3504 bool First = true;
3505 bool Changed = false;
3506 for (unsigned I = 0, E = P->getNumIncomingValues(); I != E; ++I) {
3507 // TODO: Better support for self recursive phi
3508 BasicBlock *PredBB = P->getIncomingBlock(i: I);
3509 const Instruction *CtxI = PredBB->getTerminator();
3510
3511 // Attempt to simplify all incoming edges at a time. If we simplify one
3512 // incoming edge, the phi may fold away, losing information on a later
3513 // visit.
3514 KnownFPClass KnownSrc;
3515 if (SimplifyDemandedFPClass(
3516 I: P, Op: P->getOperandNumForIncomingValue(i: I), DemandedMask, Known&: KnownSrc,
3517 Q: ContextSQ.getWithInstruction(I: CtxI), Depth: Depth + 1)) {
3518 // Fixup the other block references to the simplified value.
3519 P->setIncomingValueForBlock(BB: PredBB, V: P->getIncomingValue(i: I));
3520 Changed = true;
3521 }
3522
3523 if (First) {
3524 Known = KnownSrc;
3525 First = false;
3526 } else {
3527 Known |= KnownSrc;
3528 }
3529 }
3530
3531 if (Changed)
3532 return P;
3533
3534 Known.knownNot(RuleOut: ~DemandedMask);
3535 break;
3536 }
3537 default:
3538 Known = computeKnownFPClass(V: I, InterestedClasses: DemandedMask, SQ, Depth: Depth + 1);
3539 Known.knownNot(RuleOut: ~DemandedMask);
3540 break;
3541 }
3542
3543 return getFPClassConstant(Ty: VTy, Mask: Known.KnownFPClasses);
3544}
3545
3546/// Helper routine of SimplifyDemandedUseFPClass. It computes Known
3547/// floating-point classes. It also tries to handle simplifications that can be
3548/// done based on DemandedMask, but without modifying the Instruction.
3549Value *InstCombinerImpl::SimplifyMultipleUseDemandedFPClass(
3550 Instruction *I, FPClassTest DemandedMask, KnownFPClass &Known,
3551 const SimplifyQuery &SQ, unsigned Depth) {
3552 FastMathFlags FMF;
3553 if (auto *FPOp = dyn_cast<FPMathOperator>(Val: I)) {
3554 FMF = FPOp->getFastMathFlags();
3555 DemandedMask = adjustDemandedMaskFromFlags(DemandedMask, FMF);
3556 }
3557
3558 switch (I->getOpcode()) {
3559 case Instruction::Select: {
3560 // TODO: Can we infer which side it came from based on adjusted result
3561 // class?
3562 KnownFPClass KnownRHS =
3563 computeKnownFPClass(V: I->getOperand(i: 2), InterestedClasses: DemandedMask, SQ, Depth: Depth + 1);
3564 if (KnownRHS.isKnownNever(Mask: DemandedMask))
3565 return I->getOperand(i: 1);
3566
3567 KnownFPClass KnownLHS =
3568 computeKnownFPClass(V: I->getOperand(i: 1), InterestedClasses: DemandedMask, SQ, Depth: Depth + 1);
3569 if (KnownLHS.isKnownNever(Mask: DemandedMask))
3570 return I->getOperand(i: 2);
3571
3572 adjustKnownFPClassForSelectArm(Known&: KnownLHS, Cond: I->getOperand(i: 0), Arm: I->getOperand(i: 1),
3573 /*Invert=*/false, Q: SQ, Depth);
3574 adjustKnownFPClassForSelectArm(Known&: KnownRHS, Cond: I->getOperand(i: 0), Arm: I->getOperand(i: 2),
3575 /*Invert=*/true, Q: SQ, Depth);
3576 Known = KnownLHS.intersectWith(RHS: KnownRHS);
3577 Known.knownNot(RuleOut: ~DemandedMask);
3578 break;
3579 }
3580 case Instruction::FNeg: {
3581 // Special case fneg(fabs(x))
3582 Value *Src;
3583
3584 Value *FNegSrc = I->getOperand(i: 0);
3585 if (!match(V: FNegSrc, P: m_FAbs(Op0: m_Value(V&: Src)))) {
3586 Known = computeKnownFPClass(V: I, InterestedClasses: DemandedMask, SQ, Depth: Depth + 1);
3587 break;
3588 }
3589
3590 KnownFPClass KnownSrc = computeKnownFPClass(V: Src, InterestedClasses: fcAllFlags, SQ, Depth: Depth + 1);
3591
3592 FastMathFlags FabsFMF = cast<FPMathOperator>(Val: FNegSrc)->getFastMathFlags();
3593 FPClassTest ThisDemandedMask =
3594 adjustDemandedMaskFromFlags(DemandedMask, FMF: FabsFMF);
3595
3596 // We cannot apply the NSZ logic with multiple uses. We can apply it if the
3597 // inner fabs has it and this is the only use.
3598 if (Value *Simplified = simplifyDemandedFPClassFnegFabs(
3599 Known, Src, DemandedMask: ThisDemandedMask, KnownSrc, /*NSZ=*/false))
3600 return Simplified;
3601 break;
3602 }
3603 case Instruction::Call: {
3604 const CallInst *CI = cast<CallInst>(Val: I);
3605 const Intrinsic::ID IID = CI->getIntrinsicID();
3606 switch (IID) {
3607 case Intrinsic::fabs: {
3608 Value *Src = CI->getArgOperand(i: 0);
3609 KnownFPClass KnownSrc =
3610 computeKnownFPClass(V: Src, InterestedClasses: fcAllFlags, SQ, Depth: Depth + 1);
3611
3612 // NSZ cannot be applied in multiple use case (maybe it could if all uses
3613 // were known nsz)
3614 if (Value *Simplified = simplifyDemandedFPClassFabs(
3615 Known, Src: CI->getArgOperand(i: 0), DemandedMask, KnownSrc,
3616 /*NSZ=*/false))
3617 return Simplified;
3618 break;
3619 }
3620 case Intrinsic::copysign: {
3621 Value *Mag = CI->getArgOperand(i: 0);
3622 Value *Sign = CI->getArgOperand(i: 1);
3623 KnownFPClass KnownMag =
3624 computeKnownFPClass(V: Mag, InterestedClasses: fcAllFlags, SQ, Depth: Depth + 1);
3625
3626 // Rule out some cases by magnitude, which may help prove the sign bit is
3627 // one direction or the other.
3628 KnownMag.knownNot(RuleOut: ~llvm::unknown_sign(Mask: DemandedMask));
3629
3630 // Cannot use nsz in the multiple use case.
3631 if (Value *Simplified = simplifyDemandedFPClassCopysignMag(
3632 MagSrc: Mag, DemandedMask, KnownSrc: KnownMag, /*NSZ=*/false))
3633 return Simplified;
3634
3635 KnownFPClass KnownSign =
3636 computeKnownFPClass(V: Sign, InterestedClasses: fcAllFlags, SQ, Depth: Depth + 1);
3637
3638 if (FMF.noInfs())
3639 KnownSign.knownNot(RuleOut: fcInf);
3640 if (FMF.noNaNs())
3641 KnownSign.knownNot(RuleOut: fcNan);
3642
3643 if (KnownSign.SignBit && KnownMag.SignBit &&
3644 *KnownSign.SignBit == *KnownMag.SignBit)
3645 return Mag;
3646
3647 Known = KnownFPClass::copysign(KnownMag, KnownSign);
3648 break;
3649 }
3650 case Intrinsic::maxnum:
3651 case Intrinsic::minnum:
3652 case Intrinsic::maximum:
3653 case Intrinsic::minimum:
3654 case Intrinsic::maximumnum:
3655 case Intrinsic::minimumnum: {
3656 KnownFPClass KnownRHS = computeKnownFPClass(V: CI->getArgOperand(i: 1),
3657 InterestedClasses: DemandedMask, SQ, Depth: Depth + 1);
3658 if (KnownRHS.isUnknown())
3659 return nullptr;
3660
3661 KnownFPClass KnownLHS = computeKnownFPClass(V: CI->getArgOperand(i: 0),
3662 InterestedClasses: DemandedMask, SQ, Depth: Depth + 1);
3663
3664 // Cannot use NSZ in the multiple use case.
3665 return simplifyDemandedFPClassMinMax(Known, IID, CI, DemandedMask,
3666 KnownLHS, KnownRHS, F,
3667 /*NSZ=*/false);
3668 }
3669 default:
3670 break;
3671 }
3672
3673 [[fallthrough]];
3674 }
3675 default:
3676 Known = computeKnownFPClass(V: I, InterestedClasses: DemandedMask, SQ, Depth: Depth + 1);
3677 Known.knownNot(RuleOut: ~DemandedMask);
3678 break;
3679 }
3680
3681 return getFPClassConstant(Ty: I->getType(), Mask: Known.KnownFPClasses);
3682}
3683
3684bool InstCombinerImpl::SimplifyDemandedFPClass(Instruction *I, unsigned OpNo,
3685 FPClassTest DemandedMask,
3686 KnownFPClass &Known,
3687 const SimplifyQuery &SQ,
3688 unsigned Depth) {
3689 Use &U = I->getOperandUse(i: OpNo);
3690 Value *V = U.get();
3691 Type *VTy = V->getType();
3692
3693 if (DemandedMask == fcNone) {
3694 if (isa<PoisonValue>(Val: V))
3695 return false;
3696 replaceUse(U, NewValue: PoisonValue::get(T: VTy));
3697 return true;
3698 }
3699
3700 // Handle constant
3701 Instruction *VInst = dyn_cast<Instruction>(Val: V);
3702 if (!VInst) {
3703 // Handle constants and arguments
3704 Known = computeKnownFPClass(V, InterestedClasses: fcAllFlags, SQ, Depth);
3705 Known.knownNot(RuleOut: ~DemandedMask);
3706
3707 if (Known.KnownFPClasses == fcNone) {
3708 if (isa<PoisonValue>(Val: V))
3709 return false;
3710 replaceUse(U, NewValue: PoisonValue::get(T: VTy));
3711 return true;
3712 }
3713
3714 // Do not try to replace values which are already constants (unless we are
3715 // folding to poison). Doing so could promote poison elements to non-poison
3716 // constants.
3717 if (isa<Constant>(Val: V))
3718 return false;
3719
3720 Value *FoldedToConst = getFPClassConstant(Ty: VTy, Mask: Known.KnownFPClasses);
3721 if (!FoldedToConst || FoldedToConst == V)
3722 return false;
3723
3724 replaceUse(U, NewValue: FoldedToConst);
3725 return true;
3726 }
3727
3728 if (Depth == MaxAnalysisRecursionDepth) {
3729 Known.knownNot(RuleOut: ~DemandedMask);
3730 return false;
3731 }
3732
3733 Value *NewVal;
3734
3735 if (VInst->hasOneUse()) {
3736 // If the instruction has one use, we can directly simplify it.
3737 NewVal = SimplifyDemandedUseFPClass(I: VInst, DemandedMask, Known, SQ, Depth);
3738 } else {
3739 // If there are multiple uses of this instruction, then we can simplify
3740 // VInst to some other value, but not modify the instruction.
3741 NewVal = SimplifyMultipleUseDemandedFPClass(I: VInst, DemandedMask, Known, SQ,
3742 Depth);
3743 }
3744
3745 if (!NewVal)
3746 return false;
3747 if (Instruction *OpInst = dyn_cast<Instruction>(Val&: U))
3748 salvageDebugInfo(I&: *OpInst);
3749
3750 replaceUse(U, NewValue: NewVal);
3751 return true;
3752}
3753