1//===- lib/CodeGen/GlobalISel/GISelValueTracking.cpp --------------*- C++
2//*-===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9//
10/// Provides analysis for querying information about KnownBits during GISel
11/// passes.
12//
13//===----------------------------------------------------------------------===//
14#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
15#include "llvm/ADT/APFloat.h"
16#include "llvm/ADT/FloatingPointMode.h"
17#include "llvm/ADT/ScopeExit.h"
18#include "llvm/ADT/StringExtras.h"
19#include "llvm/Analysis/ValueTracking.h"
20#include "llvm/Analysis/VectorUtils.h"
21#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
22#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
23#include "llvm/CodeGen/GlobalISel/MachineFloatingPointPredicateUtils.h"
24#include "llvm/CodeGen/GlobalISel/Utils.h"
25#include "llvm/CodeGen/LowLevelTypeUtils.h"
26#include "llvm/CodeGen/MachineFrameInfo.h"
27#include "llvm/CodeGen/MachineInstr.h"
28#include "llvm/CodeGen/MachineOperand.h"
29#include "llvm/CodeGen/MachineRegisterInfo.h"
30#include "llvm/CodeGen/Register.h"
31#include "llvm/CodeGen/TargetLowering.h"
32#include "llvm/CodeGen/TargetOpcodes.h"
33#include "llvm/IR/ConstantRange.h"
34#include "llvm/IR/DerivedTypes.h"
35#include "llvm/IR/FMF.h"
36#include "llvm/InitializePasses.h"
37#include "llvm/MC/TargetRegistry.h"
38#include "llvm/Support/KnownBits.h"
39#include "llvm/Support/KnownFPClass.h"
40#include "llvm/Target/TargetMachine.h"
41
42#define DEBUG_TYPE "gisel-known-bits"
43
44using namespace llvm;
45using namespace MIPatternMatch;
46
47char llvm::GISelValueTrackingAnalysisLegacy::ID = 0;
48
49INITIALIZE_PASS(GISelValueTrackingAnalysisLegacy, DEBUG_TYPE,
50 "Analysis for ComputingKnownBits", false, true)
51
52GISelValueTracking::GISelValueTracking(MachineFunction &MF, unsigned MaxDepth)
53 : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
54 DL(MF.getFunction().getDataLayout()), MaxDepth(MaxDepth) {}
55
56Align GISelValueTracking::computeKnownAlignment(Register R, unsigned Depth) {
57 const MachineInstr *MI = MRI.getVRegDef(Reg: R);
58 switch (MI->getOpcode()) {
59 case TargetOpcode::COPY:
60 return computeKnownAlignment(R: MI->getOperand(i: 1).getReg(), Depth);
61 case TargetOpcode::G_ASSERT_ALIGN: {
62 // TODO: Min with source
63 return Align(MI->getOperand(i: 2).getImm());
64 }
65 case TargetOpcode::G_FRAME_INDEX: {
66 int FrameIdx = MI->getOperand(i: 1).getIndex();
67 return MF.getFrameInfo().getObjectAlign(ObjectIdx: FrameIdx);
68 }
69 case TargetOpcode::G_INTRINSIC:
70 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
71 case TargetOpcode::G_INTRINSIC_CONVERGENT:
72 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
73 default:
74 return TL.computeKnownAlignForTargetInstr(Analysis&: *this, R, MRI, Depth: Depth + 1);
75 }
76}
77
78KnownBits GISelValueTracking::getKnownBits(MachineInstr &MI) {
79 assert(MI.getNumExplicitDefs() == 1 &&
80 "expected single return generic instruction");
81 return getKnownBits(R: MI.getOperand(i: 0).getReg());
82}
83
84KnownBits GISelValueTracking::getKnownBits(Register R) {
85 const LLT Ty = MRI.getType(Reg: R);
86 // Since the number of lanes in a scalable vector is unknown at compile time,
87 // we track one bit which is implicitly broadcast to all lanes. This means
88 // that all lanes in a scalable vector are considered demanded.
89 APInt DemandedElts =
90 Ty.isFixedVector() ? APInt::getAllOnes(numBits: Ty.getNumElements()) : APInt(1, 1);
91 return getKnownBits(R, DemandedElts);
92}
93
94KnownBits GISelValueTracking::getKnownBits(Register R,
95 const APInt &DemandedElts,
96 unsigned Depth) {
97 KnownBits Known;
98 computeKnownBitsImpl(R, Known, DemandedElts, Depth);
99 return Known;
100}
101
102bool GISelValueTracking::signBitIsZero(Register R) {
103 LLT Ty = MRI.getType(Reg: R);
104 unsigned BitWidth = Ty.getScalarSizeInBits();
105 return maskedValueIsZero(Val: R, Mask: APInt::getSignMask(BitWidth));
106}
107
108APInt GISelValueTracking::getKnownZeroes(Register R) {
109 return getKnownBits(R).Zero;
110}
111
112APInt GISelValueTracking::getKnownOnes(Register R) {
113 return getKnownBits(R).One;
114}
115
116[[maybe_unused]] static void
117dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
118 dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
119 << "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
120 << toString(I: Known.Zero | Known.One, Radix: 16, Signed: false) << "\n"
121 << "[" << Depth << "] Zero: 0x" << toString(I: Known.Zero, Radix: 16, Signed: false)
122 << "\n"
123 << "[" << Depth << "] One: 0x" << toString(I: Known.One, Radix: 16, Signed: false)
124 << "\n";
125}
126
127/// Compute known bits for the intersection of \p Src0 and \p Src1
128void GISelValueTracking::computeKnownBitsMin(Register Src0, Register Src1,
129 KnownBits &Known,
130 const APInt &DemandedElts,
131 unsigned Depth) {
132 // Test src1 first, since we canonicalize simpler expressions to the RHS.
133 computeKnownBitsImpl(R: Src1, Known, DemandedElts, Depth);
134
135 // If we don't know any bits, early out.
136 if (Known.isUnknown())
137 return;
138
139 KnownBits Known2;
140 computeKnownBitsImpl(R: Src0, Known&: Known2, DemandedElts, Depth);
141
142 // Only known if known in both the LHS and RHS.
143 Known = Known.intersectWith(RHS: Known2);
144}
145
146// Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is
147// created using Width. Use this function when the inputs are KnownBits
148// objects. TODO: Move this KnownBits.h if this is usable in more cases.
149static KnownBits extractBits(unsigned BitWidth, const KnownBits &SrcOpKnown,
150 const KnownBits &OffsetKnown,
151 const KnownBits &WidthKnown) {
152 KnownBits Mask(BitWidth);
153 Mask.Zero = APInt::getBitsSetFrom(
154 numBits: BitWidth, loBit: WidthKnown.getMaxValue().getLimitedValue(Limit: BitWidth));
155 Mask.One = APInt::getLowBitsSet(
156 numBits: BitWidth, loBitsSet: WidthKnown.getMinValue().getLimitedValue(Limit: BitWidth));
157 return KnownBits::lshr(LHS: SrcOpKnown, RHS: OffsetKnown) & Mask;
158}
159
160void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
161 const APInt &DemandedElts,
162 unsigned Depth) {
163 MachineInstr &MI = *MRI.getVRegDef(Reg: R);
164 unsigned Opcode = MI.getOpcode();
165 LLT DstTy = MRI.getType(Reg: R);
166
167 // Handle the case where this is called on a register that does not have a
168 // type constraint. For example, it may be post-ISel or this target might not
169 // preserve the type when early-selecting instructions.
170 if (!DstTy.isValid()) {
171 Known = KnownBits();
172 return;
173 }
174
175#ifndef NDEBUG
176 if (DstTy.isFixedVector()) {
177 assert(
178 DstTy.getNumElements() == DemandedElts.getBitWidth() &&
179 "DemandedElt width should equal the fixed vector number of elements");
180 } else {
181 assert(DemandedElts.getBitWidth() == 1 && DemandedElts == APInt(1, 1) &&
182 "DemandedElt width should be 1 for scalars or scalable vectors");
183 }
184#endif
185
186 unsigned BitWidth = DstTy.getScalarSizeInBits();
187 Known = KnownBits(BitWidth); // Don't know anything
188
189 // Depth may get bigger than max depth if it gets passed to a different
190 // GISelValueTracking object.
191 // This may happen when say a generic part uses a GISelValueTracking object
192 // with some max depth, but then we hit TL.computeKnownBitsForTargetInstr
193 // which creates a new GISelValueTracking object with a different and smaller
194 // depth. If we just check for equality, we would never exit if the depth
195 // that is passed down to the target specific GISelValueTracking object is
196 // already bigger than its max depth.
197 if (Depth >= getMaxDepth())
198 return;
199
200 if (!DemandedElts)
201 return; // No demanded elts, better to assume we don't know anything.
202
203 KnownBits Known2;
204
205 switch (Opcode) {
206 default:
207 TL.computeKnownBitsForTargetInstr(Analysis&: *this, R, Known, DemandedElts, MRI,
208 Depth);
209 break;
210 case TargetOpcode::G_BUILD_VECTOR: {
211 // Collect the known bits that are shared by every demanded vector element.
212 Known.Zero.setAllBits();
213 Known.One.setAllBits();
214 for (const auto &[I, MO] : enumerate(First: drop_begin(RangeOrContainer: MI.operands()))) {
215 if (!DemandedElts[I])
216 continue;
217
218 computeKnownBitsImpl(R: MO.getReg(), Known&: Known2, DemandedElts: APInt(1, 1), Depth: Depth + 1);
219
220 // Known bits are the values that are shared by every demanded element.
221 Known = Known.intersectWith(RHS: Known2);
222
223 // If we don't know any bits, early out.
224 if (Known.isUnknown())
225 break;
226 }
227 break;
228 }
229 case TargetOpcode::G_SPLAT_VECTOR: {
230 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts: APInt(1, 1),
231 Depth: Depth + 1);
232 // Implicitly truncate the bits to match the official semantics of
233 // G_SPLAT_VECTOR.
234 Known = Known.trunc(BitWidth);
235 break;
236 }
237 case TargetOpcode::COPY:
238 case TargetOpcode::G_PHI:
239 case TargetOpcode::PHI: {
240 Known.One = APInt::getAllOnes(numBits: BitWidth);
241 Known.Zero = APInt::getAllOnes(numBits: BitWidth);
242 // Destination registers should not have subregisters at this
243 // point of the pipeline, otherwise the main live-range will be
244 // defined more than once, which is against SSA.
245 assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?");
246 // PHI's operand are a mix of registers and basic blocks interleaved.
247 // We only care about the register ones.
248 for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
249 const MachineOperand &Src = MI.getOperand(i: Idx);
250 Register SrcReg = Src.getReg();
251 LLT SrcTy = MRI.getType(Reg: SrcReg);
252 // Look through trivial copies and phis but don't look through trivial
253 // copies or phis of the form `%1:(s32) = OP %0:gpr32`, known-bits
254 // analysis is currently unable to determine the bit width of a
255 // register class.
256 //
257 // We can't use NoSubRegister by name as it's defined by each target but
258 // it's always defined to be 0 by tablegen.
259 if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
260 SrcTy.isValid()) {
261 APInt NowDemandedElts;
262 if (!SrcTy.isFixedVector()) {
263 NowDemandedElts = APInt(1, 1);
264 } else if (DstTy.isFixedVector() &&
265 SrcTy.getNumElements() == DstTy.getNumElements()) {
266 NowDemandedElts = DemandedElts;
267 } else {
268 NowDemandedElts = APInt::getAllOnes(numBits: SrcTy.getNumElements());
269 }
270
271 // For COPYs we don't do anything, don't increase the depth.
272 computeKnownBitsImpl(R: SrcReg, Known&: Known2, DemandedElts: NowDemandedElts,
273 Depth: Depth + (Opcode != TargetOpcode::COPY));
274 Known2 = Known2.anyextOrTrunc(BitWidth);
275 Known = Known.intersectWith(RHS: Known2);
276 // If we reach a point where we don't know anything
277 // just stop looking through the operands.
278 if (Known.isUnknown())
279 break;
280 } else {
281 // We know nothing.
282 Known = KnownBits(BitWidth);
283 break;
284 }
285 }
286 break;
287 }
288 case TargetOpcode::G_STEP_VECTOR: {
289 APInt Step = MI.getOperand(i: 1).getCImm()->getValue();
290
291 if (Step.isPowerOf2())
292 Known.Zero.setLowBits(Step.logBase2());
293
294 if (!isUIntN(N: BitWidth, x: DstTy.getElementCount().getKnownMinValue()))
295 break;
296
297 const APInt MinNumElts =
298 APInt(BitWidth, DstTy.getElementCount().getKnownMinValue());
299 const Function &F = getMachineFunction().getFunction();
300 bool Overflow;
301 const APInt MaxNumElts = getVScaleRange(F: &F, BitWidth)
302 .getUnsignedMax()
303 .umul_ov(RHS: MinNumElts, Overflow);
304 if (Overflow)
305 break;
306 const APInt MaxValue = (MaxNumElts - 1).umul_ov(RHS: Step, Overflow);
307 if (Overflow)
308 break;
309 Known.Zero.setHighBits(MaxValue.countl_zero());
310 break;
311 }
312 case TargetOpcode::G_CONSTANT: {
313 Known = KnownBits::makeConstant(C: MI.getOperand(i: 1).getCImm()->getValue());
314 break;
315 }
316 case TargetOpcode::G_FRAME_INDEX: {
317 int FrameIdx = MI.getOperand(i: 1).getIndex();
318 TL.computeKnownBitsForFrameIndex(FIOp: FrameIdx, Known, MF);
319 break;
320 }
321 case TargetOpcode::G_SUB: {
322 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
323 Depth: Depth + 1);
324 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: Known2, DemandedElts,
325 Depth: Depth + 1);
326 Known = KnownBits::sub(LHS: Known, RHS: Known2, NSW: MI.getFlag(Flag: MachineInstr::NoSWrap),
327 NUW: MI.getFlag(Flag: MachineInstr::NoUWrap));
328 break;
329 }
330 case TargetOpcode::G_XOR: {
331 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
332 Depth: Depth + 1);
333 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
334 Depth: Depth + 1);
335
336 Known ^= Known2;
337 break;
338 }
339 case TargetOpcode::G_PTR_ADD: {
340 if (DstTy.isVector())
341 break;
342 // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets?
343 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 1).getReg());
344 if (DL.isNonIntegralAddressSpace(AddrSpace: Ty.getAddressSpace()))
345 break;
346 [[fallthrough]];
347 }
348 case TargetOpcode::G_ADD: {
349 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
350 Depth: Depth + 1);
351 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: Known2, DemandedElts,
352 Depth: Depth + 1);
353 Known = KnownBits::add(LHS: Known, RHS: Known2);
354 break;
355 }
356 case TargetOpcode::G_AND: {
357 // If either the LHS or the RHS are Zero, the result is zero.
358 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
359 Depth: Depth + 1);
360 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
361 Depth: Depth + 1);
362
363 Known &= Known2;
364 break;
365 }
366 case TargetOpcode::G_OR: {
367 // If either the LHS or the RHS are Zero, the result is zero.
368 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
369 Depth: Depth + 1);
370 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
371 Depth: Depth + 1);
372
373 Known |= Known2;
374 break;
375 }
376 case TargetOpcode::G_MUL: {
377 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
378 Depth: Depth + 1);
379 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
380 Depth: Depth + 1);
381 Known = KnownBits::mul(LHS: Known, RHS: Known2);
382 break;
383 }
384 case TargetOpcode::G_UMULH: {
385 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
386 Depth: Depth + 1);
387 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
388 Depth: Depth + 1);
389 Known = KnownBits::mulhu(LHS: Known, RHS: Known2);
390 break;
391 }
392 case TargetOpcode::G_SMULH: {
393 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
394 Depth: Depth + 1);
395 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
396 Depth: Depth + 1);
397 Known = KnownBits::mulhs(LHS: Known, RHS: Known2);
398 break;
399 }
400 case TargetOpcode::G_ABDU: {
401 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
402 Depth: Depth + 1);
403 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
404 Depth: Depth + 1);
405 Known = KnownBits::abdu(LHS: Known, RHS: Known2);
406 break;
407 }
408 case TargetOpcode::G_ABDS: {
409 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
410 Depth: Depth + 1);
411 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
412 Depth: Depth + 1);
413 Known = KnownBits::abds(LHS: Known, RHS: Known2);
414
415 unsigned SignBits1 =
416 computeNumSignBits(R: MI.getOperand(i: 2).getReg(), DemandedElts, Depth: Depth + 1);
417 if (SignBits1 == 1) {
418 break;
419 }
420 unsigned SignBits0 =
421 computeNumSignBits(R: MI.getOperand(i: 1).getReg(), DemandedElts, Depth: Depth + 1);
422
423 Known.Zero.setHighBits(std::min(a: SignBits0, b: SignBits1) - 1);
424 break;
425 }
426 case TargetOpcode::G_UDIV: {
427 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
428 Depth: Depth + 1);
429 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: Known2, DemandedElts,
430 Depth: Depth + 1);
431 Known = KnownBits::udiv(LHS: Known, RHS: Known2,
432 Exact: MI.getFlag(Flag: MachineInstr::MIFlag::IsExact));
433 break;
434 }
435 case TargetOpcode::G_SDIV: {
436 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
437 Depth: Depth + 1);
438 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: Known2, DemandedElts,
439 Depth: Depth + 1);
440 Known = KnownBits::sdiv(LHS: Known, RHS: Known2,
441 Exact: MI.getFlag(Flag: MachineInstr::MIFlag::IsExact));
442 break;
443 }
444 case TargetOpcode::G_UREM: {
445 KnownBits LHSKnown(Known.getBitWidth());
446 KnownBits RHSKnown(Known.getBitWidth());
447
448 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: LHSKnown, DemandedElts,
449 Depth: Depth + 1);
450 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: RHSKnown, DemandedElts,
451 Depth: Depth + 1);
452
453 Known = KnownBits::urem(LHS: LHSKnown, RHS: RHSKnown);
454 break;
455 }
456 case TargetOpcode::G_SREM: {
457 KnownBits LHSKnown(Known.getBitWidth());
458 KnownBits RHSKnown(Known.getBitWidth());
459
460 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: LHSKnown, DemandedElts,
461 Depth: Depth + 1);
462 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: RHSKnown, DemandedElts,
463 Depth: Depth + 1);
464
465 Known = KnownBits::srem(LHS: LHSKnown, RHS: RHSKnown);
466 break;
467 }
468 case TargetOpcode::G_SELECT: {
469 computeKnownBitsMin(Src0: MI.getOperand(i: 2).getReg(), Src1: MI.getOperand(i: 3).getReg(),
470 Known, DemandedElts, Depth: Depth + 1);
471 break;
472 }
473 case TargetOpcode::G_SMIN: {
474 // TODO: Handle clamp pattern with number of sign bits
475 KnownBits KnownRHS;
476 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
477 Depth: Depth + 1);
478 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: KnownRHS, DemandedElts,
479 Depth: Depth + 1);
480 Known = KnownBits::smin(LHS: Known, RHS: KnownRHS);
481 break;
482 }
483 case TargetOpcode::G_SMAX: {
484 // TODO: Handle clamp pattern with number of sign bits
485 KnownBits KnownRHS;
486 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
487 Depth: Depth + 1);
488 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: KnownRHS, DemandedElts,
489 Depth: Depth + 1);
490 Known = KnownBits::smax(LHS: Known, RHS: KnownRHS);
491 break;
492 }
493 case TargetOpcode::G_UMIN: {
494 KnownBits KnownRHS;
495 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
496 Depth: Depth + 1);
497 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: KnownRHS, DemandedElts,
498 Depth: Depth + 1);
499 Known = KnownBits::umin(LHS: Known, RHS: KnownRHS);
500 break;
501 }
502 case TargetOpcode::G_UMAX: {
503 KnownBits KnownRHS;
504 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
505 Depth: Depth + 1);
506 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: KnownRHS, DemandedElts,
507 Depth: Depth + 1);
508 Known = KnownBits::umax(LHS: Known, RHS: KnownRHS);
509 break;
510 }
511 case TargetOpcode::G_FCMP:
512 case TargetOpcode::G_ICMP: {
513 if (DstTy.isVector())
514 break;
515 if (TL.getBooleanContents(isVec: DstTy.isVector(),
516 isFloat: Opcode == TargetOpcode::G_FCMP) ==
517 TargetLowering::ZeroOrOneBooleanContent &&
518 BitWidth > 1)
519 Known.Zero.setBitsFrom(1);
520 break;
521 }
522 case TargetOpcode::G_SEXT: {
523 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
524 Depth: Depth + 1);
525 // If the sign bit is known to be zero or one, then sext will extend
526 // it to the top bits, else it will just zext.
527 Known = Known.sext(BitWidth);
528 break;
529 }
530 case TargetOpcode::G_ASSERT_SEXT:
531 case TargetOpcode::G_SEXT_INREG: {
532 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
533 Depth: Depth + 1);
534 Known = Known.sextInReg(SrcBitWidth: MI.getOperand(i: 2).getImm());
535 break;
536 }
537 case TargetOpcode::G_ANYEXT: {
538 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
539 Depth: Depth + 1);
540 Known = Known.anyext(BitWidth);
541 break;
542 }
543 case TargetOpcode::G_LOAD: {
544 const MachineMemOperand *MMO = *MI.memoperands_begin();
545 KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
546 if (const MDNode *Ranges = MMO->getRanges())
547 computeKnownBitsFromRangeMetadata(Ranges: *Ranges, Known&: KnownRange);
548 Known = KnownRange.anyext(BitWidth: Known.getBitWidth());
549 break;
550 }
551 case TargetOpcode::G_SEXTLOAD:
552 case TargetOpcode::G_ZEXTLOAD: {
553 if (DstTy.isVector())
554 break;
555 const MachineMemOperand *MMO = *MI.memoperands_begin();
556 KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
557 if (const MDNode *Ranges = MMO->getRanges())
558 computeKnownBitsFromRangeMetadata(Ranges: *Ranges, Known&: KnownRange);
559 Known = Opcode == TargetOpcode::G_SEXTLOAD
560 ? KnownRange.sext(BitWidth: Known.getBitWidth())
561 : KnownRange.zext(BitWidth: Known.getBitWidth());
562 break;
563 }
564 case TargetOpcode::G_ASHR: {
565 KnownBits LHSKnown, RHSKnown;
566 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: LHSKnown, DemandedElts,
567 Depth: Depth + 1);
568 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: RHSKnown, DemandedElts,
569 Depth: Depth + 1);
570 Known = KnownBits::ashr(LHS: LHSKnown, RHS: RHSKnown);
571 break;
572 }
573 case TargetOpcode::G_LSHR: {
574 KnownBits LHSKnown, RHSKnown;
575 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: LHSKnown, DemandedElts,
576 Depth: Depth + 1);
577 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: RHSKnown, DemandedElts,
578 Depth: Depth + 1);
579 Known = KnownBits::lshr(LHS: LHSKnown, RHS: RHSKnown);
580 break;
581 }
582 case TargetOpcode::G_SHL: {
583 KnownBits LHSKnown, RHSKnown;
584 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: LHSKnown, DemandedElts,
585 Depth: Depth + 1);
586 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: RHSKnown, DemandedElts,
587 Depth: Depth + 1);
588 Known = KnownBits::shl(LHS: LHSKnown, RHS: RHSKnown);
589 break;
590 }
591 case TargetOpcode::G_ROTL:
592 case TargetOpcode::G_ROTR: {
593 MachineInstr *AmtOpMI = MRI.getVRegDef(Reg: MI.getOperand(i: 2).getReg());
594 auto MaybeAmtOp = isConstantOrConstantSplatVector(MI&: *AmtOpMI, MRI);
595 if (!MaybeAmtOp)
596 break;
597
598 Register SrcReg = MI.getOperand(i: 1).getReg();
599 computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1);
600
601 unsigned Amt = MaybeAmtOp->urem(RHS: BitWidth);
602
603 // Canonicalize to ROTR.
604 if (Opcode == TargetOpcode::G_ROTL)
605 Amt = BitWidth - Amt;
606
607 Known.Zero = Known.Zero.rotr(rotateAmt: Amt);
608 Known.One = Known.One.rotr(rotateAmt: Amt);
609 break;
610 }
611 case TargetOpcode::G_FSHL:
612 case TargetOpcode::G_FSHR: {
613 MachineInstr *AmtOpMI = MRI.getVRegDef(Reg: MI.getOperand(i: 3).getReg());
614 auto MaybeAmtOp = isConstantOrConstantSplatVector(MI&: *AmtOpMI, MRI);
615 if (!MaybeAmtOp)
616 break;
617
618 const APInt Amt = *MaybeAmtOp;
619 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
620 Depth: Depth + 1);
621 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: Known2, DemandedElts,
622 Depth: Depth + 1);
623 Known = Opcode == TargetOpcode::G_FSHL
624 ? KnownBits::fshl(LHS: Known, RHS: Known2, Amt)
625 : KnownBits::fshr(LHS: Known, RHS: Known2, Amt);
626 break;
627 }
628 case TargetOpcode::G_INTTOPTR:
629 case TargetOpcode::G_PTRTOINT:
630 if (DstTy.isVector())
631 break;
632 // Fall through and handle them the same as zext/trunc.
633 [[fallthrough]];
634 case TargetOpcode::G_ZEXT:
635 case TargetOpcode::G_TRUNC: {
636 Register SrcReg = MI.getOperand(i: 1).getReg();
637 computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1);
638 Known = Known.zextOrTrunc(BitWidth);
639 break;
640 }
641 case TargetOpcode::G_ASSERT_ZEXT: {
642 Register SrcReg = MI.getOperand(i: 1).getReg();
643 computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1);
644
645 unsigned SrcBitWidth = MI.getOperand(i: 2).getImm();
646 assert(SrcBitWidth && "SrcBitWidth can't be zero");
647 APInt InMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: SrcBitWidth);
648 Known.Zero |= (~InMask);
649 Known.One &= (~Known.Zero);
650 break;
651 }
652 case TargetOpcode::G_ASSERT_ALIGN: {
653 int64_t LogOfAlign = Log2_64(Value: MI.getOperand(i: 2).getImm());
654
655 // TODO: Should use maximum with source
656 // If a node is guaranteed to be aligned, set low zero bits accordingly as
657 // well as clearing one bits.
658 Known.Zero.setLowBits(LogOfAlign);
659 Known.One.clearLowBits(loBits: LogOfAlign);
660 break;
661 }
662 case TargetOpcode::G_MERGE_VALUES: {
663 unsigned NumOps = MI.getNumOperands();
664 unsigned OpSize = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits();
665
666 for (unsigned I = 0; I != NumOps - 1; ++I) {
667 KnownBits SrcOpKnown;
668 computeKnownBitsImpl(R: MI.getOperand(i: I + 1).getReg(), Known&: SrcOpKnown,
669 DemandedElts, Depth: Depth + 1);
670 Known.insertBits(SubBits: SrcOpKnown, BitPosition: I * OpSize);
671 }
672 break;
673 }
674 case TargetOpcode::G_UNMERGE_VALUES: {
675 unsigned NumOps = MI.getNumOperands();
676 Register SrcReg = MI.getOperand(i: NumOps - 1).getReg();
677 LLT SrcTy = MRI.getType(Reg: SrcReg);
678
679 if (SrcTy.isVector() && SrcTy.getScalarType() != DstTy.getScalarType())
680 return; // TODO: Handle vector->subelement unmerges
681
682 // Figure out the result operand index
683 unsigned DstIdx = 0;
684 for (; DstIdx != NumOps - 1 && MI.getOperand(i: DstIdx).getReg() != R;
685 ++DstIdx)
686 ;
687
688 APInt SubDemandedElts = DemandedElts;
689 if (SrcTy.isVector()) {
690 unsigned DstLanes = DstTy.isVector() ? DstTy.getNumElements() : 1;
691 SubDemandedElts =
692 DemandedElts.zext(width: SrcTy.getNumElements()).shl(shiftAmt: DstIdx * DstLanes);
693 }
694
695 KnownBits SrcOpKnown;
696 computeKnownBitsImpl(R: SrcReg, Known&: SrcOpKnown, DemandedElts: SubDemandedElts, Depth: Depth + 1);
697
698 if (SrcTy.isVector())
699 Known = std::move(SrcOpKnown);
700 else
701 Known = SrcOpKnown.extractBits(NumBits: BitWidth, BitPosition: BitWidth * DstIdx);
702 break;
703 }
704 case TargetOpcode::G_BSWAP: {
705 Register SrcReg = MI.getOperand(i: 1).getReg();
706 computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1);
707 Known = Known.byteSwap();
708 break;
709 }
710 case TargetOpcode::G_BITREVERSE: {
711 Register SrcReg = MI.getOperand(i: 1).getReg();
712 computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1);
713 Known = Known.reverseBits();
714 break;
715 }
716 case TargetOpcode::G_CTPOP: {
717 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
718 Depth: Depth + 1);
719 // We can bound the space the count needs. Also, bits known to be zero
720 // can't contribute to the population.
721 unsigned BitsPossiblySet = Known2.countMaxPopulation();
722 unsigned LowBits = llvm::bit_width(Value: BitsPossiblySet);
723 Known.Zero.setBitsFrom(LowBits);
724 // TODO: we could bound Known.One using the lower bound on the number of
725 // bits which might be set provided by popcnt KnownOne2.
726 break;
727 }
728 case TargetOpcode::G_UBFX: {
729 KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
730 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: SrcOpKnown, DemandedElts,
731 Depth: Depth + 1);
732 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: OffsetKnown, DemandedElts,
733 Depth: Depth + 1);
734 computeKnownBitsImpl(R: MI.getOperand(i: 3).getReg(), Known&: WidthKnown, DemandedElts,
735 Depth: Depth + 1);
736 Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
737 break;
738 }
739 case TargetOpcode::G_SBFX: {
740 KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
741 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: SrcOpKnown, DemandedElts,
742 Depth: Depth + 1);
743 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: OffsetKnown, DemandedElts,
744 Depth: Depth + 1);
745 computeKnownBitsImpl(R: MI.getOperand(i: 3).getReg(), Known&: WidthKnown, DemandedElts,
746 Depth: Depth + 1);
747 OffsetKnown = OffsetKnown.sext(BitWidth);
748 WidthKnown = WidthKnown.sext(BitWidth);
749 Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
750 // Sign extend the extracted value using shift left and arithmetic shift
751 // right.
752 KnownBits ExtKnown = KnownBits::makeConstant(C: APInt(BitWidth, BitWidth));
753 KnownBits ShiftKnown = KnownBits::sub(LHS: ExtKnown, RHS: WidthKnown);
754 Known = KnownBits::ashr(LHS: KnownBits::shl(LHS: Known, RHS: ShiftKnown), RHS: ShiftKnown);
755 break;
756 }
757 case TargetOpcode::G_UADDO:
758 case TargetOpcode::G_UADDE:
759 case TargetOpcode::G_SADDO:
760 case TargetOpcode::G_SADDE: {
761 if (MI.getOperand(i: 1).getReg() == R) {
762 // If we know the result of a compare has the top bits zero, use this
763 // info.
764 if (TL.getBooleanContents(isVec: DstTy.isVector(), isFloat: false) ==
765 TargetLowering::ZeroOrOneBooleanContent &&
766 BitWidth > 1)
767 Known.Zero.setBitsFrom(1);
768 break;
769 }
770
771 assert(MI.getOperand(0).getReg() == R &&
772 "We only compute knownbits for the sum here.");
773 // With [US]ADDE, a carry bit may be added in.
774 KnownBits Carry(1);
775 if (Opcode == TargetOpcode::G_UADDE || Opcode == TargetOpcode::G_SADDE) {
776 computeKnownBitsImpl(R: MI.getOperand(i: 4).getReg(), Known&: Carry, DemandedElts,
777 Depth: Depth + 1);
778 // Carry has bit width 1
779 Carry = Carry.trunc(BitWidth: 1);
780 } else {
781 Carry.setAllZero();
782 }
783
784 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
785 Depth: Depth + 1);
786 computeKnownBitsImpl(R: MI.getOperand(i: 3).getReg(), Known&: Known2, DemandedElts,
787 Depth: Depth + 1);
788 Known = KnownBits::computeForAddCarry(LHS: Known, RHS: Known2, Carry);
789 break;
790 }
791 case TargetOpcode::G_USUBO:
792 case TargetOpcode::G_USUBE:
793 case TargetOpcode::G_SSUBO:
794 case TargetOpcode::G_SSUBE:
795 case TargetOpcode::G_UMULO:
796 case TargetOpcode::G_SMULO: {
797 if (MI.getOperand(i: 1).getReg() == R) {
798 // If we know the result of a compare has the top bits zero, use this
799 // info.
800 if (TL.getBooleanContents(isVec: DstTy.isVector(), isFloat: false) ==
801 TargetLowering::ZeroOrOneBooleanContent &&
802 BitWidth > 1)
803 Known.Zero.setBitsFrom(1);
804 }
805 break;
806 }
807 case TargetOpcode::G_CTTZ:
808 case TargetOpcode::G_CTTZ_ZERO_POISON: {
809 KnownBits SrcOpKnown;
810 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: SrcOpKnown, DemandedElts,
811 Depth: Depth + 1);
812 // If we have a known 1, its position is our upper bound
813 unsigned PossibleTZ = SrcOpKnown.countMaxTrailingZeros();
814 unsigned LowBits = llvm::bit_width(Value: PossibleTZ);
815 Known.Zero.setBitsFrom(LowBits);
816 break;
817 }
818 case TargetOpcode::G_CTLZ:
819 case TargetOpcode::G_CTLZ_ZERO_POISON: {
820 KnownBits SrcOpKnown;
821 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: SrcOpKnown, DemandedElts,
822 Depth: Depth + 1);
823 // If we have a known 1, its position is our upper bound.
824 unsigned PossibleLZ = SrcOpKnown.countMaxLeadingZeros();
825 unsigned LowBits = llvm::bit_width(Value: PossibleLZ);
826 Known.Zero.setBitsFrom(LowBits);
827 break;
828 }
829 case TargetOpcode::G_CTLS: {
830 Register Reg = MI.getOperand(i: 1).getReg();
831 unsigned MinRedundantSignBits = computeNumSignBits(R: Reg, Depth: Depth + 1) - 1;
832
833 unsigned MaxUpperRedundantSignBits = MRI.getType(Reg).getScalarSizeInBits();
834
835 ConstantRange Range(APInt(BitWidth, MinRedundantSignBits),
836 APInt(BitWidth, MaxUpperRedundantSignBits));
837
838 Known = Range.toKnownBits();
839 break;
840 }
841 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
842 GExtractVectorElement &Extract = cast<GExtractVectorElement>(Val&: MI);
843 Register InVec = Extract.getVectorReg();
844 Register EltNo = Extract.getIndexReg();
845
846 auto ConstEltNo = getIConstantVRegVal(VReg: EltNo, MRI);
847
848 LLT VecVT = MRI.getType(Reg: InVec);
849 // computeKnownBits not yet implemented for scalable vectors.
850 if (VecVT.isScalableVector())
851 break;
852
853 const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
854 const unsigned NumSrcElts = VecVT.getNumElements();
855 // A return type different from the vector's element type may lead to
856 // issues with pattern selection. Bail out to avoid that.
857 if (BitWidth > EltBitWidth)
858 break;
859
860 Known.Zero.setAllBits();
861 Known.One.setAllBits();
862
863 // If we know the element index, just demand that vector element, else for
864 // an unknown element index, ignore DemandedElts and demand them all.
865 APInt DemandedSrcElts = APInt::getAllOnes(numBits: NumSrcElts);
866 if (ConstEltNo && ConstEltNo->ult(RHS: NumSrcElts))
867 DemandedSrcElts =
868 APInt::getOneBitSet(numBits: NumSrcElts, BitNo: ConstEltNo->getZExtValue());
869
870 computeKnownBitsImpl(R: InVec, Known, DemandedElts: DemandedSrcElts, Depth: Depth + 1);
871 break;
872 }
873 case TargetOpcode::G_INSERT_VECTOR_ELT: {
874 GInsertVectorElement &Insert = cast<GInsertVectorElement>(Val&: MI);
875 Register InVec = Insert.getVectorReg();
876 Register InVal = Insert.getElementReg();
877 Register EltNo = Insert.getIndexReg();
878 LLT VecVT = MRI.getType(Reg: InVec);
879
880 if (VecVT.isScalableVector())
881 break;
882
883 auto ConstEltNo = getIConstantVRegVal(VReg: EltNo, MRI);
884 unsigned NumElts = VecVT.getNumElements();
885
886 bool DemandedVal = true;
887 APInt DemandedVecElts = DemandedElts;
888 if (ConstEltNo && ConstEltNo->ult(RHS: NumElts)) {
889 unsigned EltIdx = ConstEltNo->getZExtValue();
890 DemandedVal = !!DemandedElts[EltIdx];
891 DemandedVecElts.clearBit(BitPosition: EltIdx);
892 }
893 Known.setAllConflict();
894 if (DemandedVal) {
895 computeKnownBitsImpl(R: InVal, Known&: Known2, DemandedElts: APInt(1, 1), Depth: Depth + 1);
896 Known = Known.intersectWith(RHS: Known2.zextOrTrunc(BitWidth));
897 }
898 if (!!DemandedVecElts) {
899 computeKnownBitsImpl(R: InVec, Known&: Known2, DemandedElts: DemandedVecElts, Depth: Depth + 1);
900 Known = Known.intersectWith(RHS: Known2);
901 }
902 break;
903 }
904 case TargetOpcode::G_SHUFFLE_VECTOR: {
905 APInt DemandedLHS, DemandedRHS;
906 // Collect the known bits that are shared by every vector element referenced
907 // by the shuffle.
908 unsigned NumElts = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getNumElements();
909 if (!getShuffleDemandedElts(SrcWidth: NumElts, Mask: MI.getOperand(i: 3).getShuffleMask(),
910 DemandedElts, DemandedLHS, DemandedRHS))
911 break;
912
913 // Known bits are the values that are shared by every demanded element.
914 Known.Zero.setAllBits();
915 Known.One.setAllBits();
916 if (!!DemandedLHS) {
917 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts: DemandedLHS,
918 Depth: Depth + 1);
919 Known = Known.intersectWith(RHS: Known2);
920 }
921 // If we don't know any bits, early out.
922 if (Known.isUnknown())
923 break;
924 if (!!DemandedRHS) {
925 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: Known2, DemandedElts: DemandedRHS,
926 Depth: Depth + 1);
927 Known = Known.intersectWith(RHS: Known2);
928 }
929 break;
930 }
931 case TargetOpcode::G_CONCAT_VECTORS: {
932 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isScalableVector())
933 break;
934 // Split DemandedElts and test each of the demanded subvectors.
935 Known.Zero.setAllBits();
936 Known.One.setAllBits();
937 unsigned NumSubVectorElts =
938 MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getNumElements();
939
940 for (const auto &[I, MO] : enumerate(First: drop_begin(RangeOrContainer: MI.operands()))) {
941 APInt DemandedSub =
942 DemandedElts.extractBits(numBits: NumSubVectorElts, bitPosition: I * NumSubVectorElts);
943 if (!!DemandedSub) {
944 computeKnownBitsImpl(R: MO.getReg(), Known&: Known2, DemandedElts: DemandedSub, Depth: Depth + 1);
945
946 Known = Known.intersectWith(RHS: Known2);
947 }
948 // If we don't know any bits, early out.
949 if (Known.isUnknown())
950 break;
951 }
952 break;
953 }
954 case TargetOpcode::G_ABS: {
955 Register SrcReg = MI.getOperand(i: 1).getReg();
956 computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1);
957 Known = Known.abs();
958 Known.Zero.setHighBits(computeNumSignBits(R: SrcReg, DemandedElts, Depth: Depth + 1) -
959 1);
960 break;
961 }
962 }
963
964 LLVM_DEBUG(dumpResult(MI, Known, Depth));
965}
966
967void GISelValueTracking::computeKnownFPClass(Register R, KnownFPClass &Known,
968 FPClassTest InterestedClasses,
969 unsigned Depth) {
970 LLT Ty = MRI.getType(Reg: R);
971 APInt DemandedElts =
972 Ty.isFixedVector() ? APInt::getAllOnes(numBits: Ty.getNumElements()) : APInt(1, 1);
973 computeKnownFPClass(R, DemandedElts, InterestedClasses, Known, Depth);
974}
975
976/// Return true if this value is known to be the fractional part x - floor(x),
977/// which lies in [0, 1). This implies the value cannot introduce overflow in a
978/// fmul when the other operand is known finite.
979static bool isAbsoluteValueULEOne(Register R, const MachineRegisterInfo &MRI) {
980 using namespace MIPatternMatch;
981 Register SubX;
982 return mi_match(R, MRI, P: m_GFSub(L: m_Reg(R&: SubX), R: m_GFFloor(Src: m_DeferredReg(R&: SubX))));
983}
984
985void GISelValueTracking::computeKnownFPClassForFPTrunc(
986 const MachineInstr &MI, const APInt &DemandedElts,
987 FPClassTest InterestedClasses, KnownFPClass &Known, unsigned Depth) {
988 if ((InterestedClasses & (KnownFPClass::OrderedLessThanZeroMask | fcNan)) ==
989 fcNone)
990 return;
991
992 Register Val = MI.getOperand(i: 1).getReg();
993 KnownFPClass KnownSrc;
994 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc,
995 Depth: Depth + 1);
996 Known = KnownFPClass::fptrunc(KnownSrc);
997}
998
999void GISelValueTracking::computeKnownFPClass(Register R,
1000 const APInt &DemandedElts,
1001 FPClassTest InterestedClasses,
1002 KnownFPClass &Known,
1003 unsigned Depth) {
1004 assert(Known.isUnknown() && "should not be called with known information");
1005
1006 if (!DemandedElts) {
1007 // No demanded elts, better to assume we don't know anything.
1008 Known.resetAll();
1009 return;
1010 }
1011
1012 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
1013
1014 MachineInstr &MI = *MRI.getVRegDef(Reg: R);
1015 unsigned Opcode = MI.getOpcode();
1016 LLT DstTy = MRI.getType(Reg: R);
1017
1018 if (!DstTy.isValid()) {
1019 Known.resetAll();
1020 return;
1021 }
1022
1023 if (auto Cst = GFConstant::getConstant(Const: R, MRI)) {
1024 switch (Cst->getKind()) {
1025 case GFConstant::GFConstantKind::Scalar: {
1026 auto APF = Cst->getScalarValue();
1027 Known.KnownFPClasses = APF.classify();
1028 Known.SignBit = APF.isNegative();
1029 break;
1030 }
1031 case GFConstant::GFConstantKind::FixedVector: {
1032 Known.KnownFPClasses = fcNone;
1033 bool SignBitAllZero = true;
1034 bool SignBitAllOne = true;
1035
1036 for (auto C : *Cst) {
1037 Known.KnownFPClasses |= C.classify();
1038 if (C.isNegative())
1039 SignBitAllZero = false;
1040 else
1041 SignBitAllOne = false;
1042 }
1043
1044 if (SignBitAllOne != SignBitAllZero)
1045 Known.SignBit = SignBitAllOne;
1046
1047 break;
1048 }
1049 case GFConstant::GFConstantKind::ScalableVector: {
1050 Known.resetAll();
1051 break;
1052 }
1053 }
1054
1055 return;
1056 }
1057
1058 FPClassTest KnownNotFromFlags = fcNone;
1059 if (MI.getFlag(Flag: MachineInstr::MIFlag::FmNoNans))
1060 KnownNotFromFlags |= fcNan;
1061 if (MI.getFlag(Flag: MachineInstr::MIFlag::FmNoInfs))
1062 KnownNotFromFlags |= fcInf;
1063
1064 // We no longer need to find out about these bits from inputs if we can
1065 // assume this from flags/attributes.
1066 InterestedClasses &= ~KnownNotFromFlags;
1067
1068 llvm::scope_exit ClearClassesFromFlags(
1069 [=, &Known] { Known.knownNot(RuleOut: KnownNotFromFlags); });
1070
1071 // All recursive calls that increase depth must come after this.
1072 if (Depth == MaxAnalysisRecursionDepth)
1073 return;
1074
1075 const MachineFunction *MF = MI.getMF();
1076
1077 switch (Opcode) {
1078 default:
1079 TL.computeKnownFPClassForTargetInstr(Analysis&: *this, R, Known, DemandedElts, MRI,
1080 Depth);
1081 break;
1082 case TargetOpcode::G_FNEG: {
1083 Register Val = MI.getOperand(i: 1).getReg();
1084 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known, Depth: Depth + 1);
1085 Known.fneg();
1086 break;
1087 }
1088 case TargetOpcode::G_SELECT: {
1089 GSelect &SelMI = cast<GSelect>(Val&: MI);
1090 Register Cond = SelMI.getCondReg();
1091 Register LHS = SelMI.getTrueReg();
1092 Register RHS = SelMI.getFalseReg();
1093
1094 FPClassTest FilterLHS = fcAllFlags;
1095 FPClassTest FilterRHS = fcAllFlags;
1096
1097 Register TestedValue;
1098 FPClassTest MaskIfTrue = fcAllFlags;
1099 FPClassTest MaskIfFalse = fcAllFlags;
1100 FPClassTest ClassVal = fcNone;
1101
1102 CmpInst::Predicate Pred;
1103 Register CmpLHS, CmpRHS;
1104 if (mi_match(R: Cond, MRI,
1105 P: m_GFCmp(P: m_Pred(P&: Pred), L: m_Reg(R&: CmpLHS), R: m_Reg(R&: CmpRHS)))) {
1106 // If the select filters out a value based on the class, it no longer
1107 // participates in the class of the result
1108
1109 // TODO: In some degenerate cases we can infer something if we try again
1110 // without looking through sign operations.
1111 bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS;
1112 std::tie(args&: TestedValue, args&: MaskIfTrue, args&: MaskIfFalse) =
1113 fcmpImpliesClass(Pred, MF: *MF, LHS: CmpLHS, RHS: CmpRHS, LookThroughSrc: LookThroughFAbsFNeg);
1114 } else if (mi_match(
1115 R: Cond, MRI,
1116 P: m_GIsFPClass(L: m_Reg(R&: TestedValue), T: m_FPClassTest(T&: ClassVal)))) {
1117 FPClassTest TestedMask = ClassVal;
1118 MaskIfTrue = TestedMask;
1119 MaskIfFalse = ~TestedMask;
1120 }
1121
1122 if (TestedValue == LHS) {
1123 // match !isnan(x) ? x : y
1124 FilterLHS = MaskIfTrue;
1125 } else if (TestedValue == RHS) { // && IsExactClass
1126 // match !isnan(x) ? y : x
1127 FilterRHS = MaskIfFalse;
1128 }
1129
1130 KnownFPClass Known2;
1131 computeKnownFPClass(R: LHS, DemandedElts, InterestedClasses: InterestedClasses & FilterLHS, Known,
1132 Depth: Depth + 1);
1133 Known.KnownFPClasses &= FilterLHS;
1134
1135 computeKnownFPClass(R: RHS, DemandedElts, InterestedClasses: InterestedClasses & FilterRHS,
1136 Known&: Known2, Depth: Depth + 1);
1137 Known2.KnownFPClasses &= FilterRHS;
1138
1139 Known |= Known2;
1140 break;
1141 }
1142 case TargetOpcode::G_FCOPYSIGN: {
1143 Register Magnitude = MI.getOperand(i: 1).getReg();
1144 Register Sign = MI.getOperand(i: 2).getReg();
1145
1146 KnownFPClass KnownSign;
1147
1148 computeKnownFPClass(R: Magnitude, DemandedElts, InterestedClasses, Known,
1149 Depth: Depth + 1);
1150 computeKnownFPClass(R: Sign, DemandedElts, InterestedClasses, Known&: KnownSign,
1151 Depth: Depth + 1);
1152 Known.copysign(Sign: KnownSign);
1153 break;
1154 }
1155 case TargetOpcode::G_FMA:
1156 case TargetOpcode::G_STRICT_FMA:
1157 case TargetOpcode::G_FMAD: {
1158 if ((InterestedClasses & fcNegative) == fcNone)
1159 break;
1160
1161 Register A = MI.getOperand(i: 1).getReg();
1162 Register B = MI.getOperand(i: 2).getReg();
1163 Register C = MI.getOperand(i: 3).getReg();
1164
1165 DenormalMode Mode =
1166 MF->getDenormalMode(FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType()));
1167
1168 if (A == B && isGuaranteedNotToBeUndef(Reg: A, MRI, Depth: Depth + 1)) {
1169 // x * x + y
1170 KnownFPClass KnownSrc, KnownAddend;
1171 computeKnownFPClass(R: C, DemandedElts, InterestedClasses, Known&: KnownAddend,
1172 Depth: Depth + 1);
1173 computeKnownFPClass(R: A, DemandedElts, InterestedClasses, Known&: KnownSrc,
1174 Depth: Depth + 1);
1175 if (KnownNotFromFlags) {
1176 KnownSrc.knownNot(RuleOut: KnownNotFromFlags);
1177 KnownAddend.knownNot(RuleOut: KnownNotFromFlags);
1178 }
1179 Known = KnownFPClass::fma_square(Squared: KnownSrc, Addend: KnownAddend, Mode);
1180 } else {
1181 KnownFPClass KnownSrc[3];
1182 computeKnownFPClass(R: A, DemandedElts, InterestedClasses, Known&: KnownSrc[0],
1183 Depth: Depth + 1);
1184 if (KnownSrc[0].isUnknown())
1185 break;
1186 computeKnownFPClass(R: B, DemandedElts, InterestedClasses, Known&: KnownSrc[1],
1187 Depth: Depth + 1);
1188 if (KnownSrc[1].isUnknown())
1189 break;
1190 computeKnownFPClass(R: C, DemandedElts, InterestedClasses, Known&: KnownSrc[2],
1191 Depth: Depth + 1);
1192 if (KnownSrc[2].isUnknown())
1193 break;
1194 if (KnownNotFromFlags) {
1195 KnownSrc[0].knownNot(RuleOut: KnownNotFromFlags);
1196 KnownSrc[1].knownNot(RuleOut: KnownNotFromFlags);
1197 KnownSrc[2].knownNot(RuleOut: KnownNotFromFlags);
1198 }
1199 Known = KnownFPClass::fma(LHS: KnownSrc[0], RHS: KnownSrc[1], Addend: KnownSrc[2], Mode);
1200 }
1201 break;
1202 }
1203 case TargetOpcode::G_FSQRT:
1204 case TargetOpcode::G_STRICT_FSQRT: {
1205 KnownFPClass KnownSrc;
1206 FPClassTest InterestedSrcs = InterestedClasses;
1207 if (InterestedClasses & fcNan)
1208 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
1209
1210 Register Val = MI.getOperand(i: 1).getReg();
1211 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses: InterestedSrcs, Known&: KnownSrc, Depth: Depth + 1);
1212
1213 DenormalMode Mode =
1214 MF->getDenormalMode(FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType()));
1215 Known = KnownFPClass::sqrt(Src: KnownSrc, Mode);
1216 if (MI.getFlag(Flag: MachineInstr::MIFlag::FmNsz))
1217 Known.knownNot(RuleOut: fcNegZero);
1218 break;
1219 }
1220 case TargetOpcode::G_FABS: {
1221 if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) {
1222 Register Val = MI.getOperand(i: 1).getReg();
1223 // If we only care about the sign bit we don't need to inspect the
1224 // operand.
1225 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known,
1226 Depth: Depth + 1);
1227 }
1228 Known.fabs();
1229 break;
1230 }
1231 case TargetOpcode::G_FATAN2: {
1232 Register Y = MI.getOperand(i: 1).getReg();
1233 Register X = MI.getOperand(i: 2).getReg();
1234 KnownFPClass KnownY, KnownX;
1235 computeKnownFPClass(R: Y, DemandedElts, InterestedClasses, Known&: KnownY, Depth: Depth + 1);
1236 computeKnownFPClass(R: X, DemandedElts, InterestedClasses, Known&: KnownX, Depth: Depth + 1);
1237 Known = KnownFPClass::atan2(LHS: KnownY, RHS: KnownX);
1238 break;
1239 }
1240 case TargetOpcode::G_FSINH: {
1241 Register Val = MI.getOperand(i: 1).getReg();
1242 KnownFPClass KnownSrc;
1243 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc,
1244 Depth: Depth + 1);
1245 Known = KnownFPClass::sinh(Src: KnownSrc);
1246 break;
1247 }
1248 case TargetOpcode::G_FCOSH: {
1249 Register Val = MI.getOperand(i: 1).getReg();
1250 KnownFPClass KnownSrc;
1251 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc,
1252 Depth: Depth + 1);
1253 Known = KnownFPClass::cosh(Src: KnownSrc);
1254 break;
1255 }
1256 case TargetOpcode::G_FTANH: {
1257 Register Val = MI.getOperand(i: 1).getReg();
1258 KnownFPClass KnownSrc;
1259 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc,
1260 Depth: Depth + 1);
1261 Known = KnownFPClass::tanh(Src: KnownSrc);
1262 break;
1263 }
1264 case TargetOpcode::G_FASIN: {
1265 Register Val = MI.getOperand(i: 1).getReg();
1266 KnownFPClass KnownSrc;
1267 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc,
1268 Depth: Depth + 1);
1269 Known = KnownFPClass::asin(Src: KnownSrc);
1270 break;
1271 }
1272 case TargetOpcode::G_FACOS: {
1273 Register Val = MI.getOperand(i: 1).getReg();
1274 KnownFPClass KnownSrc;
1275 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc,
1276 Depth: Depth + 1);
1277 Known = KnownFPClass::acos(Src: KnownSrc);
1278 break;
1279 }
1280 case TargetOpcode::G_FATAN: {
1281 Register Val = MI.getOperand(i: 1).getReg();
1282 KnownFPClass KnownSrc;
1283 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc,
1284 Depth: Depth + 1);
1285 Known = KnownFPClass::atan(Src: KnownSrc);
1286 break;
1287 }
1288 case TargetOpcode::G_FTAN: {
1289 Register Val = MI.getOperand(i: 1).getReg();
1290 KnownFPClass KnownSrc;
1291 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc,
1292 Depth: Depth + 1);
1293 Known = KnownFPClass::tan(Src: KnownSrc);
1294 break;
1295 }
1296 case TargetOpcode::G_FSIN:
1297 case TargetOpcode::G_FCOS: {
1298 // Return NaN on infinite inputs.
1299 Register Val = MI.getOperand(i: 1).getReg();
1300 KnownFPClass KnownSrc;
1301 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc,
1302 Depth: Depth + 1);
1303 Known = Opcode == TargetOpcode::G_FCOS ? KnownFPClass::cos(Src: KnownSrc)
1304 : KnownFPClass::sin(Src: KnownSrc);
1305 break;
1306 }
1307 case TargetOpcode::G_FSINCOS: {
1308 // Operand layout: (sin_dst, cos_dst, src)
1309 Register Src = MI.getOperand(i: 2).getReg();
1310 KnownFPClass KnownSrc;
1311 computeKnownFPClass(R: Src, DemandedElts, InterestedClasses, Known&: KnownSrc,
1312 Depth: Depth + 1);
1313 if (R == MI.getOperand(i: 0).getReg())
1314 Known = KnownFPClass::sin(Src: KnownSrc);
1315 else
1316 Known = KnownFPClass::cos(Src: KnownSrc);
1317 break;
1318 }
1319 case TargetOpcode::G_FMAXNUM:
1320 case TargetOpcode::G_FMINNUM:
1321 case TargetOpcode::G_FMINNUM_IEEE:
1322 case TargetOpcode::G_FMAXIMUM:
1323 case TargetOpcode::G_FMINIMUM:
1324 case TargetOpcode::G_FMAXNUM_IEEE:
1325 case TargetOpcode::G_FMAXIMUMNUM:
1326 case TargetOpcode::G_FMINIMUMNUM: {
1327 Register LHS = MI.getOperand(i: 1).getReg();
1328 Register RHS = MI.getOperand(i: 2).getReg();
1329 KnownFPClass KnownLHS, KnownRHS;
1330
1331 computeKnownFPClass(R: LHS, DemandedElts, InterestedClasses, Known&: KnownLHS,
1332 Depth: Depth + 1);
1333 computeKnownFPClass(R: RHS, DemandedElts, InterestedClasses, Known&: KnownRHS,
1334 Depth: Depth + 1);
1335
1336 KnownFPClass::MinMaxKind Kind;
1337 switch (Opcode) {
1338 case TargetOpcode::G_FMINIMUM:
1339 Kind = KnownFPClass::MinMaxKind::minimum;
1340 break;
1341 case TargetOpcode::G_FMAXIMUM:
1342 Kind = KnownFPClass::MinMaxKind::maximum;
1343 break;
1344 case TargetOpcode::G_FMINIMUMNUM:
1345 Kind = KnownFPClass::MinMaxKind::minimumnum;
1346 break;
1347 case TargetOpcode::G_FMAXIMUMNUM:
1348 Kind = KnownFPClass::MinMaxKind::maximumnum;
1349 break;
1350 case TargetOpcode::G_FMINNUM:
1351 case TargetOpcode::G_FMINNUM_IEEE:
1352 Kind = KnownFPClass::MinMaxKind::minnum;
1353 break;
1354 case TargetOpcode::G_FMAXNUM:
1355 case TargetOpcode::G_FMAXNUM_IEEE:
1356 Kind = KnownFPClass::MinMaxKind::maxnum;
1357 break;
1358 default:
1359 llvm_unreachable("unhandled min/max opcode");
1360 }
1361
1362 DenormalMode Mode =
1363 MF->getDenormalMode(FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType()));
1364 Known = KnownFPClass::minMaxLike(LHS: KnownLHS, RHS: KnownRHS, Kind, DenormMode: Mode);
1365 break;
1366 }
1367 case TargetOpcode::G_FCANONICALIZE: {
1368 Register Val = MI.getOperand(i: 1).getReg();
1369 KnownFPClass KnownSrc;
1370 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc,
1371 Depth: Depth + 1);
1372
1373 LLT Ty = MRI.getType(Reg: Val).getScalarType();
1374 const fltSemantics &FPType = getFltSemanticForLLT(Ty);
1375 DenormalMode DenormMode = MF->getDenormalMode(FPType);
1376 Known = KnownFPClass::canonicalize(Src: KnownSrc, DenormMode);
1377 break;
1378 }
1379 case TargetOpcode::G_VECREDUCE_FMAX:
1380 case TargetOpcode::G_VECREDUCE_FMIN:
1381 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
1382 case TargetOpcode::G_VECREDUCE_FMINIMUM: {
1383 Register Val = MI.getOperand(i: 1).getReg();
1384 // reduce min/max will choose an element from one of the vector elements,
1385 // so we can infer and class information that is common to all elements.
1386
1387 Known =
1388 computeKnownFPClass(R: Val, Flags: MI.getFlags(), InterestedClasses, Depth: Depth + 1);
1389 // Can only propagate sign if output is never NaN.
1390 if (!Known.isKnownNeverNaN())
1391 Known.SignBit.reset();
1392 break;
1393 }
1394 case TargetOpcode::G_FFLOOR:
1395 case TargetOpcode::G_FCEIL:
1396 case TargetOpcode::G_FRINT:
1397 case TargetOpcode::G_FNEARBYINT:
1398 case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND:
1399 case TargetOpcode::G_INTRINSIC_ROUND:
1400 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
1401 case TargetOpcode::G_INTRINSIC_TRUNC: {
1402 Register Val = MI.getOperand(i: 1).getReg();
1403 KnownFPClass KnownSrc;
1404 FPClassTest InterestedSrcs = InterestedClasses;
1405 if (InterestedSrcs & fcPosFinite)
1406 InterestedSrcs |= fcPosFinite;
1407 if (InterestedSrcs & fcNegFinite)
1408 InterestedSrcs |= fcNegFinite;
1409 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses: InterestedSrcs, Known&: KnownSrc, Depth: Depth + 1);
1410
1411 // TODO: handle multi unit FPTypes once LLT FPInfo lands
1412 bool IsTrunc = Opcode == TargetOpcode::G_INTRINSIC_TRUNC;
1413 Known = KnownFPClass::roundToIntegral(Src: KnownSrc, IsTrunc,
1414 /*IsMultiUnitFPType=*/false);
1415 break;
1416 }
1417 case TargetOpcode::G_FEXP:
1418 case TargetOpcode::G_FEXP2:
1419 case TargetOpcode::G_FEXP10: {
1420 Register Val = MI.getOperand(i: 1).getReg();
1421 KnownFPClass KnownSrc;
1422 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc,
1423 Depth: Depth + 1);
1424 Known = KnownFPClass::exp(Src: KnownSrc);
1425 break;
1426 }
1427 case TargetOpcode::G_FLOG:
1428 case TargetOpcode::G_FLOG2:
1429 case TargetOpcode::G_FLOG10: {
1430 // log(+inf) -> +inf
1431 // log([+-]0.0) -> -inf
1432 // log(-inf) -> nan
1433 // log(-x) -> nan
1434 if ((InterestedClasses & (fcNan | fcInf)) == fcNone)
1435 break;
1436
1437 FPClassTest InterestedSrcs = InterestedClasses;
1438 if ((InterestedClasses & fcNegInf) != fcNone)
1439 InterestedSrcs |= fcZero | fcSubnormal;
1440 if ((InterestedClasses & fcNan) != fcNone)
1441 InterestedSrcs |= fcNan | fcNegative;
1442
1443 Register Val = MI.getOperand(i: 1).getReg();
1444 KnownFPClass KnownSrc;
1445 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses: InterestedSrcs, Known&: KnownSrc, Depth: Depth + 1);
1446
1447 LLT Ty = MRI.getType(Reg: Val).getScalarType();
1448 const fltSemantics &FltSem = getFltSemanticForLLT(Ty);
1449 DenormalMode Mode = MF->getDenormalMode(FPType: FltSem);
1450 Known = KnownFPClass::log(Src: KnownSrc, Mode);
1451 break;
1452 }
1453 case TargetOpcode::G_FPOWI: {
1454 if ((InterestedClasses & (fcNan | fcInf | fcNegative)) == fcNone)
1455 break;
1456
1457 Register Exp = MI.getOperand(i: 2).getReg();
1458 LLT ExpTy = MRI.getType(Reg: Exp);
1459 KnownBits ExponentKnownBits = getKnownBits(
1460 R: Exp, DemandedElts: ExpTy.isVector() ? DemandedElts : APInt(1, 1), Depth: Depth + 1);
1461
1462 FPClassTest InterestedSrcs = fcNone;
1463 if (InterestedClasses & fcNan)
1464 InterestedSrcs |= fcNan;
1465 if (!ExponentKnownBits.isZero()) {
1466 if (InterestedClasses & fcInf)
1467 InterestedSrcs |= fcFinite | fcInf;
1468 if ((InterestedClasses & fcNegative) && !ExponentKnownBits.isEven())
1469 InterestedSrcs |= fcNegative;
1470 }
1471
1472 KnownFPClass KnownSrc;
1473 if (InterestedSrcs != fcNone) {
1474 Register Val = MI.getOperand(i: 1).getReg();
1475 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses: InterestedSrcs, Known&: KnownSrc,
1476 Depth: Depth + 1);
1477 }
1478
1479 Known = KnownFPClass::powi(Src: KnownSrc, N: ExponentKnownBits);
1480 break;
1481 }
1482 case TargetOpcode::G_FLDEXP:
1483 case TargetOpcode::G_STRICT_FLDEXP: {
1484 Register Val = MI.getOperand(i: 1).getReg();
1485 KnownFPClass KnownSrc;
1486 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc,
1487 Depth: Depth + 1);
1488
1489 // Can refine inf/zero handling based on the exponent operand.
1490 const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf;
1491 KnownBits ExpBits;
1492 if ((KnownSrc.KnownFPClasses & ExpInfoMask) != fcNone) {
1493 Register ExpReg = MI.getOperand(i: 2).getReg();
1494 LLT ExpTy = MRI.getType(Reg: ExpReg);
1495 ExpBits = getKnownBits(
1496 R: ExpReg, DemandedElts: ExpTy.isVector() ? DemandedElts : APInt(1, 1), Depth: Depth + 1);
1497 }
1498
1499 LLT ScalarTy = DstTy.getScalarType();
1500 const fltSemantics &Flt = getFltSemanticForLLT(Ty: ScalarTy);
1501 DenormalMode Mode = MF->getDenormalMode(FPType: Flt);
1502 Known = KnownFPClass::ldexp(Src: KnownSrc, N: ExpBits, Flt, Mode);
1503 break;
1504 }
1505 case TargetOpcode::G_FADD:
1506 case TargetOpcode::G_STRICT_FADD:
1507 case TargetOpcode::G_FSUB:
1508 case TargetOpcode::G_STRICT_FSUB: {
1509 Register LHS = MI.getOperand(i: 1).getReg();
1510 Register RHS = MI.getOperand(i: 2).getReg();
1511 bool IsAdd = (Opcode == TargetOpcode::G_FADD ||
1512 Opcode == TargetOpcode::G_STRICT_FADD);
1513 bool WantNegative =
1514 IsAdd &&
1515 (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone;
1516 bool WantNaN = (InterestedClasses & fcNan) != fcNone;
1517 bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone;
1518
1519 if (!WantNaN && !WantNegative && !WantNegZero) {
1520 break;
1521 }
1522
1523 DenormalMode Mode =
1524 MF->getDenormalMode(FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType()));
1525
1526 FPClassTest InterestedSrcs = InterestedClasses;
1527 if (WantNegative)
1528 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
1529 if (InterestedClasses & fcNan)
1530 InterestedSrcs |= fcInf;
1531
1532 // Special case fadd x, x (canonical form of fmul x, 2).
1533 if (IsAdd && LHS == RHS && isGuaranteedNotToBeUndef(Reg: LHS, MRI, Depth: Depth + 1)) {
1534 KnownFPClass KnownSelf;
1535 computeKnownFPClass(R: LHS, DemandedElts, InterestedClasses: InterestedSrcs, Known&: KnownSelf,
1536 Depth: Depth + 1);
1537 Known = KnownFPClass::fadd_self(Src: KnownSelf, Mode);
1538 break;
1539 }
1540
1541 KnownFPClass KnownLHS, KnownRHS;
1542 computeKnownFPClass(R: RHS, DemandedElts, InterestedClasses: InterestedSrcs, Known&: KnownRHS, Depth: Depth + 1);
1543
1544 if ((WantNaN && KnownRHS.isKnownNeverNaN()) ||
1545 (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) ||
1546 WantNegZero || !IsAdd) {
1547 // RHS is canonically cheaper to compute. Skip inspecting the LHS if
1548 // there's no point.
1549 computeKnownFPClass(R: LHS, DemandedElts, InterestedClasses: InterestedSrcs, Known&: KnownLHS,
1550 Depth: Depth + 1);
1551 }
1552
1553 if (IsAdd)
1554 Known = KnownFPClass::fadd(LHS: KnownLHS, RHS: KnownRHS, Mode);
1555 else
1556 Known = KnownFPClass::fsub(LHS: KnownLHS, RHS: KnownRHS, Mode);
1557 break;
1558 }
1559 case TargetOpcode::G_FMUL:
1560 case TargetOpcode::G_STRICT_FMUL: {
1561 Register LHS = MI.getOperand(i: 1).getReg();
1562 Register RHS = MI.getOperand(i: 2).getReg();
1563 DenormalMode Mode =
1564 MF->getDenormalMode(FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType()));
1565
1566 // X * X is always non-negative or a NaN (use square() for precision).
1567 if (LHS == RHS && isGuaranteedNotToBeUndef(Reg: LHS, MRI, Depth: Depth + 1)) {
1568 KnownFPClass KnownSrc;
1569 computeKnownFPClass(R: LHS, DemandedElts, InterestedClasses: fcAllFlags, Known&: KnownSrc, Depth: Depth + 1);
1570 Known = KnownFPClass::square(Src: KnownSrc, Mode);
1571 } else {
1572 // If RHS is a scalar constant, use the more precise APFloat overload.
1573 auto RHSCst = GFConstant::getConstant(Const: RHS, MRI);
1574 if (RHSCst && RHSCst->getKind() == GFConstant::GFConstantKind::Scalar) {
1575 KnownFPClass KnownLHS;
1576 computeKnownFPClass(R: LHS, DemandedElts, InterestedClasses: fcAllFlags, Known&: KnownLHS, Depth: Depth + 1);
1577 Known = KnownFPClass::fmul(LHS: KnownLHS, RHS: RHSCst->getScalarValue(), Mode);
1578 } else {
1579 KnownFPClass KnownLHS, KnownRHS;
1580 computeKnownFPClass(R: RHS, DemandedElts, InterestedClasses: fcAllFlags, Known&: KnownRHS, Depth: Depth + 1);
1581 computeKnownFPClass(R: LHS, DemandedElts, InterestedClasses: fcAllFlags, Known&: KnownLHS, Depth: Depth + 1);
1582 Known = KnownFPClass::fmul(LHS: KnownLHS, RHS: KnownRHS, Mode);
1583
1584 // If one operand is known |x| <= 1 and the other is finite, the
1585 // product cannot overflow to infinity.
1586 if (KnownLHS.isKnownNever(Mask: fcInf) && isAbsoluteValueULEOne(R: RHS, MRI))
1587 Known.knownNot(RuleOut: fcInf);
1588 else if (KnownRHS.isKnownNever(Mask: fcInf) &&
1589 isAbsoluteValueULEOne(R: LHS, MRI))
1590 Known.knownNot(RuleOut: fcInf);
1591 }
1592 }
1593 break;
1594 }
1595 case TargetOpcode::G_FDIV:
1596 case TargetOpcode::G_FREM: {
1597 Register LHS = MI.getOperand(i: 1).getReg();
1598 Register RHS = MI.getOperand(i: 2).getReg();
1599
1600 if (Opcode == TargetOpcode::G_FREM)
1601 Known.knownNot(RuleOut: fcInf);
1602
1603 DenormalMode Mode =
1604 MF->getDenormalMode(FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType()));
1605
1606 if (LHS == RHS && isGuaranteedNotToBeUndef(Reg: LHS, MRI, Depth: Depth + 1)) {
1607 if (Opcode == TargetOpcode::G_FDIV) {
1608 const bool WantNan = (InterestedClasses & fcNan) != fcNone;
1609 if (!WantNan) {
1610 // X / X is always exactly 1.0 or a NaN.
1611 Known.KnownFPClasses = fcPosNormal | fcNan;
1612 break;
1613 }
1614 KnownFPClass KnownSrc;
1615 computeKnownFPClass(R: LHS, DemandedElts,
1616 InterestedClasses: fcNan | fcInf | fcZero | fcSubnormal, Known&: KnownSrc,
1617 Depth: Depth + 1);
1618 Known = KnownFPClass::fdiv_self(Src: KnownSrc, Mode);
1619 } else {
1620 const bool WantNan = (InterestedClasses & fcNan) != fcNone;
1621 if (!WantNan) {
1622 // X % X is always exactly [+-]0.0 or a NaN.
1623 Known.KnownFPClasses = fcZero | fcNan;
1624 break;
1625 }
1626 KnownFPClass KnownSrc;
1627 computeKnownFPClass(R: LHS, DemandedElts,
1628 InterestedClasses: fcNan | fcInf | fcZero | fcSubnormal, Known&: KnownSrc,
1629 Depth: Depth + 1);
1630 Known = KnownFPClass::frem_self(Src: KnownSrc, Mode);
1631 }
1632 break;
1633 }
1634
1635 const bool WantNan = (InterestedClasses & fcNan) != fcNone;
1636 const bool WantNegative = (InterestedClasses & fcNegative) != fcNone;
1637 const bool WantPositive = Opcode == TargetOpcode::G_FREM &&
1638 (InterestedClasses & fcPositive) != fcNone;
1639 if (!WantNan && !WantNegative && !WantPositive) {
1640 break;
1641 }
1642
1643 KnownFPClass KnownLHS, KnownRHS;
1644
1645 computeKnownFPClass(R: RHS, DemandedElts, InterestedClasses: fcNan | fcInf | fcZero | fcNegative,
1646 Known&: KnownRHS, Depth: Depth + 1);
1647
1648 bool KnowSomethingUseful = KnownRHS.isKnownNeverNaN() ||
1649 KnownRHS.isKnownNever(Mask: fcNegative) ||
1650 KnownRHS.isKnownNever(Mask: fcPositive);
1651
1652 if (KnowSomethingUseful || WantPositive) {
1653 computeKnownFPClass(R: LHS, DemandedElts, InterestedClasses: fcAllFlags, Known&: KnownLHS, Depth: Depth + 1);
1654 }
1655
1656 if (Opcode == TargetOpcode::G_FDIV) {
1657 Known = KnownFPClass::fdiv(LHS: KnownLHS, RHS: KnownRHS, Mode);
1658 } else {
1659 // Inf REM x and x REM 0 produce NaN.
1660 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
1661 KnownLHS.isKnownNeverInfinity() &&
1662 KnownRHS.isKnownNeverLogicalZero(Mode)) {
1663 Known.knownNot(RuleOut: fcNan);
1664 }
1665
1666 // The sign for frem is the same as the first operand.
1667 if (KnownLHS.cannotBeOrderedLessThanZero())
1668 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
1669 if (KnownLHS.cannotBeOrderedGreaterThanZero())
1670 Known.knownNot(RuleOut: KnownFPClass::OrderedGreaterThanZeroMask);
1671
1672 // See if we can be more aggressive about the sign of 0.
1673 if (KnownLHS.isKnownNever(Mask: fcNegative))
1674 Known.knownNot(RuleOut: fcNegative);
1675 if (KnownLHS.isKnownNever(Mask: fcPositive))
1676 Known.knownNot(RuleOut: fcPositive);
1677 }
1678 break;
1679 }
1680 case TargetOpcode::G_FFREXP: {
1681 // Only handle the mantissa output (operand 0); the exponent is an integer.
1682 if (R != MI.getOperand(i: 0).getReg())
1683 break;
1684 Register Src = MI.getOperand(i: 2).getReg();
1685 KnownFPClass KnownSrc;
1686 computeKnownFPClass(R: Src, DemandedElts, InterestedClasses, Known&: KnownSrc,
1687 Depth: Depth + 1);
1688 DenormalMode Mode =
1689 MF->getDenormalMode(FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType()));
1690 Known = KnownFPClass::frexp_mant(Src: KnownSrc, Mode);
1691 break;
1692 }
1693 case TargetOpcode::G_FPEXT: {
1694 Register Src = MI.getOperand(i: 1).getReg();
1695 KnownFPClass KnownSrc;
1696 computeKnownFPClass(R: Src, DemandedElts, InterestedClasses, Known&: KnownSrc,
1697 Depth: Depth + 1);
1698
1699 LLT DstScalarTy = DstTy.getScalarType();
1700 const fltSemantics &DstSem = getFltSemanticForLLT(Ty: DstScalarTy);
1701 LLT SrcTy = MRI.getType(Reg: Src).getScalarType();
1702 const fltSemantics &SrcSem = getFltSemanticForLLT(Ty: SrcTy);
1703
1704 Known = KnownFPClass::fpext(KnownSrc, DstTy: DstSem, SrcTy: SrcSem);
1705 break;
1706 }
1707 case TargetOpcode::G_FPTRUNC: {
1708 computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known,
1709 Depth);
1710 break;
1711 }
1712 case TargetOpcode::G_SITOFP:
1713 case TargetOpcode::G_UITOFP: {
1714 // Cannot produce nan
1715 Known.knownNot(RuleOut: fcNan);
1716
1717 // Integers cannot be subnormal
1718 Known.knownNot(RuleOut: fcSubnormal);
1719
1720 // sitofp and uitofp turn into +0.0 for zero.
1721 Known.knownNot(RuleOut: fcNegZero);
1722
1723 // UIToFP is always non-negative regardless of known bits.
1724 if (Opcode == TargetOpcode::G_UITOFP)
1725 Known.signBitMustBeZero();
1726
1727 // Only compute known bits if we can learn something useful from them.
1728 if (!(InterestedClasses & (fcPosZero | fcNormal | fcInf)))
1729 break;
1730
1731 Register Val = MI.getOperand(i: 1).getReg();
1732 LLT Ty = MRI.getType(Reg: Val);
1733 KnownBits IntKnown = getKnownBits(
1734 R: Val, DemandedElts: Ty.isVector() ? DemandedElts : APInt(1, 1), Depth: Depth + 1);
1735
1736 // If the integer is non-zero, the result cannot be +0.0.
1737 if (IntKnown.isNonZero())
1738 Known.knownNot(RuleOut: fcPosZero);
1739
1740 if (Opcode == TargetOpcode::G_SITOFP) {
1741 // If the signed integer is known non-negative, the result is
1742 // non-negative. If the signed integer is known negative, the result is
1743 // negative.
1744 if (IntKnown.isNonNegative())
1745 Known.signBitMustBeZero();
1746 else if (IntKnown.isNegative())
1747 Known.signBitMustBeOne();
1748 }
1749
1750 if (InterestedClasses & fcInf) {
1751 LLT FPTy = DstTy.getScalarType();
1752 const fltSemantics &FltSem = getFltSemanticForLLT(Ty: FPTy);
1753
1754 // Compute the effective integer width after removing known-zero leading
1755 // bits, to check if the result can overflow to infinity.
1756 int IntSize = IntKnown.getBitWidth();
1757 if (Opcode == TargetOpcode::G_UITOFP)
1758 IntSize -= IntKnown.countMinLeadingZeros();
1759 else
1760 IntSize -= IntKnown.countMinSignBits();
1761
1762 // If the exponent of the largest finite FP value can hold the largest
1763 // integer, the result of the cast must be finite.
1764 if (ilogb(Arg: APFloat::getLargest(Sem: FltSem)) >= IntSize)
1765 Known.knownNot(RuleOut: fcInf);
1766 }
1767
1768 break;
1769 }
1770 // case TargetOpcode::G_MERGE_VALUES:
1771 case TargetOpcode::G_BUILD_VECTOR:
1772 case TargetOpcode::G_CONCAT_VECTORS: {
1773 GMergeLikeInstr &Merge = cast<GMergeLikeInstr>(Val&: MI);
1774
1775 if (!DstTy.isFixedVector())
1776 break;
1777
1778 bool First = true;
1779 for (unsigned Idx = 0; Idx < Merge.getNumSources(); ++Idx) {
1780 // We know the index we are inserting to, so clear it from Vec check.
1781 bool NeedsElt = DemandedElts[Idx];
1782
1783 // Do we demand the inserted element?
1784 if (NeedsElt) {
1785 Register Src = Merge.getSourceReg(I: Idx);
1786 if (First) {
1787 computeKnownFPClass(R: Src, Known, InterestedClasses, Depth: Depth + 1);
1788 First = false;
1789 } else {
1790 KnownFPClass Known2;
1791 computeKnownFPClass(R: Src, Known&: Known2, InterestedClasses, Depth: Depth + 1);
1792 Known |= Known2;
1793 }
1794
1795 // If we don't know any bits, early out.
1796 if (Known.isUnknown())
1797 break;
1798 }
1799 }
1800
1801 break;
1802 }
1803 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
1804 // Look through extract element. If the index is non-constant or
1805 // out-of-range demand all elements, otherwise just the extracted
1806 // element.
1807 GExtractVectorElement &Extract = cast<GExtractVectorElement>(Val&: MI);
1808 Register Vec = Extract.getVectorReg();
1809 Register Idx = Extract.getIndexReg();
1810
1811 auto CIdx = getIConstantVRegVal(VReg: Idx, MRI);
1812
1813 LLT VecTy = MRI.getType(Reg: Vec);
1814
1815 if (VecTy.isFixedVector()) {
1816 unsigned NumElts = VecTy.getNumElements();
1817 APInt DemandedVecElts = APInt::getAllOnes(numBits: NumElts);
1818 if (CIdx && CIdx->ult(RHS: NumElts))
1819 DemandedVecElts = APInt::getOneBitSet(numBits: NumElts, BitNo: CIdx->getZExtValue());
1820 return computeKnownFPClass(R: Vec, DemandedElts: DemandedVecElts, InterestedClasses, Known,
1821 Depth: Depth + 1);
1822 }
1823
1824 break;
1825 }
1826 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1827 GInsertVectorElement &Insert = cast<GInsertVectorElement>(Val&: MI);
1828 Register Vec = Insert.getVectorReg();
1829 Register Elt = Insert.getElementReg();
1830 Register Idx = Insert.getIndexReg();
1831
1832 LLT VecTy = MRI.getType(Reg: Vec);
1833
1834 if (VecTy.isScalableVector())
1835 return;
1836
1837 auto CIdx = getIConstantVRegVal(VReg: Idx, MRI);
1838
1839 unsigned NumElts = DemandedElts.getBitWidth();
1840 APInt DemandedVecElts = DemandedElts;
1841 bool NeedsElt = true;
1842 // If we know the index we are inserting to, clear it from Vec check.
1843 if (CIdx && CIdx->ult(RHS: NumElts)) {
1844 DemandedVecElts.clearBit(BitPosition: CIdx->getZExtValue());
1845 NeedsElt = DemandedElts[CIdx->getZExtValue()];
1846 }
1847
1848 // Do we demand the inserted element?
1849 if (NeedsElt) {
1850 computeKnownFPClass(R: Elt, Known, InterestedClasses, Depth: Depth + 1);
1851 // If we don't know any bits, early out.
1852 if (Known.isUnknown())
1853 break;
1854 } else {
1855 Known.KnownFPClasses = fcNone;
1856 }
1857
1858 // Do we need anymore elements from Vec?
1859 if (!DemandedVecElts.isZero()) {
1860 KnownFPClass Known2;
1861 computeKnownFPClass(R: Vec, DemandedElts: DemandedVecElts, InterestedClasses, Known&: Known2,
1862 Depth: Depth + 1);
1863 Known |= Known2;
1864 }
1865
1866 break;
1867 }
1868 case TargetOpcode::G_SHUFFLE_VECTOR: {
1869 // For undef elements, we don't know anything about the common state of
1870 // the shuffle result.
1871 GShuffleVector &Shuf = cast<GShuffleVector>(Val&: MI);
1872 APInt DemandedLHS, DemandedRHS;
1873 if (DstTy.isScalableVector()) {
1874 assert(DemandedElts == APInt(1, 1));
1875 DemandedLHS = DemandedRHS = DemandedElts;
1876 } else {
1877 unsigned NumElts = MRI.getType(Reg: Shuf.getSrc1Reg()).getNumElements();
1878 if (!llvm::getShuffleDemandedElts(SrcWidth: NumElts, Mask: Shuf.getMask(), DemandedElts,
1879 DemandedLHS, DemandedRHS)) {
1880 Known.resetAll();
1881 return;
1882 }
1883 }
1884
1885 if (!!DemandedLHS) {
1886 Register LHS = Shuf.getSrc1Reg();
1887 computeKnownFPClass(R: LHS, DemandedElts: DemandedLHS, InterestedClasses, Known,
1888 Depth: Depth + 1);
1889
1890 // If we don't know any bits, early out.
1891 if (Known.isUnknown())
1892 break;
1893 } else {
1894 Known.KnownFPClasses = fcNone;
1895 }
1896
1897 if (!!DemandedRHS) {
1898 KnownFPClass Known2;
1899 Register RHS = Shuf.getSrc2Reg();
1900 computeKnownFPClass(R: RHS, DemandedElts: DemandedRHS, InterestedClasses, Known&: Known2,
1901 Depth: Depth + 1);
1902 Known |= Known2;
1903 }
1904 break;
1905 }
1906 case TargetOpcode::G_PHI: {
1907 // Cap PHI recursion below the global limit to avoid spending the entire
1908 // budget chasing loop back-edges (matches ValueTracking's
1909 // PhiRecursionLimit).
1910 if (Depth + 2 > MaxAnalysisRecursionDepth)
1911 break;
1912 // PHI's operands are a mix of registers and basic blocks interleaved.
1913 // We only care about the register ones.
1914 bool First = true;
1915 for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
1916 const MachineOperand &Src = MI.getOperand(i: Idx);
1917 Register SrcReg = Src.getReg();
1918 if (First) {
1919 computeKnownFPClass(R: SrcReg, DemandedElts, InterestedClasses, Known,
1920 Depth: Depth + 1);
1921 First = false;
1922 } else {
1923 KnownFPClass Known2;
1924 computeKnownFPClass(R: SrcReg, DemandedElts, InterestedClasses, Known&: Known2,
1925 Depth: Depth + 1);
1926 Known = Known.intersectWith(RHS: Known2);
1927 }
1928 if (Known.isUnknown())
1929 break;
1930 }
1931 break;
1932 }
1933 case TargetOpcode::COPY: {
1934 Register Src = MI.getOperand(i: 1).getReg();
1935
1936 if (!Src.isVirtual())
1937 return;
1938
1939 computeKnownFPClass(R: Src, DemandedElts, InterestedClasses, Known, Depth: Depth + 1);
1940 break;
1941 }
1942 }
1943}
1944
1945KnownFPClass
1946GISelValueTracking::computeKnownFPClass(Register R, const APInt &DemandedElts,
1947 FPClassTest InterestedClasses,
1948 unsigned Depth) {
1949 KnownFPClass KnownClasses;
1950 computeKnownFPClass(R, DemandedElts, InterestedClasses, Known&: KnownClasses, Depth);
1951 return KnownClasses;
1952}
1953
1954KnownFPClass GISelValueTracking::computeKnownFPClass(
1955 Register R, FPClassTest InterestedClasses, unsigned Depth) {
1956 KnownFPClass Known;
1957 computeKnownFPClass(R, Known, InterestedClasses, Depth);
1958 return Known;
1959}
1960
1961KnownFPClass GISelValueTracking::computeKnownFPClass(
1962 Register R, const APInt &DemandedElts, uint32_t Flags,
1963 FPClassTest InterestedClasses, unsigned Depth) {
1964 if (Flags & MachineInstr::MIFlag::FmNoNans)
1965 InterestedClasses &= ~fcNan;
1966 if (Flags & MachineInstr::MIFlag::FmNoInfs)
1967 InterestedClasses &= ~fcInf;
1968
1969 KnownFPClass Result =
1970 computeKnownFPClass(R, DemandedElts, InterestedClasses, Depth);
1971
1972 if (Flags & MachineInstr::MIFlag::FmNoNans)
1973 Result.KnownFPClasses &= ~fcNan;
1974 if (Flags & MachineInstr::MIFlag::FmNoInfs)
1975 Result.KnownFPClasses &= ~fcInf;
1976 return Result;
1977}
1978
1979KnownFPClass GISelValueTracking::computeKnownFPClass(
1980 Register R, uint32_t Flags, FPClassTest InterestedClasses, unsigned Depth) {
1981 LLT Ty = MRI.getType(Reg: R);
1982 APInt DemandedElts =
1983 Ty.isFixedVector() ? APInt::getAllOnes(numBits: Ty.getNumElements()) : APInt(1, 1);
1984 return computeKnownFPClass(R, DemandedElts, Flags, InterestedClasses, Depth);
1985}
1986
1987bool GISelValueTracking::isKnownNeverNaN(Register Val, bool SNaN) {
1988 const MachineInstr *DefMI = MRI.getVRegDef(Reg: Val);
1989 if (!DefMI)
1990 return false;
1991
1992 if (DefMI->getFlag(Flag: MachineInstr::FmNoNans))
1993 return true;
1994
1995 // IEEE 754 arithmetic operations always quiet signaling NaNs. Short-circuit
1996 // the value-tracking analysis for the SNaN-only case: if the defining op is
1997 // known to quiet sNaN, the output can never be an sNaN.
1998 if (SNaN) {
1999 switch (DefMI->getOpcode()) {
2000 default:
2001 break;
2002 case TargetOpcode::G_FADD:
2003 case TargetOpcode::G_STRICT_FADD:
2004 case TargetOpcode::G_FSUB:
2005 case TargetOpcode::G_STRICT_FSUB:
2006 case TargetOpcode::G_FMUL:
2007 case TargetOpcode::G_STRICT_FMUL:
2008 case TargetOpcode::G_FDIV:
2009 case TargetOpcode::G_FREM:
2010 case TargetOpcode::G_FMA:
2011 case TargetOpcode::G_STRICT_FMA:
2012 case TargetOpcode::G_FMAD:
2013 case TargetOpcode::G_FSQRT:
2014 case TargetOpcode::G_STRICT_FSQRT:
2015 // Note: G_FABS and G_FNEG are bit-manipulation ops that preserve sNaN
2016 // exactly (LLVM LangRef: "never change anything except possibly the sign
2017 // bit"). They must NOT be listed here.
2018 case TargetOpcode::G_FSIN:
2019 case TargetOpcode::G_FCOS:
2020 case TargetOpcode::G_FSINCOS:
2021 case TargetOpcode::G_FTAN:
2022 case TargetOpcode::G_FASIN:
2023 case TargetOpcode::G_FACOS:
2024 case TargetOpcode::G_FATAN:
2025 case TargetOpcode::G_FATAN2:
2026 case TargetOpcode::G_FSINH:
2027 case TargetOpcode::G_FCOSH:
2028 case TargetOpcode::G_FTANH:
2029 case TargetOpcode::G_FEXP:
2030 case TargetOpcode::G_FEXP2:
2031 case TargetOpcode::G_FEXP10:
2032 case TargetOpcode::G_FLOG:
2033 case TargetOpcode::G_FLOG2:
2034 case TargetOpcode::G_FLOG10:
2035 case TargetOpcode::G_FPOWI:
2036 case TargetOpcode::G_FLDEXP:
2037 case TargetOpcode::G_STRICT_FLDEXP:
2038 case TargetOpcode::G_FFREXP:
2039 case TargetOpcode::G_INTRINSIC_TRUNC:
2040 case TargetOpcode::G_INTRINSIC_ROUND:
2041 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2042 case TargetOpcode::G_FFLOOR:
2043 case TargetOpcode::G_FCEIL:
2044 case TargetOpcode::G_FRINT:
2045 case TargetOpcode::G_FNEARBYINT:
2046 case TargetOpcode::G_FPEXT:
2047 case TargetOpcode::G_FPTRUNC:
2048 case TargetOpcode::G_FCANONICALIZE:
2049 case TargetOpcode::G_FMINNUM:
2050 case TargetOpcode::G_FMAXNUM:
2051 case TargetOpcode::G_FMINNUM_IEEE:
2052 case TargetOpcode::G_FMAXNUM_IEEE:
2053 case TargetOpcode::G_FMINIMUM:
2054 case TargetOpcode::G_FMAXIMUM:
2055 case TargetOpcode::G_FMINIMUMNUM:
2056 case TargetOpcode::G_FMAXIMUMNUM:
2057 return true;
2058 }
2059 }
2060
2061 KnownFPClass FPClass = computeKnownFPClass(R: Val, InterestedClasses: SNaN ? fcSNan : fcNan);
2062
2063 if (SNaN)
2064 return FPClass.isKnownNever(Mask: fcSNan);
2065
2066 return FPClass.isKnownNeverNaN();
2067}
2068
2069/// Compute number of sign bits for the intersection of \p Src0 and \p Src1
2070unsigned GISelValueTracking::computeNumSignBitsMin(Register Src0, Register Src1,
2071 const APInt &DemandedElts,
2072 unsigned Depth) {
2073 // Test src1 first, since we canonicalize simpler expressions to the RHS.
2074 unsigned Src1SignBits = computeNumSignBits(R: Src1, DemandedElts, Depth);
2075 if (Src1SignBits == 1)
2076 return 1;
2077 return std::min(a: computeNumSignBits(R: Src0, DemandedElts, Depth), b: Src1SignBits);
2078}
2079
2080/// Compute the known number of sign bits with attached range metadata in the
2081/// memory operand. If this is an extending load, accounts for the behavior of
2082/// the high bits.
2083static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
2084 unsigned TyBits) {
2085 const MDNode *Ranges = Ld->getRanges();
2086 if (!Ranges)
2087 return 1;
2088
2089 ConstantRange CR = getConstantRangeFromMetadata(RangeMD: *Ranges);
2090 if (TyBits > CR.getBitWidth()) {
2091 switch (Ld->getOpcode()) {
2092 case TargetOpcode::G_SEXTLOAD:
2093 CR = CR.signExtend(BitWidth: TyBits);
2094 break;
2095 case TargetOpcode::G_ZEXTLOAD:
2096 CR = CR.zeroExtend(BitWidth: TyBits);
2097 break;
2098 default:
2099 break;
2100 }
2101 }
2102
2103 return std::min(a: CR.getSignedMin().getNumSignBits(),
2104 b: CR.getSignedMax().getNumSignBits());
2105}
2106
2107unsigned GISelValueTracking::computeNumSignBits(Register R,
2108 const APInt &DemandedElts,
2109 unsigned Depth) {
2110 MachineInstr &MI = *MRI.getVRegDef(Reg: R);
2111 unsigned Opcode = MI.getOpcode();
2112
2113 if (Opcode == TargetOpcode::G_CONSTANT)
2114 return MI.getOperand(i: 1).getCImm()->getValue().getNumSignBits();
2115
2116 if (Depth == getMaxDepth())
2117 return 1;
2118
2119 if (!DemandedElts)
2120 return 1; // No demanded elts, better to assume we don't know anything.
2121
2122 LLT DstTy = MRI.getType(Reg: R);
2123 const unsigned TyBits = DstTy.getScalarSizeInBits();
2124
2125 // Handle the case where this is called on a register that does not have a
2126 // type constraint. This is unlikely to occur except by looking through copies
2127 // but it is possible for the initial register being queried to be in this
2128 // state.
2129 if (!DstTy.isValid())
2130 return 1;
2131
2132 unsigned FirstAnswer = 1;
2133 switch (Opcode) {
2134 case TargetOpcode::COPY: {
2135 MachineOperand &Src = MI.getOperand(i: 1);
2136 if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
2137 MRI.getType(Reg: Src.getReg()).isValid()) {
2138 // Don't increment Depth for this one since we didn't do any work.
2139 return computeNumSignBits(R: Src.getReg(), DemandedElts, Depth);
2140 }
2141
2142 return 1;
2143 }
2144 case TargetOpcode::G_SEXT: {
2145 Register Src = MI.getOperand(i: 1).getReg();
2146 LLT SrcTy = MRI.getType(Reg: Src);
2147 unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
2148 return computeNumSignBits(R: Src, DemandedElts, Depth: Depth + 1) + Tmp;
2149 }
2150 case TargetOpcode::G_ASSERT_SEXT:
2151 case TargetOpcode::G_SEXT_INREG: {
2152 // Max of the input and what this extends.
2153 Register Src = MI.getOperand(i: 1).getReg();
2154 unsigned SrcBits = MI.getOperand(i: 2).getImm();
2155 unsigned InRegBits = TyBits - SrcBits + 1;
2156 return std::max(a: computeNumSignBits(R: Src, DemandedElts, Depth: Depth + 1),
2157 b: InRegBits);
2158 }
2159 case TargetOpcode::G_LOAD: {
2160 GLoad *Ld = cast<GLoad>(Val: &MI);
2161 if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
2162 break;
2163
2164 return computeNumSignBitsFromRangeMetadata(Ld, TyBits);
2165 }
2166 case TargetOpcode::G_SEXTLOAD: {
2167 GSExtLoad *Ld = cast<GSExtLoad>(Val: &MI);
2168
2169 // FIXME: We need an in-memory type representation.
2170 if (DstTy.isVector())
2171 return 1;
2172
2173 unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
2174 if (NumBits != 1)
2175 return NumBits;
2176
2177 // e.g. i16->i32 = '17' bits known.
2178 const MachineMemOperand *MMO = *MI.memoperands_begin();
2179 return TyBits - MMO->getSizeInBits().getValue() + 1;
2180 }
2181 case TargetOpcode::G_ZEXTLOAD: {
2182 GZExtLoad *Ld = cast<GZExtLoad>(Val: &MI);
2183
2184 // FIXME: We need an in-memory type representation.
2185 if (DstTy.isVector())
2186 return 1;
2187
2188 unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
2189 if (NumBits != 1)
2190 return NumBits;
2191
2192 // e.g. i16->i32 = '16' bits known.
2193 const MachineMemOperand *MMO = *MI.memoperands_begin();
2194 return TyBits - MMO->getSizeInBits().getValue();
2195 }
2196 case TargetOpcode::G_AND:
2197 case TargetOpcode::G_OR:
2198 case TargetOpcode::G_XOR: {
2199 Register Src1 = MI.getOperand(i: 1).getReg();
2200 unsigned Src1NumSignBits =
2201 computeNumSignBits(R: Src1, DemandedElts, Depth: Depth + 1);
2202 if (Src1NumSignBits != 1) {
2203 Register Src2 = MI.getOperand(i: 2).getReg();
2204 unsigned Src2NumSignBits =
2205 computeNumSignBits(R: Src2, DemandedElts, Depth: Depth + 1);
2206 FirstAnswer = std::min(a: Src1NumSignBits, b: Src2NumSignBits);
2207 }
2208 break;
2209 }
2210 case TargetOpcode::G_ASHR: {
2211 Register Src1 = MI.getOperand(i: 1).getReg();
2212 Register Src2 = MI.getOperand(i: 2).getReg();
2213 FirstAnswer = computeNumSignBits(R: Src1, DemandedElts, Depth: Depth + 1);
2214 if (auto C = getValidMinimumShiftAmount(R: Src2, DemandedElts, Depth: Depth + 1))
2215 FirstAnswer = std::min<uint64_t>(a: FirstAnswer + *C, b: TyBits);
2216 break;
2217 }
2218 case TargetOpcode::G_SHL: {
2219 Register Src1 = MI.getOperand(i: 1).getReg();
2220 Register Src2 = MI.getOperand(i: 2).getReg();
2221 if (std::optional<ConstantRange> ShAmtRange =
2222 getValidShiftAmountRange(R: Src2, DemandedElts, Depth: Depth + 1)) {
2223 uint64_t MaxShAmt = ShAmtRange->getUnsignedMax().getZExtValue();
2224 uint64_t MinShAmt = ShAmtRange->getUnsignedMin().getZExtValue();
2225
2226 MachineInstr &ExtMI = *MRI.getVRegDef(Reg: Src1);
2227 unsigned ExtOpc = ExtMI.getOpcode();
2228
2229 // Try to look through ZERO/SIGN/ANY_EXTEND. If all extended bits are
2230 // shifted out, then we can compute the number of sign bits for the
2231 // operand being extended. A future improvement could be to pass along the
2232 // "shifted left by" information in the recursive calls to
2233 // ComputeKnownSignBits. Allowing us to handle this more generically.
2234 if (ExtOpc == TargetOpcode::G_SEXT || ExtOpc == TargetOpcode::G_ZEXT ||
2235 ExtOpc == TargetOpcode::G_ANYEXT) {
2236 LLT ExtTy = MRI.getType(Reg: Src1);
2237 Register Extendee = ExtMI.getOperand(i: 1).getReg();
2238 LLT ExtendeeTy = MRI.getType(Reg: Extendee);
2239 uint64_t SizeDiff =
2240 ExtTy.getScalarSizeInBits() - ExtendeeTy.getScalarSizeInBits();
2241
2242 if (SizeDiff <= MinShAmt) {
2243 unsigned Tmp =
2244 SizeDiff + computeNumSignBits(R: Extendee, DemandedElts, Depth: Depth + 1);
2245 if (MaxShAmt < Tmp)
2246 return Tmp - MaxShAmt;
2247 }
2248 }
2249 // shl destroys sign bits, ensure it doesn't shift out all sign bits.
2250 unsigned Tmp = computeNumSignBits(R: Src1, DemandedElts, Depth: Depth + 1);
2251 if (MaxShAmt < Tmp)
2252 return Tmp - MaxShAmt;
2253 }
2254 break;
2255 }
2256 case TargetOpcode::G_SREM: {
2257 // The sign bit is the LHS's sign bit, except when the result of the
2258 // remainder is zero. The magnitude of the result should be less than or
2259 // equal to the magnitude of the LHS. Therefore, the result should have
2260 // at least as many sign bits as the left hand side.
2261 Register Src = MI.getOperand(i: 1).getReg();
2262 return computeNumSignBits(R: Src, DemandedElts, Depth: Depth + 1);
2263 }
2264 case TargetOpcode::G_TRUNC: {
2265 Register Src = MI.getOperand(i: 1).getReg();
2266 LLT SrcTy = MRI.getType(Reg: Src);
2267
2268 // Check if the sign bits of source go down as far as the truncated value.
2269 unsigned DstTyBits = DstTy.getScalarSizeInBits();
2270 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
2271 unsigned NumSrcSignBits = computeNumSignBits(R: Src, DemandedElts, Depth: Depth + 1);
2272 if (NumSrcSignBits > (NumSrcBits - DstTyBits))
2273 return NumSrcSignBits - (NumSrcBits - DstTyBits);
2274 break;
2275 }
2276 case TargetOpcode::G_SELECT: {
2277 return computeNumSignBitsMin(Src0: MI.getOperand(i: 2).getReg(),
2278 Src1: MI.getOperand(i: 3).getReg(), DemandedElts,
2279 Depth: Depth + 1);
2280 }
2281 case TargetOpcode::G_SMIN:
2282 case TargetOpcode::G_SMAX:
2283 case TargetOpcode::G_UMIN:
2284 case TargetOpcode::G_UMAX:
2285 // TODO: Handle clamp pattern with number of sign bits for SMIN/SMAX.
2286 return computeNumSignBitsMin(Src0: MI.getOperand(i: 1).getReg(),
2287 Src1: MI.getOperand(i: 2).getReg(), DemandedElts,
2288 Depth: Depth + 1);
2289 case TargetOpcode::G_SADDO:
2290 case TargetOpcode::G_SADDE:
2291 case TargetOpcode::G_UADDO:
2292 case TargetOpcode::G_UADDE:
2293 case TargetOpcode::G_SSUBO:
2294 case TargetOpcode::G_SSUBE:
2295 case TargetOpcode::G_USUBO:
2296 case TargetOpcode::G_USUBE:
2297 case TargetOpcode::G_SMULO:
2298 case TargetOpcode::G_UMULO: {
2299 // If compares returns 0/-1, all bits are sign bits.
2300 // We know that we have an integer-based boolean since these operations
2301 // are only available for integer.
2302 if (MI.getOperand(i: 1).getReg() == R) {
2303 if (TL.getBooleanContents(isVec: DstTy.isVector(), isFloat: false) ==
2304 TargetLowering::ZeroOrNegativeOneBooleanContent)
2305 return TyBits;
2306 }
2307
2308 break;
2309 }
2310 case TargetOpcode::G_SUB: {
2311 Register Src2 = MI.getOperand(i: 2).getReg();
2312 unsigned Src2NumSignBits =
2313 computeNumSignBits(R: Src2, DemandedElts, Depth: Depth + 1);
2314 if (Src2NumSignBits == 1)
2315 return 1; // Early out.
2316
2317 // Handle NEG.
2318 Register Src1 = MI.getOperand(i: 1).getReg();
2319 KnownBits Known1 = getKnownBits(R: Src1, DemandedElts, Depth);
2320 if (Known1.isZero()) {
2321 KnownBits Known2 = getKnownBits(R: Src2, DemandedElts, Depth);
2322 // If the input is known to be 0 or 1, the output is 0/-1, which is all
2323 // sign bits set.
2324 if ((Known2.Zero | 1).isAllOnes())
2325 return TyBits;
2326
2327 // If the input is known to be positive (the sign bit is known clear),
2328 // the output of the NEG has, at worst, the same number of sign bits as
2329 // the input.
2330 if (Known2.isNonNegative()) {
2331 FirstAnswer = Src2NumSignBits;
2332 break;
2333 }
2334
2335 // Otherwise, we treat this like a SUB.
2336 }
2337
2338 unsigned Src1NumSignBits =
2339 computeNumSignBits(R: Src1, DemandedElts, Depth: Depth + 1);
2340 if (Src1NumSignBits == 1)
2341 return 1; // Early Out.
2342
2343 // Sub can have at most one carry bit. Thus we know that the output
2344 // is, at worst, one more bit than the inputs.
2345 FirstAnswer = std::min(a: Src1NumSignBits, b: Src2NumSignBits) - 1;
2346 break;
2347 }
2348 case TargetOpcode::G_ADD: {
2349 Register Src2 = MI.getOperand(i: 2).getReg();
2350 unsigned Src2NumSignBits =
2351 computeNumSignBits(R: Src2, DemandedElts, Depth: Depth + 1);
2352 if (Src2NumSignBits <= 2)
2353 return 1; // Early out.
2354
2355 Register Src1 = MI.getOperand(i: 1).getReg();
2356 unsigned Src1NumSignBits =
2357 computeNumSignBits(R: Src1, DemandedElts, Depth: Depth + 1);
2358 if (Src1NumSignBits == 1)
2359 return 1; // Early Out.
2360
2361 // Special case decrementing a value (ADD X, -1):
2362 KnownBits Known2 = getKnownBits(R: Src2, DemandedElts, Depth);
2363 if (Known2.isAllOnes()) {
2364 KnownBits Known1 = getKnownBits(R: Src1, DemandedElts, Depth);
2365 // If the input is known to be 0 or 1, the output is 0/-1, which is all
2366 // sign bits set.
2367 if ((Known1.Zero | 1).isAllOnes())
2368 return TyBits;
2369
2370 // If we are subtracting one from a positive number, there is no carry
2371 // out of the result.
2372 if (Known1.isNonNegative()) {
2373 FirstAnswer = Src1NumSignBits;
2374 break;
2375 }
2376
2377 // Otherwise, we treat this like an ADD.
2378 }
2379
2380 // Add can have at most one carry bit. Thus we know that the output
2381 // is, at worst, one more bit than the inputs.
2382 FirstAnswer = std::min(a: Src1NumSignBits, b: Src2NumSignBits) - 1;
2383 break;
2384 }
2385 case TargetOpcode::G_FCMP:
2386 case TargetOpcode::G_ICMP: {
2387 bool IsFP = Opcode == TargetOpcode::G_FCMP;
2388 if (TyBits == 1)
2389 break;
2390 auto BC = TL.getBooleanContents(isVec: DstTy.isVector(), isFloat: IsFP);
2391 if (BC == TargetLoweringBase::ZeroOrNegativeOneBooleanContent)
2392 return TyBits; // All bits are sign bits.
2393 if (BC == TargetLowering::ZeroOrOneBooleanContent)
2394 return TyBits - 1; // Every always-zero bit is a sign bit.
2395 break;
2396 }
2397 case TargetOpcode::G_BUILD_VECTOR: {
2398 // Collect the known bits that are shared by every demanded vector element.
2399 FirstAnswer = TyBits;
2400 APInt SingleDemandedElt(1, 1);
2401 for (const auto &[I, MO] : enumerate(First: drop_begin(RangeOrContainer: MI.operands()))) {
2402 if (!DemandedElts[I])
2403 continue;
2404
2405 unsigned Tmp2 =
2406 computeNumSignBits(R: MO.getReg(), DemandedElts: SingleDemandedElt, Depth: Depth + 1);
2407 FirstAnswer = std::min(a: FirstAnswer, b: Tmp2);
2408
2409 // If we don't know any bits, early out.
2410 if (FirstAnswer == 1)
2411 break;
2412 }
2413 break;
2414 }
2415 case TargetOpcode::G_CONCAT_VECTORS: {
2416 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isScalableVector())
2417 break;
2418 FirstAnswer = TyBits;
2419 // Determine the minimum number of sign bits across all demanded
2420 // elts of the input vectors. Early out if the result is already 1.
2421 unsigned NumSubVectorElts =
2422 MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getNumElements();
2423 for (const auto &[I, MO] : enumerate(First: drop_begin(RangeOrContainer: MI.operands()))) {
2424 APInt DemandedSub =
2425 DemandedElts.extractBits(numBits: NumSubVectorElts, bitPosition: I * NumSubVectorElts);
2426 if (!DemandedSub)
2427 continue;
2428 unsigned Tmp2 = computeNumSignBits(R: MO.getReg(), DemandedElts: DemandedSub, Depth: Depth + 1);
2429
2430 FirstAnswer = std::min(a: FirstAnswer, b: Tmp2);
2431
2432 // If we don't know any bits, early out.
2433 if (FirstAnswer == 1)
2434 break;
2435 }
2436 break;
2437 }
2438 case TargetOpcode::G_SHUFFLE_VECTOR: {
2439 // Collect the minimum number of sign bits that are shared by every vector
2440 // element referenced by the shuffle.
2441 APInt DemandedLHS, DemandedRHS;
2442 Register Src1 = MI.getOperand(i: 1).getReg();
2443 unsigned NumElts = MRI.getType(Reg: Src1).getNumElements();
2444 if (!getShuffleDemandedElts(SrcWidth: NumElts, Mask: MI.getOperand(i: 3).getShuffleMask(),
2445 DemandedElts, DemandedLHS, DemandedRHS))
2446 return 1;
2447
2448 if (!!DemandedLHS)
2449 FirstAnswer = computeNumSignBits(R: Src1, DemandedElts: DemandedLHS, Depth: Depth + 1);
2450 // If we don't know anything, early out and try computeKnownBits fall-back.
2451 if (FirstAnswer == 1)
2452 break;
2453 if (!!DemandedRHS) {
2454 unsigned Tmp2 =
2455 computeNumSignBits(R: MI.getOperand(i: 2).getReg(), DemandedElts: DemandedRHS, Depth: Depth + 1);
2456 FirstAnswer = std::min(a: FirstAnswer, b: Tmp2);
2457 }
2458 break;
2459 }
2460 case TargetOpcode::G_SPLAT_VECTOR: {
2461 // Check if the sign bits of source go down as far as the truncated value.
2462 Register Src = MI.getOperand(i: 1).getReg();
2463 unsigned NumSrcSignBits = computeNumSignBits(R: Src, DemandedElts: APInt(1, 1), Depth: Depth + 1);
2464 unsigned NumSrcBits = MRI.getType(Reg: Src).getSizeInBits();
2465 if (NumSrcSignBits > (NumSrcBits - TyBits))
2466 return NumSrcSignBits - (NumSrcBits - TyBits);
2467 break;
2468 }
2469 case TargetOpcode::G_INTRINSIC:
2470 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
2471 case TargetOpcode::G_INTRINSIC_CONVERGENT:
2472 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
2473 default: {
2474 unsigned NumBits =
2475 TL.computeNumSignBitsForTargetInstr(Analysis&: *this, R, DemandedElts, MRI, Depth);
2476 if (NumBits > 1)
2477 FirstAnswer = std::max(a: FirstAnswer, b: NumBits);
2478 break;
2479 }
2480 }
2481
2482 // Finally, if we can prove that the top bits of the result are 0's or 1's,
2483 // use this information.
2484 KnownBits Known = getKnownBits(R, DemandedElts, Depth);
2485 APInt Mask;
2486 if (Known.isNonNegative()) { // sign bit is 0
2487 Mask = Known.Zero;
2488 } else if (Known.isNegative()) { // sign bit is 1;
2489 Mask = Known.One;
2490 } else {
2491 // Nothing known.
2492 return FirstAnswer;
2493 }
2494
2495 // Okay, we know that the sign bit in Mask is set. Use CLO to determine
2496 // the number of identical bits in the top of the input value.
2497 Mask <<= Mask.getBitWidth() - TyBits;
2498 return std::max(a: FirstAnswer, b: Mask.countl_one());
2499}
2500
2501unsigned GISelValueTracking::computeNumSignBits(Register R, unsigned Depth) {
2502 LLT Ty = MRI.getType(Reg: R);
2503 APInt DemandedElts =
2504 Ty.isFixedVector() ? APInt::getAllOnes(numBits: Ty.getNumElements()) : APInt(1, 1);
2505 return computeNumSignBits(R, DemandedElts, Depth);
2506}
2507
2508std::optional<ConstantRange> GISelValueTracking::getValidShiftAmountRange(
2509 Register R, const APInt &DemandedElts, unsigned Depth) {
2510 // Shifting more than the bitwidth is not valid.
2511 MachineInstr &MI = *MRI.getVRegDef(Reg: R);
2512 unsigned Opcode = MI.getOpcode();
2513
2514 LLT Ty = MRI.getType(Reg: R);
2515 unsigned BitWidth = Ty.getScalarSizeInBits();
2516
2517 if (Opcode == TargetOpcode::G_CONSTANT) {
2518 const APInt &ShAmt = MI.getOperand(i: 1).getCImm()->getValue();
2519 if (ShAmt.uge(RHS: BitWidth))
2520 return std::nullopt;
2521 return ConstantRange(ShAmt);
2522 }
2523
2524 if (Opcode == TargetOpcode::G_BUILD_VECTOR) {
2525 const APInt *MinAmt = nullptr, *MaxAmt = nullptr;
2526 for (unsigned I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
2527 if (!DemandedElts[I])
2528 continue;
2529 MachineInstr *Op = MRI.getVRegDef(Reg: MI.getOperand(i: I + 1).getReg());
2530 if (Op->getOpcode() != TargetOpcode::G_CONSTANT) {
2531 MinAmt = MaxAmt = nullptr;
2532 break;
2533 }
2534
2535 const APInt &ShAmt = Op->getOperand(i: 1).getCImm()->getValue();
2536 if (ShAmt.uge(RHS: BitWidth))
2537 return std::nullopt;
2538 if (!MinAmt || MinAmt->ugt(RHS: ShAmt))
2539 MinAmt = &ShAmt;
2540 if (!MaxAmt || MaxAmt->ult(RHS: ShAmt))
2541 MaxAmt = &ShAmt;
2542 }
2543 assert(((!MinAmt && !MaxAmt) || (MinAmt && MaxAmt)) &&
2544 "Failed to find matching min/max shift amounts");
2545 if (MinAmt && MaxAmt)
2546 return ConstantRange(*MinAmt, *MaxAmt + 1);
2547 }
2548
2549 // Use computeKnownBits to find a hidden constant/knownbits (usually type
2550 // legalized). e.g. Hidden behind multiple bitcasts/build_vector/casts etc.
2551 KnownBits KnownAmt = getKnownBits(R, DemandedElts, Depth);
2552 if (KnownAmt.getMaxValue().ult(RHS: BitWidth))
2553 return ConstantRange::fromKnownBits(Known: KnownAmt, /*IsSigned=*/false);
2554
2555 return std::nullopt;
2556}
2557
2558std::optional<uint64_t> GISelValueTracking::getValidMinimumShiftAmount(
2559 Register R, const APInt &DemandedElts, unsigned Depth) {
2560 if (std::optional<ConstantRange> AmtRange =
2561 getValidShiftAmountRange(R, DemandedElts, Depth))
2562 return AmtRange->getUnsignedMin().getZExtValue();
2563 return std::nullopt;
2564}
2565
2566void GISelValueTrackingAnalysisLegacy::getAnalysisUsage(
2567 AnalysisUsage &AU) const {
2568 AU.setPreservesAll();
2569 MachineFunctionPass::getAnalysisUsage(AU);
2570}
2571
2572bool GISelValueTrackingAnalysisLegacy::runOnMachineFunction(
2573 MachineFunction &MF) {
2574 return false;
2575}
2576
2577GISelValueTracking &GISelValueTrackingAnalysisLegacy::get(MachineFunction &MF) {
2578 if (!Info) {
2579 unsigned MaxDepth =
2580 MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6;
2581 Info = std::make_unique<GISelValueTracking>(args&: MF, args&: MaxDepth);
2582 }
2583 return *Info;
2584}
2585
2586AnalysisKey GISelValueTrackingAnalysis::Key;
2587
2588GISelValueTracking
2589GISelValueTrackingAnalysis::run(MachineFunction &MF,
2590 MachineFunctionAnalysisManager &MFAM) {
2591 return Result(MF);
2592}
2593
2594PreservedAnalyses
2595GISelValueTrackingPrinterPass::run(MachineFunction &MF,
2596 MachineFunctionAnalysisManager &MFAM) {
2597 auto &VTA = MFAM.getResult<GISelValueTrackingAnalysis>(IR&: MF);
2598 const auto &MRI = MF.getRegInfo();
2599 OS << "name: ";
2600 MF.getFunction().printAsOperand(O&: OS, /*PrintType=*/false);
2601 OS << '\n';
2602
2603 for (MachineBasicBlock &BB : MF) {
2604 for (MachineInstr &MI : BB) {
2605 for (MachineOperand &MO : MI.defs()) {
2606 if (!MO.isReg() || MO.getReg().isPhysical())
2607 continue;
2608 Register Reg = MO.getReg();
2609 if (!MRI.getType(Reg).isValid())
2610 continue;
2611 KnownBits Known = VTA.getKnownBits(R: Reg);
2612 unsigned SignedBits = VTA.computeNumSignBits(R: Reg);
2613 OS << " " << MO << " KnownBits:" << Known << " SignBits:" << SignedBits
2614 << '\n';
2615 };
2616 }
2617 }
2618 return PreservedAnalyses::all();
2619}
2620