//===- lib/CodeGen/GlobalISel/GISelValueTracking.cpp ------------*- C++ -*-===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9//
10/// Provides analysis for querying information about KnownBits during GISel
11/// passes.
12//
13//===----------------------------------------------------------------------===//
14#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
15#include "llvm/ADT/APFloat.h"
16#include "llvm/ADT/FloatingPointMode.h"
17#include "llvm/ADT/ScopeExit.h"
18#include "llvm/ADT/StringExtras.h"
19#include "llvm/Analysis/ValueTracking.h"
20#include "llvm/Analysis/VectorUtils.h"
21#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
22#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
23#include "llvm/CodeGen/GlobalISel/MachineFloatingPointPredicateUtils.h"
24#include "llvm/CodeGen/GlobalISel/Utils.h"
25#include "llvm/CodeGen/LowLevelTypeUtils.h"
26#include "llvm/CodeGen/MachineFrameInfo.h"
27#include "llvm/CodeGen/MachineInstr.h"
28#include "llvm/CodeGen/MachineOperand.h"
29#include "llvm/CodeGen/MachineRegisterInfo.h"
30#include "llvm/CodeGen/Register.h"
31#include "llvm/CodeGen/TargetLowering.h"
32#include "llvm/CodeGen/TargetOpcodes.h"
33#include "llvm/IR/ConstantRange.h"
34#include "llvm/IR/DerivedTypes.h"
35#include "llvm/IR/FMF.h"
36#include "llvm/InitializePasses.h"
37#include "llvm/MC/TargetRegistry.h"
38#include "llvm/Support/KnownBits.h"
39#include "llvm/Support/KnownFPClass.h"
40#include "llvm/Target/TargetMachine.h"
41
42#define DEBUG_TYPE "gisel-known-bits"
43
44using namespace llvm;
45using namespace MIPatternMatch;
46
47char llvm::GISelValueTrackingAnalysisLegacy::ID = 0;
48
INITIALIZE_PASS(GISelValueTrackingAnalysisLegacy, DEBUG_TYPE,
                "Analysis for Computing Known Bits", false, true)
51
52GISelValueTracking::GISelValueTracking(MachineFunction &MF, unsigned MaxDepth)
53 : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
54 DL(MF.getFunction().getDataLayout()), MaxDepth(MaxDepth) {}
55
56Align GISelValueTracking::computeKnownAlignment(Register R, unsigned Depth) {
57 const MachineInstr *MI = MRI.getVRegDef(Reg: R);
58 switch (MI->getOpcode()) {
59 case TargetOpcode::COPY:
60 return computeKnownAlignment(R: MI->getOperand(i: 1).getReg(), Depth);
61 case TargetOpcode::G_ASSERT_ALIGN: {
62 // TODO: Min with source
63 return Align(MI->getOperand(i: 2).getImm());
64 }
65 case TargetOpcode::G_FRAME_INDEX: {
66 int FrameIdx = MI->getOperand(i: 1).getIndex();
67 return MF.getFrameInfo().getObjectAlign(ObjectIdx: FrameIdx);
68 }
69 case TargetOpcode::G_INTRINSIC:
70 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
71 case TargetOpcode::G_INTRINSIC_CONVERGENT:
72 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
73 default:
74 return TL.computeKnownAlignForTargetInstr(Analysis&: *this, R, MRI, Depth: Depth + 1);
75 }
76}
77
78KnownBits GISelValueTracking::getKnownBits(MachineInstr &MI) {
79 assert(MI.getNumExplicitDefs() == 1 &&
80 "expected single return generic instruction");
81 return getKnownBits(R: MI.getOperand(i: 0).getReg());
82}
83
84KnownBits GISelValueTracking::getKnownBits(Register R) {
85 const LLT Ty = MRI.getType(Reg: R);
86 // Since the number of lanes in a scalable vector is unknown at compile time,
87 // we track one bit which is implicitly broadcast to all lanes. This means
88 // that all lanes in a scalable vector are considered demanded.
89 APInt DemandedElts =
90 Ty.isFixedVector() ? APInt::getAllOnes(numBits: Ty.getNumElements()) : APInt(1, 1);
91 return getKnownBits(R, DemandedElts);
92}
93
94KnownBits GISelValueTracking::getKnownBits(Register R,
95 const APInt &DemandedElts,
96 unsigned Depth) {
97 KnownBits Known;
98 computeKnownBitsImpl(R, Known, DemandedElts, Depth);
99 return Known;
100}
101
102bool GISelValueTracking::signBitIsZero(Register R) {
103 LLT Ty = MRI.getType(Reg: R);
104 unsigned BitWidth = Ty.getScalarSizeInBits();
105 return maskedValueIsZero(Val: R, Mask: APInt::getSignMask(BitWidth));
106}
107
108APInt GISelValueTracking::getKnownZeroes(Register R) {
109 return getKnownBits(R).Zero;
110}
111
112APInt GISelValueTracking::getKnownOnes(Register R) {
113 return getKnownBits(R).One;
114}
115
116[[maybe_unused]] static void
117dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
118 dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
119 << "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
120 << toString(I: Known.Zero | Known.One, Radix: 16, Signed: false) << "\n"
121 << "[" << Depth << "] Zero: 0x" << toString(I: Known.Zero, Radix: 16, Signed: false)
122 << "\n"
123 << "[" << Depth << "] One: 0x" << toString(I: Known.One, Radix: 16, Signed: false)
124 << "\n";
125}
126
127/// Compute known bits for the intersection of \p Src0 and \p Src1
128void GISelValueTracking::computeKnownBitsMin(Register Src0, Register Src1,
129 KnownBits &Known,
130 const APInt &DemandedElts,
131 unsigned Depth) {
132 // Test src1 first, since we canonicalize simpler expressions to the RHS.
133 computeKnownBitsImpl(R: Src1, Known, DemandedElts, Depth);
134
135 // If we don't know any bits, early out.
136 if (Known.isUnknown())
137 return;
138
139 KnownBits Known2;
140 computeKnownBitsImpl(R: Src0, Known&: Known2, DemandedElts, Depth);
141
142 // Only known if known in both the LHS and RHS.
143 Known = Known.intersectWith(RHS: Known2);
144}
145
146// Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is
147// created using Width. Use this function when the inputs are KnownBits
// objects. TODO: Move this to KnownBits.h if it is usable in more cases.
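// For example, with BitWidth = 32, an Offset known to be 8 and a Width known
// to be exactly 4, Mask is known to be 0x0000000f, so the result is
// (SrcOpKnown >> 8) & 0xf.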
149static KnownBits extractBits(unsigned BitWidth, const KnownBits &SrcOpKnown,
150 const KnownBits &OffsetKnown,
151 const KnownBits &WidthKnown) {
152 KnownBits Mask(BitWidth);
153 Mask.Zero = APInt::getBitsSetFrom(
154 numBits: BitWidth, loBit: WidthKnown.getMaxValue().getLimitedValue(Limit: BitWidth));
155 Mask.One = APInt::getLowBitsSet(
156 numBits: BitWidth, loBitsSet: WidthKnown.getMinValue().getLimitedValue(Limit: BitWidth));
157 return KnownBits::lshr(LHS: SrcOpKnown, RHS: OffsetKnown) & Mask;
158}
159
160void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
161 const APInt &DemandedElts,
162 unsigned Depth) {
163 MachineInstr &MI = *MRI.getVRegDef(Reg: R);
164 unsigned Opcode = MI.getOpcode();
165 LLT DstTy = MRI.getType(Reg: R);
166
167 // Handle the case where this is called on a register that does not have a
168 // type constraint. For example, it may be post-ISel or this target might not
169 // preserve the type when early-selecting instructions.
170 if (!DstTy.isValid()) {
171 Known = KnownBits();
172 return;
173 }
174
175#ifndef NDEBUG
176 if (DstTy.isFixedVector()) {
177 assert(
178 DstTy.getNumElements() == DemandedElts.getBitWidth() &&
179 "DemandedElt width should equal the fixed vector number of elements");
180 } else {
181 assert(DemandedElts.getBitWidth() == 1 && DemandedElts == APInt(1, 1) &&
182 "DemandedElt width should be 1 for scalars or scalable vectors");
183 }
184#endif
185
186 unsigned BitWidth = DstTy.getScalarSizeInBits();
187 Known = KnownBits(BitWidth); // Don't know anything
188
  // Depth may exceed the max depth if it is passed to a different
  // GISelValueTracking object.
  // This can happen when, for example, generic code uses a GISelValueTracking
  // object with some max depth but then hits
  // TL.computeKnownBitsForTargetInstr, which creates a new GISelValueTracking
  // object with a different, smaller max depth. If we only checked for
  // equality, we would never exit when the depth passed down to the
  // target-specific GISelValueTracking object is already bigger than its max
  // depth.
197 if (Depth >= getMaxDepth())
198 return;
199
200 if (!DemandedElts)
201 return; // No demanded elts, better to assume we don't know anything.
202
203 KnownBits Known2;
204
205 switch (Opcode) {
206 default:
207 TL.computeKnownBitsForTargetInstr(Analysis&: *this, R, Known, DemandedElts, MRI,
208 Depth);
209 break;
210 case TargetOpcode::G_BUILD_VECTOR: {
211 // Collect the known bits that are shared by every demanded vector element.
212 Known.Zero.setAllBits();
213 Known.One.setAllBits();
214 for (const auto &[I, MO] : enumerate(First: drop_begin(RangeOrContainer: MI.operands()))) {
215 if (!DemandedElts[I])
216 continue;
217
218 computeKnownBitsImpl(R: MO.getReg(), Known&: Known2, DemandedElts: APInt(1, 1), Depth: Depth + 1);
219
220 // Known bits are the values that are shared by every demanded element.
221 Known = Known.intersectWith(RHS: Known2);
222
223 // If we don't know any bits, early out.
224 if (Known.isUnknown())
225 break;
226 }
227 break;
228 }
229 case TargetOpcode::G_SPLAT_VECTOR: {
230 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts: APInt(1, 1),
231 Depth: Depth + 1);
232 // Implicitly truncate the bits to match the official semantics of
233 // G_SPLAT_VECTOR.
234 Known = Known.trunc(BitWidth);
235 break;
236 }
237 case TargetOpcode::COPY:
238 case TargetOpcode::G_PHI:
239 case TargetOpcode::PHI: {
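    // Start from the conflicting state in which every bit is simultaneously
    // known zero and known one; this is the identity for the intersectWith
    // calls below, so the first source's known bits are taken verbatim.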
240 Known.One = APInt::getAllOnes(numBits: BitWidth);
241 Known.Zero = APInt::getAllOnes(numBits: BitWidth);
242 // Destination registers should not have subregisters at this
243 // point of the pipeline, otherwise the main live-range will be
244 // defined more than once, which is against SSA.
245 assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?");
    // A PHI's operands are an interleaved mix of registers and basic blocks;
    // we only care about the register operands.
248 for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
249 const MachineOperand &Src = MI.getOperand(i: Idx);
250 Register SrcReg = Src.getReg();
251 LLT SrcTy = MRI.getType(Reg: SrcReg);
      // Look through trivial copies and phis, but don't look through copies
      // or phis of the form `%1:(s32) = OP %0:gpr32`: known-bits analysis is
      // currently unable to determine the bit width of a register class.
256 //
257 // We can't use NoSubRegister by name as it's defined by each target but
258 // it's always defined to be 0 by tablegen.
259 if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
260 SrcTy.isValid()) {
261 // In case we're forwarding from a vector register to a non-vector
262 // register we need to update the demanded elements to reflect this
263 // before recursing.
264 APInt NowDemandedElts = SrcTy.isFixedVector() && !DstTy.isFixedVector()
265 ? APInt::getAllOnes(numBits: SrcTy.getNumElements())
266 : DemandedElts; // Known to be APInt(1, 1)
267 // For COPYs we don't do anything, don't increase the depth.
268 computeKnownBitsImpl(R: SrcReg, Known&: Known2, DemandedElts: NowDemandedElts,
269 Depth: Depth + (Opcode != TargetOpcode::COPY));
270 Known2 = Known2.anyextOrTrunc(BitWidth);
271 Known = Known.intersectWith(RHS: Known2);
272 // If we reach a point where we don't know anything
273 // just stop looking through the operands.
274 if (Known.isUnknown())
275 break;
276 } else {
277 // We know nothing.
278 Known = KnownBits(BitWidth);
279 break;
280 }
281 }
282 break;
283 }
284 case TargetOpcode::G_CONSTANT: {
285 Known = KnownBits::makeConstant(C: MI.getOperand(i: 1).getCImm()->getValue());
286 break;
287 }
288 case TargetOpcode::G_FRAME_INDEX: {
289 int FrameIdx = MI.getOperand(i: 1).getIndex();
290 TL.computeKnownBitsForFrameIndex(FIOp: FrameIdx, Known, MF);
291 break;
292 }
293 case TargetOpcode::G_SUB: {
294 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
295 Depth: Depth + 1);
296 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: Known2, DemandedElts,
297 Depth: Depth + 1);
298 Known = KnownBits::sub(LHS: Known, RHS: Known2);
299 break;
300 }
301 case TargetOpcode::G_XOR: {
302 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
303 Depth: Depth + 1);
304 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
305 Depth: Depth + 1);
306
307 Known ^= Known2;
308 break;
309 }
310 case TargetOpcode::G_PTR_ADD: {
311 if (DstTy.isVector())
312 break;
313 // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets?
314 LLT Ty = MRI.getType(Reg: MI.getOperand(i: 1).getReg());
315 if (DL.isNonIntegralAddressSpace(AddrSpace: Ty.getAddressSpace()))
316 break;
317 [[fallthrough]];
318 }
319 case TargetOpcode::G_ADD: {
320 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
321 Depth: Depth + 1);
322 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: Known2, DemandedElts,
323 Depth: Depth + 1);
324 Known = KnownBits::add(LHS: Known, RHS: Known2);
325 break;
326 }
327 case TargetOpcode::G_AND: {
328 // If either the LHS or the RHS are Zero, the result is zero.
329 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
330 Depth: Depth + 1);
331 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
332 Depth: Depth + 1);
333
334 Known &= Known2;
335 break;
336 }
337 case TargetOpcode::G_OR: {
    // Bits that are one in either the LHS or the RHS are one in the result.
339 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
340 Depth: Depth + 1);
341 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
342 Depth: Depth + 1);
343
344 Known |= Known2;
345 break;
346 }
347 case TargetOpcode::G_MUL: {
348 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
349 Depth: Depth + 1);
350 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
351 Depth: Depth + 1);
352 Known = KnownBits::mul(LHS: Known, RHS: Known2);
353 break;
354 }
355 case TargetOpcode::G_UMULH: {
356 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
357 Depth: Depth + 1);
358 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
359 Depth: Depth + 1);
360 Known = KnownBits::mulhu(LHS: Known, RHS: Known2);
361 break;
362 }
363 case TargetOpcode::G_SMULH: {
364 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
365 Depth: Depth + 1);
366 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
367 Depth: Depth + 1);
368 Known = KnownBits::mulhs(LHS: Known, RHS: Known2);
369 break;
370 }
371 case TargetOpcode::G_SELECT: {
372 computeKnownBitsMin(Src0: MI.getOperand(i: 2).getReg(), Src1: MI.getOperand(i: 3).getReg(),
373 Known, DemandedElts, Depth: Depth + 1);
374 break;
375 }
376 case TargetOpcode::G_SMIN: {
377 // TODO: Handle clamp pattern with number of sign bits
378 KnownBits KnownRHS;
379 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
380 Depth: Depth + 1);
381 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: KnownRHS, DemandedElts,
382 Depth: Depth + 1);
383 Known = KnownBits::smin(LHS: Known, RHS: KnownRHS);
384 break;
385 }
386 case TargetOpcode::G_SMAX: {
387 // TODO: Handle clamp pattern with number of sign bits
388 KnownBits KnownRHS;
389 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
390 Depth: Depth + 1);
391 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: KnownRHS, DemandedElts,
392 Depth: Depth + 1);
393 Known = KnownBits::smax(LHS: Known, RHS: KnownRHS);
394 break;
395 }
396 case TargetOpcode::G_UMIN: {
397 KnownBits KnownRHS;
398 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
399 Depth: Depth + 1);
400 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: KnownRHS, DemandedElts,
401 Depth: Depth + 1);
402 Known = KnownBits::umin(LHS: Known, RHS: KnownRHS);
403 break;
404 }
405 case TargetOpcode::G_UMAX: {
406 KnownBits KnownRHS;
407 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
408 Depth: Depth + 1);
409 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: KnownRHS, DemandedElts,
410 Depth: Depth + 1);
411 Known = KnownBits::umax(LHS: Known, RHS: KnownRHS);
412 break;
413 }
414 case TargetOpcode::G_FCMP:
415 case TargetOpcode::G_ICMP: {
416 if (DstTy.isVector())
417 break;
418 if (TL.getBooleanContents(isVec: DstTy.isVector(),
419 isFloat: Opcode == TargetOpcode::G_FCMP) ==
420 TargetLowering::ZeroOrOneBooleanContent &&
421 BitWidth > 1)
422 Known.Zero.setBitsFrom(1);
423 break;
424 }
425 case TargetOpcode::G_SEXT: {
426 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
427 Depth: Depth + 1);
428 // If the sign bit is known to be zero or one, then sext will extend
429 // it to the top bits, else it will just zext.
430 Known = Known.sext(BitWidth);
431 break;
432 }
433 case TargetOpcode::G_ASSERT_SEXT:
434 case TargetOpcode::G_SEXT_INREG: {
435 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
436 Depth: Depth + 1);
437 Known = Known.sextInReg(SrcBitWidth: MI.getOperand(i: 2).getImm());
438 break;
439 }
440 case TargetOpcode::G_ANYEXT: {
441 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
442 Depth: Depth + 1);
443 Known = Known.anyext(BitWidth);
444 break;
445 }
446 case TargetOpcode::G_LOAD: {
447 const MachineMemOperand *MMO = *MI.memoperands_begin();
448 KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
449 if (const MDNode *Ranges = MMO->getRanges())
450 computeKnownBitsFromRangeMetadata(Ranges: *Ranges, Known&: KnownRange);
451 Known = KnownRange.anyext(BitWidth: Known.getBitWidth());
452 break;
453 }
454 case TargetOpcode::G_SEXTLOAD:
455 case TargetOpcode::G_ZEXTLOAD: {
456 if (DstTy.isVector())
457 break;
458 const MachineMemOperand *MMO = *MI.memoperands_begin();
459 KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
460 if (const MDNode *Ranges = MMO->getRanges())
461 computeKnownBitsFromRangeMetadata(Ranges: *Ranges, Known&: KnownRange);
462 Known = Opcode == TargetOpcode::G_SEXTLOAD
463 ? KnownRange.sext(BitWidth: Known.getBitWidth())
464 : KnownRange.zext(BitWidth: Known.getBitWidth());
465 break;
466 }
467 case TargetOpcode::G_ASHR: {
468 KnownBits LHSKnown, RHSKnown;
469 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: LHSKnown, DemandedElts,
470 Depth: Depth + 1);
471 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: RHSKnown, DemandedElts,
472 Depth: Depth + 1);
473 Known = KnownBits::ashr(LHS: LHSKnown, RHS: RHSKnown);
474 break;
475 }
476 case TargetOpcode::G_LSHR: {
477 KnownBits LHSKnown, RHSKnown;
478 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: LHSKnown, DemandedElts,
479 Depth: Depth + 1);
480 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: RHSKnown, DemandedElts,
481 Depth: Depth + 1);
482 Known = KnownBits::lshr(LHS: LHSKnown, RHS: RHSKnown);
483 break;
484 }
485 case TargetOpcode::G_SHL: {
486 KnownBits LHSKnown, RHSKnown;
487 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: LHSKnown, DemandedElts,
488 Depth: Depth + 1);
489 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: RHSKnown, DemandedElts,
490 Depth: Depth + 1);
491 Known = KnownBits::shl(LHS: LHSKnown, RHS: RHSKnown);
492 break;
493 }
494 case TargetOpcode::G_INTTOPTR:
495 case TargetOpcode::G_PTRTOINT:
496 if (DstTy.isVector())
497 break;
498 // Fall through and handle them the same as zext/trunc.
499 [[fallthrough]];
500 case TargetOpcode::G_ZEXT:
501 case TargetOpcode::G_TRUNC: {
502 Register SrcReg = MI.getOperand(i: 1).getReg();
503 computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1);
504 Known = Known.zextOrTrunc(BitWidth);
505 break;
506 }
507 case TargetOpcode::G_ASSERT_ZEXT: {
508 Register SrcReg = MI.getOperand(i: 1).getReg();
509 computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1);
510
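    // G_ASSERT_ZEXT guarantees that all bits above SrcBitWidth are zero, so
    // add them to Known.Zero and clear them from Known.One.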
511 unsigned SrcBitWidth = MI.getOperand(i: 2).getImm();
512 assert(SrcBitWidth && "SrcBitWidth can't be zero");
513 APInt InMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: SrcBitWidth);
514 Known.Zero |= (~InMask);
515 Known.One &= (~Known.Zero);
516 break;
517 }
518 case TargetOpcode::G_ASSERT_ALIGN: {
519 int64_t LogOfAlign = Log2_64(Value: MI.getOperand(i: 2).getImm());
520
521 // TODO: Should use maximum with source
522 // If a node is guaranteed to be aligned, set low zero bits accordingly as
523 // well as clearing one bits.
524 Known.Zero.setLowBits(LogOfAlign);
525 Known.One.clearLowBits(loBits: LogOfAlign);
526 break;
527 }
528 case TargetOpcode::G_MERGE_VALUES: {
529 unsigned NumOps = MI.getNumOperands();
530 unsigned OpSize = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits();
531
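    // E.g., when merging two s32 operands into an s64, the known bits of
    // operand I are inserted at bit position I * 32 of the result.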
532 for (unsigned I = 0; I != NumOps - 1; ++I) {
533 KnownBits SrcOpKnown;
534 computeKnownBitsImpl(R: MI.getOperand(i: I + 1).getReg(), Known&: SrcOpKnown,
535 DemandedElts, Depth: Depth + 1);
536 Known.insertBits(SubBits: SrcOpKnown, BitPosition: I * OpSize);
537 }
538 break;
539 }
540 case TargetOpcode::G_UNMERGE_VALUES: {
541 unsigned NumOps = MI.getNumOperands();
542 Register SrcReg = MI.getOperand(i: NumOps - 1).getReg();
543 LLT SrcTy = MRI.getType(Reg: SrcReg);
544
545 if (SrcTy.isVector() && SrcTy.getScalarType() != DstTy.getScalarType())
546 return; // TODO: Handle vector->subelement unmerges
547
548 // Figure out the result operand index
549 unsigned DstIdx = 0;
550 for (; DstIdx != NumOps - 1 && MI.getOperand(i: DstIdx).getReg() != R;
551 ++DstIdx)
552 ;
553
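    // E.g., when unmerging a <4 x s32> source into two <2 x s32> results, the
    // second result (DstIdx == 1) maps its demanded elements onto source
    // elements 2 and 3.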
554 APInt SubDemandedElts = DemandedElts;
555 if (SrcTy.isVector()) {
556 unsigned DstLanes = DstTy.isVector() ? DstTy.getNumElements() : 1;
557 SubDemandedElts =
558 DemandedElts.zext(width: SrcTy.getNumElements()).shl(shiftAmt: DstIdx * DstLanes);
559 }
560
561 KnownBits SrcOpKnown;
562 computeKnownBitsImpl(R: SrcReg, Known&: SrcOpKnown, DemandedElts: SubDemandedElts, Depth: Depth + 1);
563
564 if (SrcTy.isVector())
565 Known = std::move(SrcOpKnown);
566 else
567 Known = SrcOpKnown.extractBits(NumBits: BitWidth, BitPosition: BitWidth * DstIdx);
568 break;
569 }
570 case TargetOpcode::G_BSWAP: {
571 Register SrcReg = MI.getOperand(i: 1).getReg();
572 computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1);
573 Known = Known.byteSwap();
574 break;
575 }
576 case TargetOpcode::G_BITREVERSE: {
577 Register SrcReg = MI.getOperand(i: 1).getReg();
578 computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1);
579 Known = Known.reverseBits();
580 break;
581 }
582 case TargetOpcode::G_CTPOP: {
583 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
584 Depth: Depth + 1);
585 // We can bound the space the count needs. Also, bits known to be zero
586 // can't contribute to the population.
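    // E.g., if at most 5 source bits can be set, the population count is at
    // most 5, which fits in 3 bits, so all result bits above bit 2 are known
    // to be zero.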
587 unsigned BitsPossiblySet = Known2.countMaxPopulation();
588 unsigned LowBits = llvm::bit_width(Value: BitsPossiblySet);
589 Known.Zero.setBitsFrom(LowBits);
590 // TODO: we could bound Known.One using the lower bound on the number of
591 // bits which might be set provided by popcnt KnownOne2.
592 break;
593 }
594 case TargetOpcode::G_UBFX: {
595 KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
596 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: SrcOpKnown, DemandedElts,
597 Depth: Depth + 1);
598 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: OffsetKnown, DemandedElts,
599 Depth: Depth + 1);
600 computeKnownBitsImpl(R: MI.getOperand(i: 3).getReg(), Known&: WidthKnown, DemandedElts,
601 Depth: Depth + 1);
602 Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
603 break;
604 }
605 case TargetOpcode::G_SBFX: {
606 KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
607 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: SrcOpKnown, DemandedElts,
608 Depth: Depth + 1);
609 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: OffsetKnown, DemandedElts,
610 Depth: Depth + 1);
611 computeKnownBitsImpl(R: MI.getOperand(i: 3).getReg(), Known&: WidthKnown, DemandedElts,
612 Depth: Depth + 1);
613 OffsetKnown = OffsetKnown.sext(BitWidth);
614 WidthKnown = WidthKnown.sext(BitWidth);
615 Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
616 // Sign extend the extracted value using shift left and arithmetic shift
617 // right.
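    // E.g., with BitWidth = 32 and a Width known to be exactly 8, ShiftKnown
    // is exactly 24, so the extracted field is shifted left by 24 and then
    // arithmetic-shifted right by 24, replicating bit 7 of the field into the
    // upper bits.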
618 KnownBits ExtKnown = KnownBits::makeConstant(C: APInt(BitWidth, BitWidth));
619 KnownBits ShiftKnown = KnownBits::sub(LHS: ExtKnown, RHS: WidthKnown);
620 Known = KnownBits::ashr(LHS: KnownBits::shl(LHS: Known, RHS: ShiftKnown), RHS: ShiftKnown);
621 break;
622 }
623 case TargetOpcode::G_UADDO:
624 case TargetOpcode::G_UADDE:
625 case TargetOpcode::G_SADDO:
626 case TargetOpcode::G_SADDE: {
627 if (MI.getOperand(i: 1).getReg() == R) {
628 // If we know the result of a compare has the top bits zero, use this
629 // info.
630 if (TL.getBooleanContents(isVec: DstTy.isVector(), isFloat: false) ==
631 TargetLowering::ZeroOrOneBooleanContent &&
632 BitWidth > 1)
633 Known.Zero.setBitsFrom(1);
634 break;
635 }
636
637 assert(MI.getOperand(0).getReg() == R &&
638 "We only compute knownbits for the sum here.");
639 // With [US]ADDE, a carry bit may be added in.
640 KnownBits Carry(1);
641 if (Opcode == TargetOpcode::G_UADDE || Opcode == TargetOpcode::G_SADDE) {
642 computeKnownBitsImpl(R: MI.getOperand(i: 4).getReg(), Known&: Carry, DemandedElts,
643 Depth: Depth + 1);
644 // Carry has bit width 1
645 Carry = Carry.trunc(BitWidth: 1);
646 } else {
647 Carry.setAllZero();
648 }
649
650 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
651 Depth: Depth + 1);
652 computeKnownBitsImpl(R: MI.getOperand(i: 3).getReg(), Known&: Known2, DemandedElts,
653 Depth: Depth + 1);
654 Known = KnownBits::computeForAddCarry(LHS: Known, RHS: Known2, Carry);
655 break;
656 }
657 case TargetOpcode::G_USUBO:
658 case TargetOpcode::G_USUBE:
659 case TargetOpcode::G_SSUBO:
660 case TargetOpcode::G_SSUBE:
661 case TargetOpcode::G_UMULO:
662 case TargetOpcode::G_SMULO: {
663 if (MI.getOperand(i: 1).getReg() == R) {
664 // If we know the result of a compare has the top bits zero, use this
665 // info.
666 if (TL.getBooleanContents(isVec: DstTy.isVector(), isFloat: false) ==
667 TargetLowering::ZeroOrOneBooleanContent &&
668 BitWidth > 1)
669 Known.Zero.setBitsFrom(1);
670 }
671 break;
672 }
673 case TargetOpcode::G_CTLZ:
674 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
675 KnownBits SrcOpKnown;
676 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: SrcOpKnown, DemandedElts,
677 Depth: Depth + 1);
678 // If we have a known 1, its position is our upper bound.
679 unsigned PossibleLZ = SrcOpKnown.countMaxLeadingZeros();
680 unsigned LowBits = llvm::bit_width(Value: PossibleLZ);
681 Known.Zero.setBitsFrom(LowBits);
682 break;
683 }
684 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
685 GExtractVectorElement &Extract = cast<GExtractVectorElement>(Val&: MI);
686 Register InVec = Extract.getVectorReg();
687 Register EltNo = Extract.getIndexReg();
688
689 auto ConstEltNo = getIConstantVRegVal(VReg: EltNo, MRI);
690
691 LLT VecVT = MRI.getType(Reg: InVec);
692 // computeKnownBits not yet implemented for scalable vectors.
693 if (VecVT.isScalableVector())
694 break;
695
696 const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
697 const unsigned NumSrcElts = VecVT.getNumElements();
698 // A return type different from the vector's element type may lead to
699 // issues with pattern selection. Bail out to avoid that.
700 if (BitWidth > EltBitWidth)
701 break;
702
703 Known.Zero.setAllBits();
704 Known.One.setAllBits();
705
706 // If we know the element index, just demand that vector element, else for
707 // an unknown element index, ignore DemandedElts and demand them all.
708 APInt DemandedSrcElts = APInt::getAllOnes(numBits: NumSrcElts);
709 if (ConstEltNo && ConstEltNo->ult(RHS: NumSrcElts))
710 DemandedSrcElts =
711 APInt::getOneBitSet(numBits: NumSrcElts, BitNo: ConstEltNo->getZExtValue());
712
713 computeKnownBitsImpl(R: InVec, Known, DemandedElts: DemandedSrcElts, Depth: Depth + 1);
714 break;
715 }
716 case TargetOpcode::G_SHUFFLE_VECTOR: {
717 APInt DemandedLHS, DemandedRHS;
718 // Collect the known bits that are shared by every vector element referenced
719 // by the shuffle.
720 unsigned NumElts = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getNumElements();
721 if (!getShuffleDemandedElts(SrcWidth: NumElts, Mask: MI.getOperand(i: 3).getShuffleMask(),
722 DemandedElts, DemandedLHS, DemandedRHS))
723 break;
724
725 // Known bits are the values that are shared by every demanded element.
726 Known.Zero.setAllBits();
727 Known.One.setAllBits();
728 if (!!DemandedLHS) {
729 computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts: DemandedLHS,
730 Depth: Depth + 1);
731 Known = Known.intersectWith(RHS: Known2);
732 }
733 // If we don't know any bits, early out.
734 if (Known.isUnknown())
735 break;
736 if (!!DemandedRHS) {
737 computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: Known2, DemandedElts: DemandedRHS,
738 Depth: Depth + 1);
739 Known = Known.intersectWith(RHS: Known2);
740 }
741 break;
742 }
743 case TargetOpcode::G_CONCAT_VECTORS: {
744 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isScalableVector())
745 break;
746 // Split DemandedElts and test each of the demanded subvectors.
747 Known.Zero.setAllBits();
748 Known.One.setAllBits();
749 unsigned NumSubVectorElts =
750 MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getNumElements();
751
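    // E.g., when concatenating two <2 x s32> operands into a <4 x s32>
    // result, operand I covers result elements [2 * I, 2 * I + 2).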
752 for (const auto &[I, MO] : enumerate(First: drop_begin(RangeOrContainer: MI.operands()))) {
753 APInt DemandedSub =
754 DemandedElts.extractBits(numBits: NumSubVectorElts, bitPosition: I * NumSubVectorElts);
755 if (!!DemandedSub) {
756 computeKnownBitsImpl(R: MO.getReg(), Known&: Known2, DemandedElts: DemandedSub, Depth: Depth + 1);
757
758 Known = Known.intersectWith(RHS: Known2);
759 }
760 // If we don't know any bits, early out.
761 if (Known.isUnknown())
762 break;
763 }
764 break;
765 }
766 case TargetOpcode::G_ABS: {
767 Register SrcReg = MI.getOperand(i: 1).getReg();
768 computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1);
769 Known = Known.abs();
770 Known.Zero.setHighBits(computeNumSignBits(R: SrcReg, DemandedElts, Depth: Depth + 1) -
771 1);
772 break;
773 }
774 }
775
776 LLVM_DEBUG(dumpResult(MI, Known, Depth));
777}
778
779static bool outputDenormalIsIEEEOrPosZero(const MachineFunction &MF, LLT Ty) {
780 Ty = Ty.getScalarType();
781 DenormalMode Mode = MF.getDenormalMode(FPType: getFltSemanticForLLT(Ty));
782 return Mode.Output == DenormalMode::IEEE ||
783 Mode.Output == DenormalMode::PositiveZero;
784}
785
786void GISelValueTracking::computeKnownFPClass(Register R, KnownFPClass &Known,
787 FPClassTest InterestedClasses,
788 unsigned Depth) {
789 LLT Ty = MRI.getType(Reg: R);
790 APInt DemandedElts =
791 Ty.isFixedVector() ? APInt::getAllOnes(numBits: Ty.getNumElements()) : APInt(1, 1);
792 computeKnownFPClass(R, DemandedElts, InterestedClasses, Known, Depth);
793}
794
795void GISelValueTracking::computeKnownFPClassForFPTrunc(
796 const MachineInstr &MI, const APInt &DemandedElts,
797 FPClassTest InterestedClasses, KnownFPClass &Known, unsigned Depth) {
798 if ((InterestedClasses & (KnownFPClass::OrderedLessThanZeroMask | fcNan)) ==
799 fcNone)
800 return;
801
802 Register Val = MI.getOperand(i: 1).getReg();
803 KnownFPClass KnownSrc;
804 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc,
805 Depth: Depth + 1);
806
807 // Sign should be preserved
808 // TODO: Handle cannot be ordered greater than zero
809 if (KnownSrc.cannotBeOrderedLessThanZero())
810 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
811
812 Known.propagateNaN(Src: KnownSrc, PreserveSign: true);
813
814 // Infinity needs a range check.
815}
816
817void GISelValueTracking::computeKnownFPClass(Register R,
818 const APInt &DemandedElts,
819 FPClassTest InterestedClasses,
820 KnownFPClass &Known,
821 unsigned Depth) {
822 assert(Known.isUnknown() && "should not be called with known information");
823
824 if (!DemandedElts) {
825 // No demanded elts, better to assume we don't know anything.
826 Known.resetAll();
827 return;
828 }
829
830 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
831
832 MachineInstr &MI = *MRI.getVRegDef(Reg: R);
833 unsigned Opcode = MI.getOpcode();
834 LLT DstTy = MRI.getType(Reg: R);
835
836 if (!DstTy.isValid()) {
837 Known.resetAll();
838 return;
839 }
840
841 if (auto Cst = GFConstant::getConstant(Const: R, MRI)) {
842 switch (Cst->getKind()) {
843 case GFConstant::GFConstantKind::Scalar: {
844 auto APF = Cst->getScalarValue();
845 Known.KnownFPClasses = APF.classify();
846 Known.SignBit = APF.isNegative();
847 break;
848 }
849 case GFConstant::GFConstantKind::FixedVector: {
850 Known.KnownFPClasses = fcNone;
851 bool SignBitAllZero = true;
852 bool SignBitAllOne = true;
853
854 for (auto C : *Cst) {
855 Known.KnownFPClasses |= C.classify();
856 if (C.isNegative())
857 SignBitAllZero = false;
858 else
859 SignBitAllOne = false;
860 }
861
862 if (SignBitAllOne != SignBitAllZero)
863 Known.SignBit = SignBitAllOne;
864
865 break;
866 }
867 case GFConstant::GFConstantKind::ScalableVector: {
868 Known.resetAll();
869 break;
870 }
871 }
872
873 return;
874 }
875
876 FPClassTest KnownNotFromFlags = fcNone;
877 if (MI.getFlag(Flag: MachineInstr::MIFlag::FmNoNans))
878 KnownNotFromFlags |= fcNan;
879 if (MI.getFlag(Flag: MachineInstr::MIFlag::FmNoInfs))
880 KnownNotFromFlags |= fcInf;
881
882 // We no longer need to find out about these bits from inputs if we can
883 // assume this from flags/attributes.
884 InterestedClasses &= ~KnownNotFromFlags;
885
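  // Make sure the flag-implied classes are removed from Known on every path
  // out of this function, including the early returns below.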
886 llvm::scope_exit ClearClassesFromFlags(
887 [=, &Known] { Known.knownNot(RuleOut: KnownNotFromFlags); });
888
889 // All recursive calls that increase depth must come after this.
890 if (Depth == MaxAnalysisRecursionDepth)
891 return;
892
893 const MachineFunction *MF = MI.getMF();
894
895 switch (Opcode) {
896 default:
897 TL.computeKnownFPClassForTargetInstr(Analysis&: *this, R, Known, DemandedElts, MRI,
898 Depth);
899 break;
900 case TargetOpcode::G_FNEG: {
901 Register Val = MI.getOperand(i: 1).getReg();
902 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known, Depth: Depth + 1);
903 Known.fneg();
904 break;
905 }
906 case TargetOpcode::G_SELECT: {
907 GSelect &SelMI = cast<GSelect>(Val&: MI);
908 Register Cond = SelMI.getCondReg();
909 Register LHS = SelMI.getTrueReg();
910 Register RHS = SelMI.getFalseReg();
911
912 FPClassTest FilterLHS = fcAllFlags;
913 FPClassTest FilterRHS = fcAllFlags;
914
915 Register TestedValue;
916 FPClassTest MaskIfTrue = fcAllFlags;
917 FPClassTest MaskIfFalse = fcAllFlags;
918 FPClassTest ClassVal = fcNone;
919
920 CmpInst::Predicate Pred;
921 Register CmpLHS, CmpRHS;
922 if (mi_match(R: Cond, MRI,
923 P: m_GFCmp(P: m_Pred(P&: Pred), L: m_Reg(R&: CmpLHS), R: m_Reg(R&: CmpRHS)))) {
924 // If the select filters out a value based on the class, it no longer
925 // participates in the class of the result
926
927 // TODO: In some degenerate cases we can infer something if we try again
928 // without looking through sign operations.
929 bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS;
930 std::tie(args&: TestedValue, args&: MaskIfTrue, args&: MaskIfFalse) =
931 fcmpImpliesClass(Pred, MF: *MF, LHS: CmpLHS, RHS: CmpRHS, LookThroughSrc: LookThroughFAbsFNeg);
932 } else if (mi_match(
933 R: Cond, MRI,
934 P: m_GIsFPClass(L: m_Reg(R&: TestedValue), T: m_FPClassTest(T&: ClassVal)))) {
935 FPClassTest TestedMask = ClassVal;
936 MaskIfTrue = TestedMask;
937 MaskIfFalse = ~TestedMask;
938 }
939
940 if (TestedValue == LHS) {
941 // match !isnan(x) ? x : y
942 FilterLHS = MaskIfTrue;
943 } else if (TestedValue == RHS) { // && IsExactClass
944 // match !isnan(x) ? y : x
945 FilterRHS = MaskIfFalse;
946 }
947
948 KnownFPClass Known2;
949 computeKnownFPClass(R: LHS, DemandedElts, InterestedClasses: InterestedClasses & FilterLHS, Known,
950 Depth: Depth + 1);
951 Known.KnownFPClasses &= FilterLHS;
952
953 computeKnownFPClass(R: RHS, DemandedElts, InterestedClasses: InterestedClasses & FilterRHS,
954 Known&: Known2, Depth: Depth + 1);
955 Known2.KnownFPClasses &= FilterRHS;
956
957 Known |= Known2;
958 break;
959 }
960 case TargetOpcode::G_FCOPYSIGN: {
961 Register Magnitude = MI.getOperand(i: 1).getReg();
962 Register Sign = MI.getOperand(i: 2).getReg();
963
964 KnownFPClass KnownSign;
965
966 computeKnownFPClass(R: Magnitude, DemandedElts, InterestedClasses, Known,
967 Depth: Depth + 1);
968 computeKnownFPClass(R: Sign, DemandedElts, InterestedClasses, Known&: KnownSign,
969 Depth: Depth + 1);
970 Known.copysign(Sign: KnownSign);
971 break;
972 }
973 case TargetOpcode::G_FMA:
974 case TargetOpcode::G_STRICT_FMA:
975 case TargetOpcode::G_FMAD: {
976 if ((InterestedClasses & fcNegative) == fcNone)
977 break;
978
979 Register A = MI.getOperand(i: 1).getReg();
980 Register B = MI.getOperand(i: 2).getReg();
981 Register C = MI.getOperand(i: 3).getReg();
982
983 if (A != B)
984 break;
985
986 // The multiply cannot be -0 and therefore the add can't be -0
987 Known.knownNot(RuleOut: fcNegZero);
988
989 // x * x + y is non-negative if y is non-negative.
990 KnownFPClass KnownAddend;
991 computeKnownFPClass(R: C, DemandedElts, InterestedClasses, Known&: KnownAddend,
992 Depth: Depth + 1);
993
994 if (KnownAddend.cannotBeOrderedLessThanZero())
995 Known.knownNot(RuleOut: fcNegative);
996 break;
997 }
998 case TargetOpcode::G_FSQRT:
999 case TargetOpcode::G_STRICT_FSQRT: {
1000 KnownFPClass KnownSrc;
1001 FPClassTest InterestedSrcs = InterestedClasses;
1002 if (InterestedClasses & fcNan)
1003 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
1004
1005 Register Val = MI.getOperand(i: 1).getReg();
1006
1007 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses: InterestedSrcs, Known&: KnownSrc, Depth: Depth + 1);
1008
1009 if (KnownSrc.isKnownNeverPosInfinity())
1010 Known.knownNot(RuleOut: fcPosInf);
1011 if (KnownSrc.isKnownNever(Mask: fcSNan))
1012 Known.knownNot(RuleOut: fcSNan);
1013
1014 // Any negative value besides -0 returns a nan.
1015 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
1016 Known.knownNot(RuleOut: fcNan);
1017
1018 // The only negative value that can be returned is -0 for -0 inputs.
1019 Known.knownNot(RuleOut: fcNegInf | fcNegSubnormal | fcNegNormal);
1020 break;
1021 }
1022 case TargetOpcode::G_FABS: {
1023 if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) {
1024 Register Val = MI.getOperand(i: 1).getReg();
1025 // If we only care about the sign bit we don't need to inspect the
1026 // operand.
1027 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known,
1028 Depth: Depth + 1);
1029 }
1030 Known.fabs();
1031 break;
1032 }
1033 case TargetOpcode::G_FSIN:
1034 case TargetOpcode::G_FCOS:
1035 case TargetOpcode::G_FSINCOS: {
1036 // Return NaN on infinite inputs.
1037 Register Val = MI.getOperand(i: 1).getReg();
1038 KnownFPClass KnownSrc;
1039
1040 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc,
1041 Depth: Depth + 1);
1042 Known.knownNot(RuleOut: fcInf);
1043
1044 if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity())
1045 Known.knownNot(RuleOut: fcNan);
1046 break;
1047 }
1048 case TargetOpcode::G_FMAXNUM:
1049 case TargetOpcode::G_FMINNUM:
1050 case TargetOpcode::G_FMINNUM_IEEE:
1051 case TargetOpcode::G_FMAXIMUM:
1052 case TargetOpcode::G_FMINIMUM:
1053 case TargetOpcode::G_FMAXNUM_IEEE:
1054 case TargetOpcode::G_FMAXIMUMNUM:
1055 case TargetOpcode::G_FMINIMUMNUM: {
1056 Register LHS = MI.getOperand(i: 1).getReg();
1057 Register RHS = MI.getOperand(i: 2).getReg();
1058 KnownFPClass KnownLHS, KnownRHS;
1059
1060 computeKnownFPClass(R: LHS, DemandedElts, InterestedClasses, Known&: KnownLHS,
1061 Depth: Depth + 1);
1062 computeKnownFPClass(R: RHS, DemandedElts, InterestedClasses, Known&: KnownRHS,
1063 Depth: Depth + 1);
1064
1065 bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN();
1066 Known = KnownLHS | KnownRHS;
1067
1068 // If either operand is not NaN, the result is not NaN.
1069 if (NeverNaN && (Opcode == TargetOpcode::G_FMINNUM ||
1070 Opcode == TargetOpcode::G_FMAXNUM ||
1071 Opcode == TargetOpcode::G_FMINIMUMNUM ||
1072 Opcode == TargetOpcode::G_FMAXIMUMNUM))
1073 Known.knownNot(RuleOut: fcNan);
1074
1075 if (Opcode == TargetOpcode::G_FMAXNUM ||
1076 Opcode == TargetOpcode::G_FMAXIMUMNUM ||
1077 Opcode == TargetOpcode::G_FMAXNUM_IEEE) {
1078 // If at least one operand is known to be positive, the result must be
1079 // positive.
1080 if ((KnownLHS.cannotBeOrderedLessThanZero() &&
1081 KnownLHS.isKnownNeverNaN()) ||
1082 (KnownRHS.cannotBeOrderedLessThanZero() &&
1083 KnownRHS.isKnownNeverNaN()))
1084 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
1085 } else if (Opcode == TargetOpcode::G_FMAXIMUM) {
1086 // If at least one operand is known to be positive, the result must be
1087 // positive.
1088 if (KnownLHS.cannotBeOrderedLessThanZero() ||
1089 KnownRHS.cannotBeOrderedLessThanZero())
1090 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
1091 } else if (Opcode == TargetOpcode::G_FMINNUM ||
1092 Opcode == TargetOpcode::G_FMINIMUMNUM ||
1093 Opcode == TargetOpcode::G_FMINNUM_IEEE) {
1094 // If at least one operand is known to be negative, the result must be
1095 // negative.
1096 if ((KnownLHS.cannotBeOrderedGreaterThanZero() &&
1097 KnownLHS.isKnownNeverNaN()) ||
1098 (KnownRHS.cannotBeOrderedGreaterThanZero() &&
1099 KnownRHS.isKnownNeverNaN()))
1100 Known.knownNot(RuleOut: KnownFPClass::OrderedGreaterThanZeroMask);
1101 } else if (Opcode == TargetOpcode::G_FMINIMUM) {
1102 // If at least one operand is known to be negative, the result must be
1103 // negative.
1104 if (KnownLHS.cannotBeOrderedGreaterThanZero() ||
1105 KnownRHS.cannotBeOrderedGreaterThanZero())
1106 Known.knownNot(RuleOut: KnownFPClass::OrderedGreaterThanZeroMask);
1107 } else {
1108 llvm_unreachable("unhandled intrinsic");
1109 }
1110
1111 // Fixup zero handling if denormals could be returned as a zero.
1112 //
1113 // As there's no spec for denormal flushing, be conservative with the
1114 // treatment of denormals that could be flushed to zero. For older
1115 // subtargets on AMDGPU the min/max instructions would not flush the
1116 // output and return the original value.
1117 //
1118 if ((Known.KnownFPClasses & fcZero) != fcNone &&
1119 !Known.isKnownNeverSubnormal()) {
1120 DenormalMode Mode =
1121 MF->getDenormalMode(FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType()));
1122 if (Mode != DenormalMode::getIEEE())
1123 Known.KnownFPClasses |= fcZero;
1124 }
1125
1126 if (Known.isKnownNeverNaN()) {
1127 if (KnownLHS.SignBit && KnownRHS.SignBit &&
1128 *KnownLHS.SignBit == *KnownRHS.SignBit) {
1129 if (*KnownLHS.SignBit)
1130 Known.signBitMustBeOne();
1131 else
1132 Known.signBitMustBeZero();
1133 } else if ((Opcode == TargetOpcode::G_FMAXIMUM ||
1134 Opcode == TargetOpcode::G_FMINIMUM) ||
1135 Opcode == TargetOpcode::G_FMAXIMUMNUM ||
1136 Opcode == TargetOpcode::G_FMINIMUMNUM ||
1137 Opcode == TargetOpcode::G_FMAXNUM_IEEE ||
1138 Opcode == TargetOpcode::G_FMINNUM_IEEE ||
1139 // FIXME: Should be using logical zero versions
1140 ((KnownLHS.isKnownNeverNegZero() ||
1141 KnownRHS.isKnownNeverPosZero()) &&
1142 (KnownLHS.isKnownNeverPosZero() ||
1143 KnownRHS.isKnownNeverNegZero()))) {
1144 if ((Opcode == TargetOpcode::G_FMAXIMUM ||
1145 Opcode == TargetOpcode::G_FMAXNUM ||
1146 Opcode == TargetOpcode::G_FMAXIMUMNUM ||
1147 Opcode == TargetOpcode::G_FMAXNUM_IEEE) &&
1148 (KnownLHS.SignBit == false || KnownRHS.SignBit == false))
1149 Known.signBitMustBeZero();
1150 else if ((Opcode == TargetOpcode::G_FMINIMUM ||
1151 Opcode == TargetOpcode::G_FMINNUM ||
1152 Opcode == TargetOpcode::G_FMINIMUMNUM ||
1153 Opcode == TargetOpcode::G_FMINNUM_IEEE) &&
1154 (KnownLHS.SignBit == true || KnownRHS.SignBit == true))
1155 Known.signBitMustBeOne();
1156 }
1157 }
1158 break;
1159 }
1160 case TargetOpcode::G_FCANONICALIZE: {
1161 Register Val = MI.getOperand(i: 1).getReg();
1162 KnownFPClass KnownSrc;
1163 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc,
1164 Depth: Depth + 1);
1165
1166 // This is essentially a stronger form of
1167 // propagateCanonicalizingSrc. Other "canonicalizing" operations don't
1168 // actually have an IR canonicalization guarantee.
1169
1170 // Canonicalize may flush denormals to zero, so we have to consider the
1171 // denormal mode to preserve known-not-0 knowledge.
1172 Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan;
1173
1174 // Stronger version of propagateNaN
1175 // Canonicalize is guaranteed to quiet signaling nans.
1176 if (KnownSrc.isKnownNeverNaN())
1177 Known.knownNot(RuleOut: fcNan);
1178 else
1179 Known.knownNot(RuleOut: fcSNan);
1180
1181 // If the parent function flushes denormals, the canonical output cannot
1182 // be a denormal.
1183 LLT Ty = MRI.getType(Reg: Val).getScalarType();
1184 const fltSemantics &FPType = getFltSemanticForLLT(Ty);
1185 DenormalMode DenormMode = MF->getDenormalMode(FPType);
1186 if (DenormMode == DenormalMode::getIEEE()) {
1187 if (KnownSrc.isKnownNever(Mask: fcPosZero))
1188 Known.knownNot(RuleOut: fcPosZero);
1189 if (KnownSrc.isKnownNever(Mask: fcNegZero))
1190 Known.knownNot(RuleOut: fcNegZero);
1191 break;
1192 }
1193
1194 if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero())
1195 Known.knownNot(RuleOut: fcSubnormal);
1196
1197 if (DenormMode.Input == DenormalMode::PositiveZero ||
1198 (DenormMode.Output == DenormalMode::PositiveZero &&
1199 DenormMode.Input == DenormalMode::IEEE))
1200 Known.knownNot(RuleOut: fcNegZero);
1201
1202 break;
1203 }
1204 case TargetOpcode::G_VECREDUCE_FMAX:
1205 case TargetOpcode::G_VECREDUCE_FMIN:
1206 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
1207 case TargetOpcode::G_VECREDUCE_FMINIMUM: {
1208 Register Val = MI.getOperand(i: 1).getReg();
    // reduce min/max will choose an element from one of the vector elements,
    // so we can infer any class information that is common to all elements.
1211
1212 Known =
1213 computeKnownFPClass(R: Val, Flags: MI.getFlags(), InterestedClasses, Depth: Depth + 1);
1214 // Can only propagate sign if output is never NaN.
1215 if (!Known.isKnownNeverNaN())
1216 Known.SignBit.reset();
1217 break;
1218 }
1219 case TargetOpcode::G_TRUNC:
1220 case TargetOpcode::G_FFLOOR:
1221 case TargetOpcode::G_FCEIL:
1222 case TargetOpcode::G_FRINT:
1223 case TargetOpcode::G_FNEARBYINT:
1224 case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND:
1225 case TargetOpcode::G_INTRINSIC_ROUND: {
1226 Register Val = MI.getOperand(i: 1).getReg();
1227 KnownFPClass KnownSrc;
1228 FPClassTest InterestedSrcs = InterestedClasses;
1229 if (InterestedSrcs & fcPosFinite)
1230 InterestedSrcs |= fcPosFinite;
1231 if (InterestedSrcs & fcNegFinite)
1232 InterestedSrcs |= fcNegFinite;
1233 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses: InterestedSrcs, Known&: KnownSrc, Depth: Depth + 1);
1234
1235 // Integer results cannot be subnormal.
1236 Known.knownNot(RuleOut: fcSubnormal);
1237
1238 Known.propagateNaN(Src: KnownSrc, PreserveSign: true);
1239
1240 // TODO: handle multi unit FPTypes once LLT FPInfo lands
1241
1242 // Negative round ups to 0 produce -0
1243 if (KnownSrc.isKnownNever(Mask: fcPosFinite))
1244 Known.knownNot(RuleOut: fcPosFinite);
1245 if (KnownSrc.isKnownNever(Mask: fcNegFinite))
1246 Known.knownNot(RuleOut: fcNegFinite);
1247
1248 break;
1249 }
1250 case TargetOpcode::G_FEXP:
1251 case TargetOpcode::G_FEXP2:
1252 case TargetOpcode::G_FEXP10: {
1253 Known.knownNot(RuleOut: fcNegative);
1254 if ((InterestedClasses & fcNan) == fcNone)
1255 break;
1256
1257 Register Val = MI.getOperand(i: 1).getReg();
1258 KnownFPClass KnownSrc;
1259 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc,
1260 Depth: Depth + 1);
1261 if (KnownSrc.isKnownNeverNaN()) {
1262 Known.knownNot(RuleOut: fcNan);
1263 Known.signBitMustBeZero();
1264 }
1265
1266 break;
1267 }
1268 case TargetOpcode::G_FLOG:
1269 case TargetOpcode::G_FLOG2:
1270 case TargetOpcode::G_FLOG10: {
1271 // log(+inf) -> +inf
1272 // log([+-]0.0) -> -inf
1273 // log(-inf) -> nan
1274 // log(-x) -> nan
1275 if ((InterestedClasses & (fcNan | fcInf)) == fcNone)
1276 break;
1277
1278 FPClassTest InterestedSrcs = InterestedClasses;
1279 if ((InterestedClasses & fcNegInf) != fcNone)
1280 InterestedSrcs |= fcZero | fcSubnormal;
1281 if ((InterestedClasses & fcNan) != fcNone)
1282 InterestedSrcs |= fcNan | (fcNegative & ~fcNan);
1283
1284 Register Val = MI.getOperand(i: 1).getReg();
1285 KnownFPClass KnownSrc;
1286 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses: InterestedSrcs, Known&: KnownSrc, Depth: Depth + 1);
1287
1288 if (KnownSrc.isKnownNeverPosInfinity())
1289 Known.knownNot(RuleOut: fcPosInf);
1290
1291 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
1292 Known.knownNot(RuleOut: fcNan);
1293
1294 LLT Ty = MRI.getType(Reg: Val).getScalarType();
1295 const fltSemantics &FltSem = getFltSemanticForLLT(Ty);
1296 DenormalMode Mode = MF->getDenormalMode(FPType: FltSem);
1297
1298 if (KnownSrc.isKnownNeverLogicalZero(Mode))
1299 Known.knownNot(RuleOut: fcNegInf);
1300
1301 break;
1302 }
1303 case TargetOpcode::G_FPOWI: {
1304 if ((InterestedClasses & fcNegative) == fcNone)
1305 break;
1306
1307 Register Exp = MI.getOperand(i: 2).getReg();
1308 LLT ExpTy = MRI.getType(Reg: Exp);
1309 KnownBits ExponentKnownBits = getKnownBits(
1310 R: Exp, DemandedElts: ExpTy.isVector() ? DemandedElts : APInt(1, 1), Depth: Depth + 1);
1311
1312 if (ExponentKnownBits.Zero[0]) { // Is even
1313 Known.knownNot(RuleOut: fcNegative);
1314 break;
1315 }
1316
1317 // Given that exp is an integer, here are the
1318 // ways that pow can return a negative value:
1319 //
1320 // pow(-x, exp) --> negative if exp is odd and x is negative.
1321 // pow(-0, exp) --> -inf if exp is negative odd.
1322 // pow(-0, exp) --> -0 if exp is positive odd.
1323 // pow(-inf, exp) --> -0 if exp is negative odd.
1324 // pow(-inf, exp) --> -inf if exp is positive odd.
1325 Register Val = MI.getOperand(i: 1).getReg();
1326 KnownFPClass KnownSrc;
1327 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses: fcNegative, Known&: KnownSrc, Depth: Depth + 1);
1328 if (KnownSrc.isKnownNever(Mask: fcNegative))
1329 Known.knownNot(RuleOut: fcNegative);
1330 break;
1331 }
1332 case TargetOpcode::G_FLDEXP:
1333 case TargetOpcode::G_STRICT_FLDEXP: {
1334 Register Val = MI.getOperand(i: 1).getReg();
1335 KnownFPClass KnownSrc;
1336 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc,
1337 Depth: Depth + 1);
    Known.propagateNaN(KnownSrc, /*PreserveSign=*/true);
1339
1340 // Sign is preserved, but underflows may produce zeroes.
1341 if (KnownSrc.isKnownNever(Mask: fcNegative))
1342 Known.knownNot(RuleOut: fcNegative);
1343 else if (KnownSrc.cannotBeOrderedLessThanZero())
1344 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
1345
1346 if (KnownSrc.isKnownNever(Mask: fcPositive))
1347 Known.knownNot(RuleOut: fcPositive);
1348 else if (KnownSrc.cannotBeOrderedGreaterThanZero())
1349 Known.knownNot(RuleOut: KnownFPClass::OrderedGreaterThanZeroMask);
1350
1351 // Can refine inf/zero handling based on the exponent operand.
1352 const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf;
1353 if ((InterestedClasses & ExpInfoMask) == fcNone)
1354 break;
1355 if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone)
1356 break;
1357
1358 // TODO: Handle constant range of Exp
1359
1360 break;
1361 }
1362 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1363 computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known,
1364 Depth);
1365 break;
1366 }
1367 case TargetOpcode::G_FADD:
1368 case TargetOpcode::G_STRICT_FADD:
1369 case TargetOpcode::G_FSUB:
1370 case TargetOpcode::G_STRICT_FSUB: {
1371 Register LHS = MI.getOperand(i: 1).getReg();
1372 Register RHS = MI.getOperand(i: 2).getReg();
1373 KnownFPClass KnownLHS, KnownRHS;
1374 bool WantNegative =
1375 (Opcode == TargetOpcode::G_FADD ||
1376 Opcode == TargetOpcode::G_STRICT_FADD) &&
1377 (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone;
1378 bool WantNaN = (InterestedClasses & fcNan) != fcNone;
1379 bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone;
1380
1381 if (!WantNaN && !WantNegative && !WantNegZero)
1382 break;
1383
1384 FPClassTest InterestedSrcs = InterestedClasses;
1385 if (WantNegative)
1386 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
1387 if (InterestedClasses & fcNan)
1388 InterestedSrcs |= fcInf;
1389 computeKnownFPClass(R: RHS, DemandedElts, InterestedClasses: InterestedSrcs, Known&: KnownRHS, Depth: Depth + 1);
1390
1391 if ((WantNaN && KnownRHS.isKnownNeverNaN()) ||
1392 (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) ||
1393 WantNegZero ||
1394 (Opcode == TargetOpcode::G_FSUB ||
1395 Opcode == TargetOpcode::G_STRICT_FSUB)) {
1396
1397 // RHS is canonically cheaper to compute. Skip inspecting the LHS if
1398 // there's no point.
1399 computeKnownFPClass(R: LHS, DemandedElts, InterestedClasses: InterestedSrcs, Known&: KnownLHS,
1400 Depth: Depth + 1);
1401 // Adding positive and negative infinity produces NaN.
1402 // TODO: Check sign of infinities.
1403 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
1404 (KnownLHS.isKnownNeverInfinity() || KnownRHS.isKnownNeverInfinity()))
1405 Known.knownNot(RuleOut: fcNan);
1406
1407 if (Opcode == TargetOpcode::G_FADD ||
1408 Opcode == TargetOpcode::G_STRICT_FADD) {
1409 if (KnownLHS.cannotBeOrderedLessThanZero() &&
1410 KnownRHS.cannotBeOrderedLessThanZero())
1411 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
1412
1413 // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
1414 if ((KnownLHS.isKnownNeverLogicalNegZero(Mode: MF->getDenormalMode(
1415 FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType()))) ||
1416 KnownRHS.isKnownNeverLogicalNegZero(Mode: MF->getDenormalMode(
1417 FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType())))) &&
1418 // Make sure output negative denormal can't flush to -0
1419 outputDenormalIsIEEEOrPosZero(MF: *MF, Ty: DstTy))
1420 Known.knownNot(RuleOut: fcNegZero);
1421 } else {
1422 // Only fsub -0, +0 can return -0
1423 if ((KnownLHS.isKnownNeverLogicalNegZero(Mode: MF->getDenormalMode(
1424 FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType()))) ||
1425 KnownRHS.isKnownNeverLogicalPosZero(Mode: MF->getDenormalMode(
1426 FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType())))) &&
1427 // Make sure output negative denormal can't flush to -0
1428 outputDenormalIsIEEEOrPosZero(MF: *MF, Ty: DstTy))
1429 Known.knownNot(RuleOut: fcNegZero);
1430 }
1431 }
1432
1433 break;
1434 }
1435 case TargetOpcode::G_FMUL:
1436 case TargetOpcode::G_STRICT_FMUL: {
1437 Register LHS = MI.getOperand(i: 1).getReg();
1438 Register RHS = MI.getOperand(i: 2).getReg();
1439 // X * X is always non-negative or a NaN.
1440 if (LHS == RHS)
1441 Known.knownNot(RuleOut: fcNegative);
1442
1443 if ((InterestedClasses & fcNan) != fcNan)
1444 break;
1445
1446 // fcSubnormal is only needed in case of DAZ.
1447 const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal;
1448
1449 KnownFPClass KnownLHS, KnownRHS;
1450 computeKnownFPClass(R: RHS, DemandedElts, InterestedClasses: NeedForNan, Known&: KnownRHS, Depth: Depth + 1);
1451 if (!KnownRHS.isKnownNeverNaN())
1452 break;
1453
1454 computeKnownFPClass(R: LHS, DemandedElts, InterestedClasses: NeedForNan, Known&: KnownLHS, Depth: Depth + 1);
1455 if (!KnownLHS.isKnownNeverNaN())
1456 break;
1457
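    // Both operands are known not to be NaN at this point, so the sign of the
    // product is the XOR of the operand signs: equal signs give a positive
    // result, unequal signs give a negative result.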
1458 if (KnownLHS.SignBit && KnownRHS.SignBit) {
1459 if (*KnownLHS.SignBit == *KnownRHS.SignBit)
1460 Known.signBitMustBeZero();
1461 else
1462 Known.signBitMustBeOne();
1463 }
1464
    // 0 * +/-inf produces NaN; if neither operand can be infinity, the result
    // is never NaN.
1466 if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) {
1467 Known.knownNot(RuleOut: fcNan);
1468 break;
1469 }
1470
1471 if ((KnownRHS.isKnownNeverInfinity() ||
1472 KnownLHS.isKnownNeverLogicalZero(Mode: MF->getDenormalMode(
1473 FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType())))) &&
1474 (KnownLHS.isKnownNeverInfinity() ||
1475 KnownRHS.isKnownNeverLogicalZero(
1476 Mode: MF->getDenormalMode(FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType())))))
1477 Known.knownNot(RuleOut: fcNan);
1478
1479 break;
1480 }
1481 case TargetOpcode::G_FDIV:
1482 case TargetOpcode::G_FREM: {
1483 Register LHS = MI.getOperand(i: 1).getReg();
1484 Register RHS = MI.getOperand(i: 2).getReg();
1485
1486 if (LHS == RHS) {
1487 // TODO: Could filter out snan if we inspect the operand
1488 if (Opcode == TargetOpcode::G_FDIV) {
1489 // X / X is always exactly 1.0 or a NaN.
1490 Known.KnownFPClasses = fcNan | fcPosNormal;
1491 } else {
1492 // X % X is always exactly [+-]0.0 or a NaN.
1493 Known.KnownFPClasses = fcNan | fcZero;
1494 }
1495
1496 break;
1497 }
1498
1499 const bool WantNan = (InterestedClasses & fcNan) != fcNone;
1500 const bool WantNegative = (InterestedClasses & fcNegative) != fcNone;
1501 const bool WantPositive = Opcode == TargetOpcode::G_FREM &&
1502 (InterestedClasses & fcPositive) != fcNone;
1503 if (!WantNan && !WantNegative && !WantPositive)
1504 break;
1505
1506 KnownFPClass KnownLHS, KnownRHS;
1507
1508 computeKnownFPClass(R: RHS, DemandedElts, InterestedClasses: fcNan | fcInf | fcZero | fcNegative,
1509 Known&: KnownRHS, Depth: Depth + 1);
1510
1511 bool KnowSomethingUseful =
1512 KnownRHS.isKnownNeverNaN() || KnownRHS.isKnownNever(Mask: fcNegative);
1513
1514 if (KnowSomethingUseful || WantPositive) {
1515 const FPClassTest InterestedLHS =
1516 WantPositive ? fcAllFlags
1517 : fcNan | fcInf | fcZero | fcSubnormal | fcNegative;
1518
1519 computeKnownFPClass(R: LHS, DemandedElts, InterestedClasses: InterestedClasses & InterestedLHS,
1520 Known&: KnownLHS, Depth: Depth + 1);
1521 }
1522
1523 if (Opcode == TargetOpcode::G_FDIV) {
1524 // Only 0/0, Inf/Inf produce NaN.
1525 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
1526 (KnownLHS.isKnownNeverInfinity() ||
1527 KnownRHS.isKnownNeverInfinity()) &&
1528 ((KnownLHS.isKnownNeverLogicalZero(Mode: MF->getDenormalMode(
1529 FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType())))) ||
1530 (KnownRHS.isKnownNeverLogicalZero(Mode: MF->getDenormalMode(
1531 FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType())))))) {
1532 Known.knownNot(RuleOut: fcNan);
1533 }
1534
1535 // X / -0.0 is -Inf (or NaN), so both operands must be known never
1536 // negative for the quotient to be known never negative.
1537 if (KnownLHS.isKnownNever(Mask: fcNegative) &&
1538 KnownRHS.isKnownNever(Mask: fcNegative))
1539 Known.knownNot(RuleOut: fcNegative);
1540 } else {
1541 // Inf REM x and x REM 0 produce NaN.
1542 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
1543 KnownLHS.isKnownNeverInfinity() &&
1544 KnownRHS.isKnownNeverLogicalZero(Mode: MF->getDenormalMode(
1545 FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType())))) {
1546 Known.knownNot(RuleOut: fcNan);
1547 }
1548
1549 // The sign for frem is the same as the first operand.
1550 if (KnownLHS.cannotBeOrderedLessThanZero())
1551 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
1552 if (KnownLHS.cannotBeOrderedGreaterThanZero())
1553 Known.knownNot(RuleOut: KnownFPClass::OrderedGreaterThanZeroMask);
1554
1555 // See if we can be more aggressive about the sign of 0.
1556 if (KnownLHS.isKnownNever(Mask: fcNegative))
1557 Known.knownNot(RuleOut: fcNegative);
1558 if (KnownLHS.isKnownNever(Mask: fcPositive))
1559 Known.knownNot(RuleOut: fcPositive);
1560 }
1561
1562 break;
1563 }
1564 case TargetOpcode::G_FPEXT: {
1565 Register Dst = MI.getOperand(i: 0).getReg();
1566 Register Src = MI.getOperand(i: 1).getReg();
1567 // Infinity, nan and zero propagate from source.
1568 computeKnownFPClass(R: Src, DemandedElts, InterestedClasses, Known, Depth: Depth + 1);
1569
1570 LLT DstTy = MRI.getType(Reg: Dst).getScalarType();
1571 const fltSemantics &DstSem = getFltSemanticForLLT(Ty: DstTy);
1572 LLT SrcTy = MRI.getType(Reg: Src).getScalarType();
1573 const fltSemantics &SrcSem = getFltSemanticForLLT(Ty: SrcTy);
1574
1575 // All subnormal inputs should be in the normal range in the result type.
1576 if (APFloat::isRepresentableAsNormalIn(Src: SrcSem, Dst: DstSem)) {
1577 if (Known.KnownFPClasses & fcPosSubnormal)
1578 Known.KnownFPClasses |= fcPosNormal;
1579 if (Known.KnownFPClasses & fcNegSubnormal)
1580 Known.KnownFPClasses |= fcNegNormal;
1581 Known.knownNot(RuleOut: fcSubnormal);
1582 }
1583
1584 // Sign bit of a nan isn't guaranteed.
1585 if (!Known.isKnownNeverNaN())
1586 Known.SignBit = std::nullopt;
1587 break;
1588 }
1589 case TargetOpcode::G_FPTRUNC: {
1590 computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known,
1591 Depth);
1592 break;
1593 }
1594 case TargetOpcode::G_SITOFP:
1595 case TargetOpcode::G_UITOFP: {
1596 // Cannot produce nan
1597 Known.knownNot(RuleOut: fcNan);
1598
1599 // Integers cannot be subnormal
1600 Known.knownNot(RuleOut: fcSubnormal);
1601
1602 // sitofp and uitofp turn into +0.0 for zero.
1603 Known.knownNot(RuleOut: fcNegZero);
1604 if (Opcode == TargetOpcode::G_UITOFP)
1605 Known.signBitMustBeZero();
1606
1607 Register Val = MI.getOperand(i: 1).getReg();
1608 LLT Ty = MRI.getType(Reg: Val);
1609
1610 if (InterestedClasses & fcInf) {
1611 // Get width of largest magnitude integer (remove a bit if signed).
1612 // This still works for a signed minimum value because the largest FP
1613 // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx).
1614 int IntSize = Ty.getScalarSizeInBits();
1615 if (Opcode == TargetOpcode::G_SITOFP)
1616 --IntSize;
1617
1618 // If the exponent of the largest finite FP value can hold the largest
1619 // integer, the result of the cast must be finite.
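// e.g. any i32 converts to a finite double: ilogb of the largest double is
// 1023, which comfortably exceeds 32.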
1620 LLT FPTy = DstTy.getScalarType();
1621 const fltSemantics &FltSem = getFltSemanticForLLT(Ty: FPTy);
1622 if (ilogb(Arg: APFloat::getLargest(Sem: FltSem)) >= IntSize)
1623 Known.knownNot(RuleOut: fcInf);
1624 }
1625
1626 break;
1627 }
1628 // case TargetOpcode::G_MERGE_VALUES:
1629 case TargetOpcode::G_BUILD_VECTOR:
1630 case TargetOpcode::G_CONCAT_VECTORS: {
1631 GMergeLikeInstr &Merge = cast<GMergeLikeInstr>(Val&: MI);
1632
1633 if (!DstTy.isFixedVector())
1634 break;
1635
1636 bool First = true;
1637 for (unsigned Idx = 0; Idx < Merge.getNumSources(); ++Idx) {
1638 // Only look at sources that provide demanded elements.
1639 bool NeedsElt = DemandedElts[Idx];
1640
1641 // Do we demand this source element?
1642 if (NeedsElt) {
1643 Register Src = Merge.getSourceReg(I: Idx);
1644 if (First) {
1645 computeKnownFPClass(R: Src, Known, InterestedClasses, Depth: Depth + 1);
1646 First = false;
1647 } else {
1648 KnownFPClass Known2;
1649 computeKnownFPClass(R: Src, Known&: Known2, InterestedClasses, Depth: Depth + 1);
1650 Known |= Known2;
1651 }
1652
1653 // If we don't know any bits, early out.
1654 if (Known.isUnknown())
1655 break;
1656 }
1657 }
1658
1659 break;
1660 }
1661 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
1662 // Look through extract element. If the index is non-constant or
1663 // out-of-range, demand all elements; otherwise demand just the extracted
1664 // element.
1665 GExtractVectorElement &Extract = cast<GExtractVectorElement>(Val&: MI);
1666 Register Vec = Extract.getVectorReg();
1667 Register Idx = Extract.getIndexReg();
1668
1669 auto CIdx = getIConstantVRegVal(VReg: Idx, MRI);
1670
1671 LLT VecTy = MRI.getType(Reg: Vec);
1672
1673 if (VecTy.isFixedVector()) {
1674 unsigned NumElts = VecTy.getNumElements();
1675 APInt DemandedVecElts = APInt::getAllOnes(numBits: NumElts);
1676 if (CIdx && CIdx->ult(RHS: NumElts))
1677 DemandedVecElts = APInt::getOneBitSet(numBits: NumElts, BitNo: CIdx->getZExtValue());
1678 return computeKnownFPClass(R: Vec, DemandedElts: DemandedVecElts, InterestedClasses, Known,
1679 Depth: Depth + 1);
1680 }
1681
1682 break;
1683 }
1684 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1685 GInsertVectorElement &Insert = cast<GInsertVectorElement>(Val&: MI);
1686 Register Vec = Insert.getVectorReg();
1687 Register Elt = Insert.getElementReg();
1688 Register Idx = Insert.getIndexReg();
1689
1690 LLT VecTy = MRI.getType(Reg: Vec);
1691
1692 if (VecTy.isScalableVector())
1693 return;
1694
1695 auto CIdx = getIConstantVRegVal(VReg: Idx, MRI);
1696
1697 unsigned NumElts = DemandedElts.getBitWidth();
1698 APInt DemandedVecElts = DemandedElts;
1699 bool NeedsElt = true;
1700 // If we know the index we are inserting to, clear it from Vec check.
1701 if (CIdx && CIdx->ult(RHS: NumElts)) {
1702 DemandedVecElts.clearBit(BitPosition: CIdx->getZExtValue());
1703 NeedsElt = DemandedElts[CIdx->getZExtValue()];
1704 }
1705
1706 // Do we demand the inserted element?
1707 if (NeedsElt) {
1708 computeKnownFPClass(R: Elt, Known, InterestedClasses, Depth: Depth + 1);
1709 // If we don't know any bits, early out.
1710 if (Known.isUnknown())
1711 break;
1712 } else {
1713 Known.KnownFPClasses = fcNone;
1714 }
1715
1716 // Do we need any more elements from Vec?
1717 if (!DemandedVecElts.isZero()) {
1718 KnownFPClass Known2;
1719 computeKnownFPClass(R: Vec, DemandedElts: DemandedVecElts, InterestedClasses, Known&: Known2,
1720 Depth: Depth + 1);
1721 Known |= Known2;
1722 }
1723
1724 break;
1725 }
1726 case TargetOpcode::G_SHUFFLE_VECTOR: {
1727 // For undef elements, we don't know anything about the common state of
1728 // the shuffle result.
1729 GShuffleVector &Shuf = cast<GShuffleVector>(Val&: MI);
1730 APInt DemandedLHS, DemandedRHS;
1731 if (DstTy.isScalableVector()) {
1732 assert(DemandedElts == APInt(1, 1));
1733 DemandedLHS = DemandedRHS = DemandedElts;
1734 } else {
1735 if (!llvm::getShuffleDemandedElts(SrcWidth: DstTy.getNumElements(), Mask: Shuf.getMask(),
1736 DemandedElts, DemandedLHS,
1737 DemandedRHS)) {
1738 Known.resetAll();
1739 return;
1740 }
1741 }
1742
1743 if (!!DemandedLHS) {
1744 Register LHS = Shuf.getSrc1Reg();
1745 computeKnownFPClass(R: LHS, DemandedElts: DemandedLHS, InterestedClasses, Known,
1746 Depth: Depth + 1);
1747
1748 // If we don't know any bits, early out.
1749 if (Known.isUnknown())
1750 break;
1751 } else {
1752 Known.KnownFPClasses = fcNone;
1753 }
1754
1755 if (!!DemandedRHS) {
1756 KnownFPClass Known2;
1757 Register RHS = Shuf.getSrc2Reg();
1758 computeKnownFPClass(R: RHS, DemandedElts: DemandedRHS, InterestedClasses, Known&: Known2,
1759 Depth: Depth + 1);
1760 Known |= Known2;
1761 }
1762 break;
1763 }
1764 case TargetOpcode::COPY: {
1765 Register Src = MI.getOperand(i: 1).getReg();
1766
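// Physical-register sources have no tracked definition to look through.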
1767 if (!Src.isVirtual())
1768 return;
1769
1770 computeKnownFPClass(R: Src, DemandedElts, InterestedClasses, Known, Depth: Depth + 1);
1771 break;
1772 }
1773 }
1774}
1775
1776KnownFPClass
1777GISelValueTracking::computeKnownFPClass(Register R, const APInt &DemandedElts,
1778 FPClassTest InterestedClasses,
1779 unsigned Depth) {
1780 KnownFPClass KnownClasses;
1781 computeKnownFPClass(R, DemandedElts, InterestedClasses, Known&: KnownClasses, Depth);
1782 return KnownClasses;
1783}
1784
1785KnownFPClass GISelValueTracking::computeKnownFPClass(
1786 Register R, FPClassTest InterestedClasses, unsigned Depth) {
1787 KnownFPClass Known;
1788 computeKnownFPClass(R, Known, InterestedClasses, Depth);
1789 return Known;
1790}
1791
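// The caller-provided fast-math flags assert that NaNs or infinities cannot
// occur: nnan/ninf both narrow the interested classes and clear the
// corresponding classes from the result.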
1792KnownFPClass GISelValueTracking::computeKnownFPClass(
1793 Register R, const APInt &DemandedElts, uint32_t Flags,
1794 FPClassTest InterestedClasses, unsigned Depth) {
1795 if (Flags & MachineInstr::MIFlag::FmNoNans)
1796 InterestedClasses &= ~fcNan;
1797 if (Flags & MachineInstr::MIFlag::FmNoInfs)
1798 InterestedClasses &= ~fcInf;
1799
1800 KnownFPClass Result =
1801 computeKnownFPClass(R, DemandedElts, InterestedClasses, Depth);
1802
1803 if (Flags & MachineInstr::MIFlag::FmNoNans)
1804 Result.KnownFPClasses &= ~fcNan;
1805 if (Flags & MachineInstr::MIFlag::FmNoInfs)
1806 Result.KnownFPClasses &= ~fcInf;
1807 return Result;
1808}
1809
1810KnownFPClass GISelValueTracking::computeKnownFPClass(
1811 Register R, uint32_t Flags, FPClassTest InterestedClasses, unsigned Depth) {
1812 LLT Ty = MRI.getType(Reg: R);
1813 APInt DemandedElts =
1814 Ty.isFixedVector() ? APInt::getAllOnes(numBits: Ty.getNumElements()) : APInt(1, 1);
1815 return computeKnownFPClass(R, DemandedElts, Flags, InterestedClasses, Depth);
1816}
1817
1818 /// Compute the number of sign bits common to both \p Src0 and \p Src1.
1819unsigned GISelValueTracking::computeNumSignBitsMin(Register Src0, Register Src1,
1820 const APInt &DemandedElts,
1821 unsigned Depth) {
1822 // Test src1 first, since we canonicalize simpler expressions to the RHS.
1823 unsigned Src1SignBits = computeNumSignBits(R: Src1, DemandedElts, Depth);
1824 if (Src1SignBits == 1)
1825 return 1;
1826 return std::min(a: computeNumSignBits(R: Src0, DemandedElts, Depth), b: Src1SignBits);
1827}
1828
1829 /// Compute the known number of sign bits from the range metadata attached to
1830 /// the memory operand. For an extending load, this accounts for the behavior of
1831/// the high bits.
1832static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
1833 unsigned TyBits) {
1834 const MDNode *Ranges = Ld->getRanges();
1835 if (!Ranges)
1836 return 1;
1837
1838 ConstantRange CR = getConstantRangeFromMetadata(RangeMD: *Ranges);
1839 if (TyBits > CR.getBitWidth()) {
1840 switch (Ld->getOpcode()) {
1841 case TargetOpcode::G_SEXTLOAD:
1842 CR = CR.signExtend(BitWidth: TyBits);
1843 break;
1844 case TargetOpcode::G_ZEXTLOAD:
1845 CR = CR.zeroExtend(BitWidth: TyBits);
1846 break;
1847 default:
1848 break;
1849 }
1850 }
1851
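// Every value in a contiguous signed range has at least as many sign bits as
// the endpoint with the fewest, so the minimum over both endpoints is safe.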
1852 return std::min(a: CR.getSignedMin().getNumSignBits(),
1853 b: CR.getSignedMax().getNumSignBits());
1854}
1855
1856unsigned GISelValueTracking::computeNumSignBits(Register R,
1857 const APInt &DemandedElts,
1858 unsigned Depth) {
1859 MachineInstr &MI = *MRI.getVRegDef(Reg: R);
1860 unsigned Opcode = MI.getOpcode();
1861
1862 if (Opcode == TargetOpcode::G_CONSTANT)
1863 return MI.getOperand(i: 1).getCImm()->getValue().getNumSignBits();
1864
1865 if (Depth == getMaxDepth())
1866 return 1;
1867
1868 if (!DemandedElts)
1869 return 1; // No demanded elts, better to assume we don't know anything.
1870
1871 LLT DstTy = MRI.getType(Reg: R);
1872 const unsigned TyBits = DstTy.getScalarSizeInBits();
1873
1874 // Handle the case where this is called on a register that does not have a
1875 // type constraint. This is unlikely to occur except by looking through copies,
1876 // but it is possible for the initial register being queried to be in this
1877 // state.
1878 if (!DstTy.isValid())
1879 return 1;
1880
1881 unsigned FirstAnswer = 1;
1882 switch (Opcode) {
1883 case TargetOpcode::COPY: {
1884 MachineOperand &Src = MI.getOperand(i: 1);
1885 if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
1886 MRI.getType(Reg: Src.getReg()).isValid()) {
1887 // Don't increment Depth for this one since we didn't do any work.
1888 return computeNumSignBits(R: Src.getReg(), DemandedElts, Depth);
1889 }
1890
1891 return 1;
1892 }
1893 case TargetOpcode::G_SEXT: {
1894 Register Src = MI.getOperand(i: 1).getReg();
1895 LLT SrcTy = MRI.getType(Reg: Src);
1896 unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
1897 return computeNumSignBits(R: Src, DemandedElts, Depth: Depth + 1) + Tmp;
1898 }
1899 case TargetOpcode::G_ASSERT_SEXT:
1900 case TargetOpcode::G_SEXT_INREG: {
1901 // Max of the input and what this extends.
1902 Register Src = MI.getOperand(i: 1).getReg();
1903 unsigned SrcBits = MI.getOperand(i: 2).getImm();
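// The in-register extension makes the top TyBits - SrcBits bits copies of
// bit SrcBits - 1, giving TyBits - SrcBits + 1 known sign bits.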
1904 unsigned InRegBits = TyBits - SrcBits + 1;
1905 return std::max(a: computeNumSignBits(R: Src, DemandedElts, Depth: Depth + 1),
1906 b: InRegBits);
1907 }
1908 case TargetOpcode::G_LOAD: {
1909 GLoad *Ld = cast<GLoad>(Val: &MI);
1910 if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
1911 break;
1912
1913 return computeNumSignBitsFromRangeMetadata(Ld, TyBits);
1914 }
1915 case TargetOpcode::G_SEXTLOAD: {
1916 GSExtLoad *Ld = cast<GSExtLoad>(Val: &MI);
1917
1918 // FIXME: We need an in-memory type representation.
1919 if (DstTy.isVector())
1920 return 1;
1921
1922 unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
1923 if (NumBits != 1)
1924 return NumBits;
1925
1926 // e.g. i16->i32 = '17' bits known.
1927 const MachineMemOperand *MMO = *MI.memoperands_begin();
1928 return TyBits - MMO->getSizeInBits().getValue() + 1;
1929 }
1930 case TargetOpcode::G_ZEXTLOAD: {
1931 GZExtLoad *Ld = cast<GZExtLoad>(Val: &MI);
1932
1933 // FIXME: We need an in-memory type representation.
1934 if (DstTy.isVector())
1935 return 1;
1936
1937 unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
1938 if (NumBits != 1)
1939 return NumBits;
1940
1941 // e.g. i16->i32 = '16' bits known.
1942 const MachineMemOperand *MMO = *MI.memoperands_begin();
1943 return TyBits - MMO->getSizeInBits().getValue();
1944 }
1945 case TargetOpcode::G_AND:
1946 case TargetOpcode::G_OR:
1947 case TargetOpcode::G_XOR: {
1948 Register Src1 = MI.getOperand(i: 1).getReg();
1949 unsigned Src1NumSignBits =
1950 computeNumSignBits(R: Src1, DemandedElts, Depth: Depth + 1);
1951 if (Src1NumSignBits != 1) {
1952 Register Src2 = MI.getOperand(i: 2).getReg();
1953 unsigned Src2NumSignBits =
1954 computeNumSignBits(R: Src2, DemandedElts, Depth: Depth + 1);
1955 FirstAnswer = std::min(a: Src1NumSignBits, b: Src2NumSignBits);
1956 }
1957 break;
1958 }
1959 case TargetOpcode::G_ASHR: {
1960 Register Src1 = MI.getOperand(i: 1).getReg();
1961 Register Src2 = MI.getOperand(i: 2).getReg();
1962 FirstAnswer = computeNumSignBits(R: Src1, DemandedElts, Depth: Depth + 1);
1963 if (auto C = getValidMinimumShiftAmount(R: Src2, DemandedElts, Depth: Depth + 1))
1964 FirstAnswer = std::min<uint64_t>(a: FirstAnswer + *C, b: TyBits);
1965 break;
1966 }
1967 case TargetOpcode::G_SHL: {
1968 Register Src1 = MI.getOperand(i: 1).getReg();
1969 Register Src2 = MI.getOperand(i: 2).getReg();
1970 if (std::optional<ConstantRange> ShAmtRange =
1971 getValidShiftAmountRange(R: Src2, DemandedElts, Depth: Depth + 1)) {
1972 uint64_t MaxShAmt = ShAmtRange->getUnsignedMax().getZExtValue();
1973 uint64_t MinShAmt = ShAmtRange->getUnsignedMin().getZExtValue();
1974
1975 MachineInstr &ExtMI = *MRI.getVRegDef(Reg: Src1);
1976 unsigned ExtOpc = ExtMI.getOpcode();
1977
1978 // Try to look through ZERO/SIGN/ANY_EXTEND. If all extended bits are
1979 // shifted out, then we can compute the number of sign bits for the
1980 // operand being extended. A future improvement could be to pass along the
1981 // "shifted left by" information in the recursive calls to
1982 // computeNumSignBits, allowing us to handle this more generically.
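// For example, (G_SHL (G_SEXT s8 to s32), 24) places the original 8-bit
// value in the top byte, so the result has at least as many sign bits as the
// 8-bit source.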
1983 if (ExtOpc == TargetOpcode::G_SEXT || ExtOpc == TargetOpcode::G_ZEXT ||
1984 ExtOpc == TargetOpcode::G_ANYEXT) {
1985 LLT ExtTy = MRI.getType(Reg: Src1);
1986 Register Extendee = ExtMI.getOperand(i: 1).getReg();
1987 LLT ExtendeeTy = MRI.getType(Reg: Extendee);
1988 uint64_t SizeDiff =
1989 ExtTy.getScalarSizeInBits() - ExtendeeTy.getScalarSizeInBits();
1990
1991 if (SizeDiff <= MinShAmt) {
1992 unsigned Tmp =
1993 SizeDiff + computeNumSignBits(R: Extendee, DemandedElts, Depth: Depth + 1);
1994 if (MaxShAmt < Tmp)
1995 return Tmp - MaxShAmt;
1996 }
1997 }
1998 // shl destroys sign bits, ensure it doesn't shift out all sign bits.
1999 unsigned Tmp = computeNumSignBits(R: Src1, DemandedElts, Depth: Depth + 1);
2000 if (MaxShAmt < Tmp)
2001 return Tmp - MaxShAmt;
2002 }
2003 break;
2004 }
2005 case TargetOpcode::G_TRUNC: {
2006 Register Src = MI.getOperand(i: 1).getReg();
2007 LLT SrcTy = MRI.getType(Reg: Src);
2008
2009 // Check if the sign bits of source go down as far as the truncated value.
2010 unsigned DstTyBits = DstTy.getScalarSizeInBits();
2011 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
2012 unsigned NumSrcSignBits = computeNumSignBits(R: Src, DemandedElts, Depth: Depth + 1);
2013 if (NumSrcSignBits > (NumSrcBits - DstTyBits))
2014 return NumSrcSignBits - (NumSrcBits - DstTyBits);
2015 break;
2016 }
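// A select produces one of its two inputs, so the result has at least as
// many sign bits as the input with fewer.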
2017 case TargetOpcode::G_SELECT: {
2018 return computeNumSignBitsMin(Src0: MI.getOperand(i: 2).getReg(),
2019 Src1: MI.getOperand(i: 3).getReg(), DemandedElts,
2020 Depth: Depth + 1);
2021 }
2022 case TargetOpcode::G_SMIN:
2023 case TargetOpcode::G_SMAX:
2024 case TargetOpcode::G_UMIN:
2025 case TargetOpcode::G_UMAX:
2026 // TODO: Handle clamp pattern with number of sign bits for SMIN/SMAX.
2027 return computeNumSignBitsMin(Src0: MI.getOperand(i: 1).getReg(),
2028 Src1: MI.getOperand(i: 2).getReg(), DemandedElts,
2029 Depth: Depth + 1);
2030 case TargetOpcode::G_SADDO:
2031 case TargetOpcode::G_SADDE:
2032 case TargetOpcode::G_UADDO:
2033 case TargetOpcode::G_UADDE:
2034 case TargetOpcode::G_SSUBO:
2035 case TargetOpcode::G_SSUBE:
2036 case TargetOpcode::G_USUBO:
2037 case TargetOpcode::G_USUBE:
2038 case TargetOpcode::G_SMULO:
2039 case TargetOpcode::G_UMULO: {
2040 // If the boolean result is 0/-1, all bits are sign bits.
2041 // We know that we have an integer-based boolean since these operations
2042 // are only available for integers.
2043 if (MI.getOperand(i: 1).getReg() == R) {
2044 if (TL.getBooleanContents(isVec: DstTy.isVector(), isFloat: false) ==
2045 TargetLowering::ZeroOrNegativeOneBooleanContent)
2046 return TyBits;
2047 }
2048
2049 break;
2050 }
2051 case TargetOpcode::G_SUB: {
2052 Register Src2 = MI.getOperand(i: 2).getReg();
2053 unsigned Src2NumSignBits =
2054 computeNumSignBits(R: Src2, DemandedElts, Depth: Depth + 1);
2055 if (Src2NumSignBits == 1)
2056 return 1; // Early out.
2057
2058 // Handle NEG.
2059 Register Src1 = MI.getOperand(i: 1).getReg();
2060 KnownBits Known1 = getKnownBits(R: Src1, DemandedElts, Depth);
2061 if (Known1.isZero()) {
2062 KnownBits Known2 = getKnownBits(R: Src2, DemandedElts, Depth);
2063 // If the input is known to be 0 or 1, the output is 0/-1, which is all
2064 // sign bits set.
2065 if ((Known2.Zero | 1).isAllOnes())
2066 return TyBits;
2067
2068 // If the input is known to be positive (the sign bit is known clear),
2069 // the output of the NEG has, at worst, the same number of sign bits as
2070 // the input.
2071 if (Known2.isNonNegative()) {
2072 FirstAnswer = Src2NumSignBits;
2073 break;
2074 }
2075
2076 // Otherwise, we treat this like a SUB.
2077 }
2078
2079 unsigned Src1NumSignBits =
2080 computeNumSignBits(R: Src1, DemandedElts, Depth: Depth + 1);
2081 if (Src1NumSignBits == 1)
2082 return 1; // Early Out.
2083
2084 // Sub can have at most one carry bit. Thus we know that the output
2085 // is, at worst, one more bit than the inputs.
2086 FirstAnswer = std::min(a: Src1NumSignBits, b: Src2NumSignBits) - 1;
2087 break;
2088 }
2089 case TargetOpcode::G_ADD: {
2090 Register Src2 = MI.getOperand(i: 2).getReg();
2091 unsigned Src2NumSignBits =
2092 computeNumSignBits(R: Src2, DemandedElts, Depth: Depth + 1);
2093 if (Src2NumSignBits <= 2)
2094 return 1; // Early out.
2095
2096 Register Src1 = MI.getOperand(i: 1).getReg();
2097 unsigned Src1NumSignBits =
2098 computeNumSignBits(R: Src1, DemandedElts, Depth: Depth + 1);
2099 if (Src1NumSignBits == 1)
2100 return 1; // Early Out.
2101
2102 // Special case decrementing a value (ADD X, -1):
2103 KnownBits Known2 = getKnownBits(R: Src2, DemandedElts, Depth);
2104 if (Known2.isAllOnes()) {
2105 KnownBits Known1 = getKnownBits(R: Src1, DemandedElts, Depth);
2106 // If the input is known to be 0 or 1, the output is 0/-1, which is all
2107 // sign bits set.
2108 if ((Known1.Zero | 1).isAllOnes())
2109 return TyBits;
2110
2111 // If we are subtracting one from a positive number, there is no carry
2112 // out of the result.
2113 if (Known1.isNonNegative()) {
2114 FirstAnswer = Src1NumSignBits;
2115 break;
2116 }
2117
2118 // Otherwise, we treat this like an ADD.
2119 }
2120
2121 // Add can have at most one carry bit. Thus we know that the output
2122 // is, at worst, one more bit than the inputs.
2123 FirstAnswer = std::min(a: Src1NumSignBits, b: Src2NumSignBits) - 1;
2124 break;
2125 }
2126 case TargetOpcode::G_FCMP:
2127 case TargetOpcode::G_ICMP: {
2128 bool IsFP = Opcode == TargetOpcode::G_FCMP;
2129 if (TyBits == 1)
2130 break;
2131 auto BC = TL.getBooleanContents(isVec: DstTy.isVector(), isFloat: IsFP);
2132 if (BC == TargetLoweringBase::ZeroOrNegativeOneBooleanContent)
2133 return TyBits; // All bits are sign bits.
2134 if (BC == TargetLowering::ZeroOrOneBooleanContent)
2135 return TyBits - 1; // Every always-zero bit is a sign bit.
2136 break;
2137 }
2138 case TargetOpcode::G_BUILD_VECTOR: {
2139 // Take the minimum number of sign bits over all demanded vector elements.
2140 FirstAnswer = TyBits;
2141 APInt SingleDemandedElt(1, 1);
2142 for (const auto &[I, MO] : enumerate(First: drop_begin(RangeOrContainer: MI.operands()))) {
2143 if (!DemandedElts[I])
2144 continue;
2145
2146 unsigned Tmp2 =
2147 computeNumSignBits(R: MO.getReg(), DemandedElts: SingleDemandedElt, Depth: Depth + 1);
2148 FirstAnswer = std::min(a: FirstAnswer, b: Tmp2);
2149
2150 // If we don't know any bits, early out.
2151 if (FirstAnswer == 1)
2152 break;
2153 }
2154 break;
2155 }
2156 case TargetOpcode::G_CONCAT_VECTORS: {
2157 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isScalableVector())
2158 break;
2159 FirstAnswer = TyBits;
2160 // Determine the minimum number of sign bits across all demanded
2161 // elts of the input vectors. Early out if the result is already 1.
2162 unsigned NumSubVectorElts =
2163 MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getNumElements();
2164 for (const auto &[I, MO] : enumerate(First: drop_begin(RangeOrContainer: MI.operands()))) {
2165 APInt DemandedSub =
2166 DemandedElts.extractBits(numBits: NumSubVectorElts, bitPosition: I * NumSubVectorElts);
2167 if (!DemandedSub)
2168 continue;
2169 unsigned Tmp2 = computeNumSignBits(R: MO.getReg(), DemandedElts: DemandedSub, Depth: Depth + 1);
2170
2171 FirstAnswer = std::min(a: FirstAnswer, b: Tmp2);
2172
2173 // If we don't know any bits, early out.
2174 if (FirstAnswer == 1)
2175 break;
2176 }
2177 break;
2178 }
2179 case TargetOpcode::G_SHUFFLE_VECTOR: {
2180 // Collect the minimum number of sign bits that are shared by every vector
2181 // element referenced by the shuffle.
2182 APInt DemandedLHS, DemandedRHS;
2183 Register Src1 = MI.getOperand(i: 1).getReg();
2184 unsigned NumElts = MRI.getType(Reg: Src1).getNumElements();
2185 if (!getShuffleDemandedElts(SrcWidth: NumElts, Mask: MI.getOperand(i: 3).getShuffleMask(),
2186 DemandedElts, DemandedLHS, DemandedRHS))
2187 return 1;
2188
2189 if (!!DemandedLHS)
2190 FirstAnswer = computeNumSignBits(R: Src1, DemandedElts: DemandedLHS, Depth: Depth + 1);
2191 // If we don't know anything, early out and try computeKnownBits fall-back.
2192 if (FirstAnswer == 1)
2193 break;
2194 if (!!DemandedRHS) {
2195 unsigned Tmp2 =
2196 computeNumSignBits(R: MI.getOperand(i: 2).getReg(), DemandedElts: DemandedRHS, Depth: Depth + 1);
2197 FirstAnswer = std::min(a: FirstAnswer, b: Tmp2);
2198 }
2199 break;
2200 }
2201 case TargetOpcode::G_SPLAT_VECTOR: {
2202 // Check if the sign bits of source go down as far as the truncated value.
2203 Register Src = MI.getOperand(i: 1).getReg();
2204 unsigned NumSrcSignBits = computeNumSignBits(R: Src, DemandedElts: APInt(1, 1), Depth: Depth + 1);
2205 unsigned NumSrcBits = MRI.getType(Reg: Src).getSizeInBits();
2206 if (NumSrcSignBits > (NumSrcBits - TyBits))
2207 return NumSrcSignBits - (NumSrcBits - TyBits);
2208 break;
2209 }
2210 case TargetOpcode::G_INTRINSIC:
2211 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
2212 case TargetOpcode::G_INTRINSIC_CONVERGENT:
2213 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
2214 default: {
2215 unsigned NumBits =
2216 TL.computeNumSignBitsForTargetInstr(Analysis&: *this, R, DemandedElts, MRI, Depth);
2217 if (NumBits > 1)
2218 FirstAnswer = std::max(a: FirstAnswer, b: NumBits);
2219 break;
2220 }
2221 }
2222
2223 // Finally, if we can prove that the top bits of the result are 0's or 1's,
2224 // use this information.
2225 KnownBits Known = getKnownBits(R, DemandedElts, Depth);
2226 APInt Mask;
2227 if (Known.isNonNegative()) { // sign bit is 0
2228 Mask = Known.Zero;
2229 } else if (Known.isNegative()) { // sign bit is 1
2230 Mask = Known.One;
2231 } else {
2232 // Nothing known.
2233 return FirstAnswer;
2234 }
2235
2236 // Okay, we know that the sign bit in Mask is set. Use CLO to determine
2237 // the number of identical bits in the top of the input value.
2238 Mask <<= Mask.getBitWidth() - TyBits;
2239 return std::max(a: FirstAnswer, b: Mask.countl_one());
2240}
2241
2242unsigned GISelValueTracking::computeNumSignBits(Register R, unsigned Depth) {
2243 LLT Ty = MRI.getType(Reg: R);
2244 APInt DemandedElts =
2245 Ty.isFixedVector() ? APInt::getAllOnes(numBits: Ty.getNumElements()) : APInt(1, 1);
2246 return computeNumSignBits(R, DemandedElts, Depth);
2247}
2248
2249std::optional<ConstantRange> GISelValueTracking::getValidShiftAmountRange(
2250 Register R, const APInt &DemandedElts, unsigned Depth) {
2251 // Shifting more than the bitwidth is not valid.
2252 MachineInstr &MI = *MRI.getVRegDef(Reg: R);
2253 unsigned Opcode = MI.getOpcode();
2254
2255 LLT Ty = MRI.getType(Reg: R);
2256 unsigned BitWidth = Ty.getScalarSizeInBits();
2257
2258 if (Opcode == TargetOpcode::G_CONSTANT) {
2259 const APInt &ShAmt = MI.getOperand(i: 1).getCImm()->getValue();
2260 if (ShAmt.uge(RHS: BitWidth))
2261 return std::nullopt;
2262 return ConstantRange(ShAmt);
2263 }
2264
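// For a vector shift amount built out of constants, the valid range spans
// the smallest and largest constant among the demanded lanes. A lane that
// shifts by the bit width or more makes the whole shift invalid.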
2265 if (Opcode == TargetOpcode::G_BUILD_VECTOR) {
2266 const APInt *MinAmt = nullptr, *MaxAmt = nullptr;
2267 for (unsigned I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
2268 if (!DemandedElts[I])
2269 continue;
2270 MachineInstr *Op = MRI.getVRegDef(Reg: MI.getOperand(i: I + 1).getReg());
2271 if (Op->getOpcode() != TargetOpcode::G_CONSTANT) {
2272 MinAmt = MaxAmt = nullptr;
2273 break;
2274 }
2275
2276 const APInt &ShAmt = Op->getOperand(i: 1).getCImm()->getValue();
2277 if (ShAmt.uge(RHS: BitWidth))
2278 return std::nullopt;
2279 if (!MinAmt || MinAmt->ugt(RHS: ShAmt))
2280 MinAmt = &ShAmt;
2281 if (!MaxAmt || MaxAmt->ult(RHS: ShAmt))
2282 MaxAmt = &ShAmt;
2283 }
2284 assert(((!MinAmt && !MaxAmt) || (MinAmt && MaxAmt)) &&
2285 "Failed to find matching min/max shift amounts");
2286 if (MinAmt && MaxAmt)
2287 return ConstantRange(*MinAmt, *MaxAmt + 1);
2288 }
2289
2290 // Use computeKnownBits to find a hidden constant/knownbits (usually type
2291 // legalized), e.g. hidden behind multiple bitcasts/build_vector/casts etc.
2292 KnownBits KnownAmt = getKnownBits(R, DemandedElts, Depth);
2293 if (KnownAmt.getMaxValue().ult(RHS: BitWidth))
2294 return ConstantRange::fromKnownBits(Known: KnownAmt, /*IsSigned=*/false);
2295
2296 return std::nullopt;
2297}
2298
2299std::optional<uint64_t> GISelValueTracking::getValidMinimumShiftAmount(
2300 Register R, const APInt &DemandedElts, unsigned Depth) {
2301 if (std::optional<ConstantRange> AmtRange =
2302 getValidShiftAmountRange(R, DemandedElts, Depth))
2303 return AmtRange->getUnsignedMin().getZExtValue();
2304 return std::nullopt;
2305}
2306
2307void GISelValueTrackingAnalysisLegacy::getAnalysisUsage(
2308 AnalysisUsage &AU) const {
2309 AU.setPreservesAll();
2310 MachineFunctionPass::getAnalysisUsage(AU);
2311}
2312
2313bool GISelValueTrackingAnalysisLegacy::runOnMachineFunction(
2314 MachineFunction &MF) {
2315 return false;
2316}
2317
2318GISelValueTracking &GISelValueTrackingAnalysisLegacy::get(MachineFunction &MF) {
2319 if (!Info) {
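// Recurse less deeply when not optimizing (MaxDepth 2 instead of 6).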
2320 unsigned MaxDepth =
2321 MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6;
2322 Info = std::make_unique<GISelValueTracking>(args&: MF, args&: MaxDepth);
2323 }
2324 return *Info;
2325}
2326
2327AnalysisKey GISelValueTrackingAnalysis::Key;
2328
2329GISelValueTracking
2330GISelValueTrackingAnalysis::run(MachineFunction &MF,
2331 MachineFunctionAnalysisManager &MFAM) {
2332 return Result(MF);
2333}
2334
2335PreservedAnalyses
2336GISelValueTrackingPrinterPass::run(MachineFunction &MF,
2337 MachineFunctionAnalysisManager &MFAM) {
2338 auto &VTA = MFAM.getResult<GISelValueTrackingAnalysis>(IR&: MF);
2339 const auto &MRI = MF.getRegInfo();
2340 OS << "name: ";
2341 MF.getFunction().printAsOperand(O&: OS, /*PrintType=*/false);
2342 OS << '\n';
2343
2344 for (MachineBasicBlock &BB : MF) {
2345 for (MachineInstr &MI : BB) {
2346 for (MachineOperand &MO : MI.defs()) {
2347 if (!MO.isReg() || MO.getReg().isPhysical())
2348 continue;
2349 Register Reg = MO.getReg();
2350 if (!MRI.getType(Reg).isValid())
2351 continue;
2352 KnownBits Known = VTA.getKnownBits(R: Reg);
2353 unsigned SignedBits = VTA.computeNumSignBits(R: Reg);
2354 OS << " " << MO << " KnownBits:" << Known << " SignBits:" << SignedBits
2355 << '\n';
2356 }
2357 }
2358 }
2359 return PreservedAnalyses::all();
2360}
2361