//===- lib/CodeGen/GlobalISel/GISelValueTracking.cpp ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Provides analysis for querying information about KnownBits during GISel
/// passes.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineFloatingPointPredicateUtils.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/FMF.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/KnownFPClass.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "gisel-known-bits"

using namespace llvm;
using namespace MIPatternMatch;

char llvm::GISelValueTrackingAnalysisLegacy::ID = 0;

INITIALIZE_PASS(GISelValueTrackingAnalysisLegacy, DEBUG_TYPE,
                "Analysis for Computing Known Bits", false, true)

GISelValueTracking::GISelValueTracking(MachineFunction &MF, unsigned MaxDepth)
    : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
      DL(MF.getFunction().getDataLayout()), MaxDepth(MaxDepth) {}

Align GISelValueTracking::computeKnownAlignment(Register R, unsigned Depth) {
  const MachineInstr *MI = MRI.getVRegDef(R);
  switch (MI->getOpcode()) {
  case TargetOpcode::COPY:
    return computeKnownAlignment(MI->getOperand(1).getReg(), Depth);
  case TargetOpcode::G_ASSERT_ALIGN: {
    // TODO: Min with source
    return Align(MI->getOperand(2).getImm());
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI->getOperand(1).getIndex();
    return MF.getFrameInfo().getObjectAlign(FrameIdx);
  }
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  default:
    return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
  }
}

KnownBits GISelValueTracking::getKnownBits(MachineInstr &MI) {
  assert(MI.getNumExplicitDefs() == 1 &&
         "expected single return generic instruction");
  return getKnownBits(MI.getOperand(0).getReg());
}

KnownBits GISelValueTracking::getKnownBits(Register R) {
  const LLT Ty = MRI.getType(R);
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts =
      Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  return getKnownBits(R, DemandedElts);
}

KnownBits GISelValueTracking::getKnownBits(Register R,
                                           const APInt &DemandedElts,
                                           unsigned Depth) {
  KnownBits Known;
  computeKnownBitsImpl(R, Known, DemandedElts, Depth);
  return Known;
}

bool GISelValueTracking::signBitIsZero(Register R) {
  LLT Ty = MRI.getType(R);
  unsigned BitWidth = Ty.getScalarSizeInBits();
  return maskedValueIsZero(R, APInt::getSignMask(BitWidth));
}

APInt GISelValueTracking::getKnownZeroes(Register R) {
  return getKnownBits(R).Zero;
}

APInt GISelValueTracking::getKnownOnes(Register R) {
  return getKnownBits(R).One;
}

[[maybe_unused]] static void
dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
  dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
         << "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
         << toString(Known.Zero | Known.One, 16, false) << "\n"
         << "[" << Depth << "] Zero: 0x" << toString(Known.Zero, 16, false)
         << "\n"
         << "[" << Depth << "] One: 0x" << toString(Known.One, 16, false)
         << "\n";
}

/// Compute the known bits common to \p Src0 and \p Src1, i.e. the
/// intersection of the known bits of the two sources.
void GISelValueTracking::computeKnownBitsMin(Register Src0, Register Src1,
                                             KnownBits &Known,
                                             const APInt &DemandedElts,
                                             unsigned Depth) {
  // Test src1 first, since we canonicalize simpler expressions to the RHS.
  computeKnownBitsImpl(Src1, Known, DemandedElts, Depth);

  // If we don't know any bits, early out.
  if (Known.isUnknown())
    return;

  KnownBits Known2;
  computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);

  // Only known if known in both the LHS and RHS.
  Known = Known.intersectWith(Known2);
}

// Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is
// created using Width. Use this function when the inputs are KnownBits
// objects. TODO: Move this to KnownBits.h if this is usable in more cases.
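// For example, with BitWidth = 8 and a Width known to be exactly 3, Mask is
// known to be 0b00000111, so for a constant Offset of 2 the result is
// (SrcOpKnown >> 2) & 0b111.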
static KnownBits extractBits(unsigned BitWidth, const KnownBits &SrcOpKnown,
                             const KnownBits &OffsetKnown,
                             const KnownBits &WidthKnown) {
  KnownBits Mask(BitWidth);
  Mask.Zero = APInt::getBitsSetFrom(
      BitWidth, WidthKnown.getMaxValue().getLimitedValue(BitWidth));
  Mask.One = APInt::getLowBitsSet(
      BitWidth, WidthKnown.getMinValue().getLimitedValue(BitWidth));
  return KnownBits::lshr(SrcOpKnown, OffsetKnown) & Mask;
}

void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
                                              const APInt &DemandedElts,
                                              unsigned Depth) {
  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();
  LLT DstTy = MRI.getType(R);

  // Handle the case where this is called on a register that does not have a
  // type constraint. For example, it may be post-ISel or this target might not
  // preserve the type when early-selecting instructions.
  if (!DstTy.isValid()) {
    Known = KnownBits();
    return;
  }

#ifndef NDEBUG
  if (DstTy.isFixedVector()) {
    assert(
        DstTy.getNumElements() == DemandedElts.getBitWidth() &&
        "DemandedElt width should equal the fixed vector number of elements");
  } else {
    assert(DemandedElts.getBitWidth() == 1 && DemandedElts == APInt(1, 1) &&
           "DemandedElt width should be 1 for scalars or scalable vectors");
  }
#endif

  unsigned BitWidth = DstTy.getScalarSizeInBits();
  Known = KnownBits(BitWidth); // Don't know anything

  // Depth may exceed the max depth if it gets passed to a different
  // GISelValueTracking object.
  // This may happen when, say, a generic part uses a GISelValueTracking object
  // with some max depth, but then we hit TL.computeKnownBitsForTargetInstr,
  // which creates a new GISelValueTracking object with a different and smaller
  // max depth. If we just checked for equality, we would never exit if the
  // depth passed down to the target-specific GISelValueTracking object were
  // already bigger than its max depth.
  if (Depth >= getMaxDepth())
    return;

  if (!DemandedElts)
    return; // No demanded elts, better to assume we don't know anything.

  KnownBits Known2;

  switch (Opcode) {
  default:
    TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI,
                                      Depth);
    break;
  case TargetOpcode::G_BUILD_VECTOR: {
    // Collect the known bits that are shared by every demanded vector element.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
      if (!DemandedElts[I])
        continue;

      computeKnownBitsImpl(MO.getReg(), Known2, APInt(1, 1), Depth + 1);

      // Known bits are the values that are shared by every demanded element.
      Known = Known.intersectWith(Known2);

      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }
    break;
  }
  case TargetOpcode::G_SPLAT_VECTOR: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, APInt(1, 1),
                         Depth + 1);
    // Implicitly truncate the bits to match the official semantics of
    // G_SPLAT_VECTOR.
    Known = Known.trunc(BitWidth);
    break;
  }
  case TargetOpcode::COPY:
  case TargetOpcode::G_PHI:
  case TargetOpcode::PHI: {
    Known.One = APInt::getAllOnes(BitWidth);
    Known.Zero = APInt::getAllOnes(BitWidth);
    // Destination registers should not have subregisters at this
    // point of the pipeline, otherwise the main live-range will be
    // defined more than once, which is against SSA.
    assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?");
    // A PHI's operands are an interleaved mix of registers and basic blocks.
    // We only care about the register ones.
    for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
      const MachineOperand &Src = MI.getOperand(Idx);
      Register SrcReg = Src.getReg();
      LLT SrcTy = MRI.getType(SrcReg);
      // Look through trivial copies and phis, but don't look through copies
      // or phis of the form `%1:(s32) = OP %0:gpr32`; known-bits analysis is
      // currently unable to determine the bit width of a register class.
      //
      // We can't use NoSubRegister by name as it's defined by each target but
      // it's always defined to be 0 by tablegen.
      if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
          SrcTy.isValid()) {
        // In case we're forwarding from a vector register to a non-vector
        // register we need to update the demanded elements to reflect this
        // before recursing.
        APInt NowDemandedElts = SrcTy.isFixedVector() && !DstTy.isFixedVector()
                                    ? APInt::getAllOnes(SrcTy.getNumElements())
                                    : DemandedElts; // Known to be APInt(1, 1)
        // For COPYs we don't do anything, don't increase the depth.
        computeKnownBitsImpl(SrcReg, Known2, NowDemandedElts,
                             Depth + (Opcode != TargetOpcode::COPY));
        Known2 = Known2.anyextOrTrunc(BitWidth);
        Known = Known.intersectWith(Known2);
        // If we reach a point where we don't know anything
        // just stop looking through the operands.
        if (Known.isUnknown())
          break;
      } else {
        // We know nothing.
        Known = KnownBits(BitWidth);
        break;
      }
    }
    break;
  }
  case TargetOpcode::G_CONSTANT: {
    Known = KnownBits::makeConstant(MI.getOperand(1).getCImm()->getValue());
    break;
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI.getOperand(1).getIndex();
    TL.computeKnownBitsForFrameIndex(FrameIdx, Known, MF);
    break;
  }
  case TargetOpcode::G_SUB: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::sub(Known, Known2);
    break;
  }
  case TargetOpcode::G_XOR: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known ^= Known2;
    break;
  }
  case TargetOpcode::G_PTR_ADD: {
    if (DstTy.isVector())
      break;
    // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets?
    LLT Ty = MRI.getType(MI.getOperand(1).getReg());
    if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
      break;
    [[fallthrough]];
  }
  case TargetOpcode::G_ADD: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::add(Known, Known2);
    break;
  }
  case TargetOpcode::G_AND: {
    // If either the LHS or the RHS are Zero, the result is zero.
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known &= Known2;
    break;
  }
  case TargetOpcode::G_OR: {
    // Bits that are one in either the LHS or the RHS are one in the result.
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known |= Known2;
    break;
  }
  case TargetOpcode::G_MUL: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::mul(Known, Known2);
    break;
  }
  case TargetOpcode::G_UMULH: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::mulhu(Known, Known2);
    break;
  }
  case TargetOpcode::G_SMULH: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::mulhs(Known, Known2);
    break;
  }
  case TargetOpcode::G_UDIV: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::udiv(Known, Known2,
                            MI.getFlag(MachineInstr::MIFlag::IsExact));
    break;
  }
  case TargetOpcode::G_SDIV: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::sdiv(Known, Known2,
                            MI.getFlag(MachineInstr::MIFlag::IsExact));
    break;
  }
  case TargetOpcode::G_SELECT: {
    computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
                        Known, DemandedElts, Depth + 1);
    break;
  }
  case TargetOpcode::G_SMIN: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_SMAX: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMIN: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::umin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMAX: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::umax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    if (DstTy.isVector())
      break;
    if (TL.getBooleanContents(DstTy.isVector(),
                              Opcode == TargetOpcode::G_FCMP) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case TargetOpcode::G_SEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    // If the sign bit is known to be zero or one, then sext will extend
    // it to the top bits, else it will just zext.
    Known = Known.sext(BitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_SEXT:
  case TargetOpcode::G_SEXT_INREG: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.sextInReg(MI.getOperand(2).getImm());
    break;
  }
  case TargetOpcode::G_ANYEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.anyext(BitWidth);
    break;
  }
  case TargetOpcode::G_LOAD: {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = KnownRange.anyext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    if (DstTy.isVector())
      break;
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = Opcode == TargetOpcode::G_SEXTLOAD
                ? KnownRange.sext(Known.getBitWidth())
                : KnownRange.zext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_ASHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::ashr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_LSHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::lshr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_SHL: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::shl(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_ROTL:
  case TargetOpcode::G_ROTR: {
    MachineInstr *AmtOpMI = MRI.getVRegDef(MI.getOperand(2).getReg());
    auto MaybeAmtOp = isConstantOrConstantSplatVector(*AmtOpMI, MRI);
    if (!MaybeAmtOp)
      break;

    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);

    unsigned Amt = MaybeAmtOp->urem(BitWidth);

    // Canonicalize to ROTR.
    if (Opcode == TargetOpcode::G_ROTL)
      Amt = BitWidth - Amt;
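    // Amt may now equal BitWidth (when the original G_ROTL amount was 0);
    // this is fine, since APInt::rotr reduces the rotate amount modulo the
    // bit width.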

    Known.Zero = Known.Zero.rotr(Amt);
    Known.One = Known.One.rotr(Amt);
    break;
  }
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_PTRTOINT:
    if (DstTy.isVector())
      break;
    // Fall through and handle them the same as zext/trunc.
    [[fallthrough]];
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_TRUNC: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.zextOrTrunc(BitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_ZEXT: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);

    unsigned SrcBitWidth = MI.getOperand(2).getImm();
    assert(SrcBitWidth && "SrcBitWidth can't be zero");
    APInt InMask = APInt::getLowBitsSet(BitWidth, SrcBitWidth);
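    // Everything above SrcBitWidth is asserted to be zero; clear any
    // previously recorded one bits that would contradict that.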
    Known.Zero |= (~InMask);
    Known.One &= (~Known.Zero);
    break;
  }
  case TargetOpcode::G_ASSERT_ALIGN: {
    int64_t LogOfAlign = Log2_64(MI.getOperand(2).getImm());

    // TODO: Should use maximum with source
    // If a node is guaranteed to be aligned, set low zero bits accordingly as
    // well as clearing one bits.
    Known.Zero.setLowBits(LogOfAlign);
    Known.One.clearLowBits(LogOfAlign);
    break;
  }
  case TargetOpcode::G_MERGE_VALUES: {
    unsigned NumOps = MI.getNumOperands();
    unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

    for (unsigned I = 0; I != NumOps - 1; ++I) {
      KnownBits SrcOpKnown;
      computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown,
                           DemandedElts, Depth + 1);
      Known.insertBits(SrcOpKnown, I * OpSize);
    }
    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    unsigned NumOps = MI.getNumOperands();
    Register SrcReg = MI.getOperand(NumOps - 1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);

    if (SrcTy.isVector() && SrcTy.getScalarType() != DstTy.getScalarType())
      return; // TODO: Handle vector->subelement unmerges

    // Figure out the result operand index.
    unsigned DstIdx = 0;
    for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R;
         ++DstIdx)
      ;

    APInt SubDemandedElts = DemandedElts;
    if (SrcTy.isVector()) {
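      // E.g. when unmerging a <4 x s32> into two <2 x s32> results, the
      // demanded lanes of result 1 (DstIdx = 1, DstLanes = 2) map to the
      // upper two lanes of the source, so shift DemandedElts up accordingly.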
      unsigned DstLanes = DstTy.isVector() ? DstTy.getNumElements() : 1;
      SubDemandedElts =
          DemandedElts.zext(SrcTy.getNumElements()).shl(DstIdx * DstLanes);
    }

    KnownBits SrcOpKnown;
    computeKnownBitsImpl(SrcReg, SrcOpKnown, SubDemandedElts, Depth + 1);

    if (SrcTy.isVector())
      Known = std::move(SrcOpKnown);
    else
      Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
    break;
  }
  case TargetOpcode::G_BSWAP: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.byteSwap();
    break;
  }
  case TargetOpcode::G_BITREVERSE: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.reverseBits();
    break;
  }
  case TargetOpcode::G_CTPOP: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    // We can bound the space the count needs. Also, bits known to be zero
    // can't contribute to the population.
    unsigned BitsPossiblySet = Known2.countMaxPopulation();
    unsigned LowBits = llvm::bit_width(BitsPossiblySet);
    Known.Zero.setBitsFrom(LowBits);
    // TODO: we could bound Known.One using the lower bound on the number of
    // bits which might be set provided by popcnt KnownOne2.
    break;
  }
  case TargetOpcode::G_UBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    break;
  }
  case TargetOpcode::G_SBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    OffsetKnown = OffsetKnown.sext(BitWidth);
    WidthKnown = WidthKnown.sext(BitWidth);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    // Sign extend the extracted value using shift left and arithmetic shift
    // right.
    KnownBits ExtKnown = KnownBits::makeConstant(APInt(BitWidth, BitWidth));
    KnownBits ShiftKnown = KnownBits::sub(ExtKnown, WidthKnown);
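    // E.g. with BitWidth = 32 and a Width known to be 8, ShiftKnown is 24:
    // shifting the extracted field to the top and arithmetic-shifting it back
    // down replicates its sign bit into the high bits.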
    Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown);
    break;
  }
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE: {
    if (MI.getOperand(1).getReg() == R) {
      // If we know the result of a compare has the top bits zero, use this
      // info.
      if (TL.getBooleanContents(DstTy.isVector(), false) ==
              TargetLowering::ZeroOrOneBooleanContent &&
          BitWidth > 1)
        Known.Zero.setBitsFrom(1);
      break;
    }

    assert(MI.getOperand(0).getReg() == R &&
           "We only compute knownbits for the sum here.");
    // With [US]ADDE, a carry bit may be added in.
    KnownBits Carry(1);
    if (Opcode == TargetOpcode::G_UADDE || Opcode == TargetOpcode::G_SADDE) {
      computeKnownBitsImpl(MI.getOperand(4).getReg(), Carry, DemandedElts,
                           Depth + 1);
      // Carry has bit width 1
      Carry = Carry.trunc(1);
    } else {
      Carry.setAllZero();
    }

    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::computeForAddCarry(Known, Known2, Carry);
    break;
  }
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO: {
    if (MI.getOperand(1).getReg() == R) {
      // If we know the result of a compare has the top bits zero, use this
      // info.
      if (TL.getBooleanContents(DstTy.isVector(), false) ==
              TargetLowering::ZeroOrOneBooleanContent &&
          BitWidth > 1)
        Known.Zero.setBitsFrom(1);
    }
    break;
  }
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    KnownBits SrcOpKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    // If we have a known 1, its position is our upper bound.
    unsigned PossibleTZ = SrcOpKnown.countMaxTrailingZeros();
    unsigned LowBits = llvm::bit_width(PossibleTZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    KnownBits SrcOpKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    // If we have a known 1, its position is our upper bound.
    unsigned PossibleLZ = SrcOpKnown.countMaxLeadingZeros();
    unsigned LowBits = llvm::bit_width(PossibleLZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case TargetOpcode::G_CTLS: {
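    // G_CTLS counts the leading redundant sign bits, so the result is at
    // least computeNumSignBits(Reg) - 1 and at most the scalar width minus
    // one; encode that as a half-open ConstantRange.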
    Register Reg = MI.getOperand(1).getReg();
    unsigned MinRedundantSignBits = computeNumSignBits(Reg, Depth + 1) - 1;

    unsigned MaxUpperRedundantSignBits = MRI.getType(Reg).getScalarSizeInBits();

    ConstantRange Range(APInt(BitWidth, MinRedundantSignBits),
                        APInt(BitWidth, MaxUpperRedundantSignBits));

    Known = Range.toKnownBits();
    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    GExtractVectorElement &Extract = cast<GExtractVectorElement>(MI);
    Register InVec = Extract.getVectorReg();
    Register EltNo = Extract.getIndexReg();

    auto ConstEltNo = getIConstantVRegVal(EltNo, MRI);

    LLT VecVT = MRI.getType(InVec);
    // computeKnownBits not yet implemented for scalable vectors.
    if (VecVT.isScalableVector())
      break;

    const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
    const unsigned NumSrcElts = VecVT.getNumElements();
    // A return type different from the vector's element type may lead to
    // issues with pattern selection. Bail out to avoid that.
    if (BitWidth > EltBitWidth)
      break;

    Known.Zero.setAllBits();
    Known.One.setAllBits();

    // If we know the element index, just demand that vector element, else for
    // an unknown element index, ignore DemandedElts and demand them all.
    APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
    if (ConstEltNo && ConstEltNo->ult(NumSrcElts))
      DemandedSrcElts =
          APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());

    computeKnownBitsImpl(InVec, Known, DemandedSrcElts, Depth + 1);
    break;
  }
  case TargetOpcode::G_SHUFFLE_VECTOR: {
    APInt DemandedLHS, DemandedRHS;
    // Collect the known bits that are shared by every vector element
    // referenced by the shuffle.
    unsigned NumElts = MRI.getType(MI.getOperand(1).getReg()).getNumElements();
    if (!getShuffleDemandedElts(NumElts, MI.getOperand(3).getShuffleMask(),
                                DemandedElts, DemandedLHS, DemandedRHS))
      break;

    // Known bits are the values that are shared by every demanded element.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    if (!!DemandedLHS) {
      computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedLHS,
                           Depth + 1);
      Known = Known.intersectWith(Known2);
    }
    // If we don't know any bits, early out.
    if (Known.isUnknown())
      break;
    if (!!DemandedRHS) {
      computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedRHS,
                           Depth + 1);
      Known = Known.intersectWith(Known2);
    }
    break;
  }
  case TargetOpcode::G_CONCAT_VECTORS: {
    if (MRI.getType(MI.getOperand(0).getReg()).isScalableVector())
      break;
    // Split DemandedElts and test each of the demanded subvectors.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    unsigned NumSubVectorElts =
        MRI.getType(MI.getOperand(1).getReg()).getNumElements();

    for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
      APInt DemandedSub =
          DemandedElts.extractBits(NumSubVectorElts, I * NumSubVectorElts);
      if (!!DemandedSub) {
        computeKnownBitsImpl(MO.getReg(), Known2, DemandedSub, Depth + 1);

        Known = Known.intersectWith(Known2);
      }
      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }
    break;
  }
  case TargetOpcode::G_ABS: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.abs();
    Known.Zero.setHighBits(computeNumSignBits(SrcReg, DemandedElts, Depth + 1) -
                           1);
    break;
  }
  }

  LLVM_DEBUG(dumpResult(MI, Known, Depth));
}

static bool outputDenormalIsIEEEOrPosZero(const MachineFunction &MF, LLT Ty) {
  Ty = Ty.getScalarType();
  DenormalMode Mode = MF.getDenormalMode(getFltSemanticForLLT(Ty));
  return Mode.Output == DenormalMode::IEEE ||
         Mode.Output == DenormalMode::PositiveZero;
}

void GISelValueTracking::computeKnownFPClass(Register R, KnownFPClass &Known,
                                             FPClassTest InterestedClasses,
                                             unsigned Depth) {
  LLT Ty = MRI.getType(R);
  APInt DemandedElts =
      Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  computeKnownFPClass(R, DemandedElts, InterestedClasses, Known, Depth);
}

void GISelValueTracking::computeKnownFPClassForFPTrunc(
    const MachineInstr &MI, const APInt &DemandedElts,
    FPClassTest InterestedClasses, KnownFPClass &Known, unsigned Depth) {
  if ((InterestedClasses & (KnownFPClass::OrderedLessThanZeroMask | fcNan)) ==
      fcNone)
    return;

  Register Val = MI.getOperand(1).getReg();
  KnownFPClass KnownSrc;
  computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                      Depth + 1);

  // Sign should be preserved
  // TODO: Handle cannot be ordered greater than zero
  if (KnownSrc.cannotBeOrderedLessThanZero())
    Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);

  Known.propagateNaN(KnownSrc, /*PreserveSign=*/true);

  // Infinity needs a range check.
}

void GISelValueTracking::computeKnownFPClass(Register R,
                                             const APInt &DemandedElts,
                                             FPClassTest InterestedClasses,
                                             KnownFPClass &Known,
                                             unsigned Depth) {
  assert(Known.isUnknown() && "should not be called with known information");

  if (!DemandedElts) {
    // No demanded elts, better to assume we don't know anything.
    Known.resetAll();
    return;
  }

  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");

  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();
  LLT DstTy = MRI.getType(R);

  if (!DstTy.isValid()) {
    Known.resetAll();
    return;
  }

  if (auto Cst = GFConstant::getConstant(R, MRI)) {
    switch (Cst->getKind()) {
    case GFConstant::GFConstantKind::Scalar: {
      auto APF = Cst->getScalarValue();
      Known.KnownFPClasses = APF.classify();
      Known.SignBit = APF.isNegative();
      break;
    }
    case GFConstant::GFConstantKind::FixedVector: {
      Known.KnownFPClasses = fcNone;
      bool SignBitAllZero = true;
      bool SignBitAllOne = true;

      for (auto C : *Cst) {
        Known.KnownFPClasses |= C.classify();
        if (C.isNegative())
          SignBitAllZero = false;
        else
          SignBitAllOne = false;
      }
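      // If exactly one of the two flags survived the loop, every element
      // agreed on the sign; a mixed-sign vector clears both flags and leaves
      // SignBit unset.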

      if (SignBitAllOne != SignBitAllZero)
        Known.SignBit = SignBitAllOne;

      break;
    }
    case GFConstant::GFConstantKind::ScalableVector: {
      Known.resetAll();
      break;
    }
    }

    return;
  }

  FPClassTest KnownNotFromFlags = fcNone;
  if (MI.getFlag(MachineInstr::MIFlag::FmNoNans))
    KnownNotFromFlags |= fcNan;
  if (MI.getFlag(MachineInstr::MIFlag::FmNoInfs))
    KnownNotFromFlags |= fcInf;

  // We no longer need to find out about these bits from inputs if we can
  // assume this from flags/attributes.
  InterestedClasses &= ~KnownNotFromFlags;

  llvm::scope_exit ClearClassesFromFlags(
      [=, &Known] { Known.knownNot(KnownNotFromFlags); });

  // All recursive calls that increase depth must come after this.
  if (Depth == MaxAnalysisRecursionDepth)
    return;

  const MachineFunction *MF = MI.getMF();

  switch (Opcode) {
  default:
    TL.computeKnownFPClassForTargetInstr(*this, R, Known, DemandedElts, MRI,
                                         Depth);
    break;
  case TargetOpcode::G_FNEG: {
    Register Val = MI.getOperand(1).getReg();
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, Known, Depth + 1);
    Known.fneg();
    break;
  }
  case TargetOpcode::G_SELECT: {
    GSelect &SelMI = cast<GSelect>(MI);
    Register Cond = SelMI.getCondReg();
    Register LHS = SelMI.getTrueReg();
    Register RHS = SelMI.getFalseReg();

    FPClassTest FilterLHS = fcAllFlags;
    FPClassTest FilterRHS = fcAllFlags;

    Register TestedValue;
    FPClassTest MaskIfTrue = fcAllFlags;
    FPClassTest MaskIfFalse = fcAllFlags;
    FPClassTest ClassVal = fcNone;

    CmpInst::Predicate Pred;
    Register CmpLHS, CmpRHS;
    if (mi_match(Cond, MRI,
                 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) {
      // If the select filters out a value based on the class, it no longer
      // participates in the class of the result.

      // TODO: In some degenerate cases we can infer something if we try again
      // without looking through sign operations.
      bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS;
      std::tie(TestedValue, MaskIfTrue, MaskIfFalse) =
          fcmpImpliesClass(Pred, *MF, CmpLHS, CmpRHS, LookThroughFAbsFNeg);
    } else if (mi_match(
                   Cond, MRI,
                   m_GIsFPClass(m_Reg(TestedValue), m_FPClassTest(ClassVal)))) {
      FPClassTest TestedMask = ClassVal;
      MaskIfTrue = TestedMask;
      MaskIfFalse = ~TestedMask;
    }

    if (TestedValue == LHS) {
      // match !isnan(x) ? x : y
      FilterLHS = MaskIfTrue;
    } else if (TestedValue == RHS) { // && IsExactClass
      // match !isnan(x) ? y : x
      FilterRHS = MaskIfFalse;
    }

    KnownFPClass Known2;
    computeKnownFPClass(LHS, DemandedElts, InterestedClasses & FilterLHS, Known,
                        Depth + 1);
    Known.KnownFPClasses &= FilterLHS;

    computeKnownFPClass(RHS, DemandedElts, InterestedClasses & FilterRHS,
                        Known2, Depth + 1);
    Known2.KnownFPClasses &= FilterRHS;

    Known |= Known2;
    break;
  }
  case TargetOpcode::G_FCOPYSIGN: {
    Register Magnitude = MI.getOperand(1).getReg();
    Register Sign = MI.getOperand(2).getReg();

    KnownFPClass KnownSign;

    computeKnownFPClass(Magnitude, DemandedElts, InterestedClasses, Known,
                        Depth + 1);
    computeKnownFPClass(Sign, DemandedElts, InterestedClasses, KnownSign,
                        Depth + 1);
    Known.copysign(KnownSign);
    break;
  }
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_STRICT_FMA:
  case TargetOpcode::G_FMAD: {
    if ((InterestedClasses & fcNegative) == fcNone)
      break;

    Register A = MI.getOperand(1).getReg();
    Register B = MI.getOperand(2).getReg();
    Register C = MI.getOperand(3).getReg();

    if (A != B)
      break;

    // The multiply cannot be -0 and therefore the add can't be -0.
    Known.knownNot(fcNegZero);

    // x * x + y is non-negative if y is non-negative.
    KnownFPClass KnownAddend;
    computeKnownFPClass(C, DemandedElts, InterestedClasses, KnownAddend,
                        Depth + 1);

    if (KnownAddend.cannotBeOrderedLessThanZero())
      Known.knownNot(fcNegative);
    break;
  }
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_STRICT_FSQRT: {
    KnownFPClass KnownSrc;
    FPClassTest InterestedSrcs = InterestedClasses;
    if (InterestedClasses & fcNan)
      InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;

    Register Val = MI.getOperand(1).getReg();

    computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1);

    if (KnownSrc.isKnownNeverPosInfinity())
      Known.knownNot(fcPosInf);
    if (KnownSrc.isKnownNever(fcSNan))
      Known.knownNot(fcSNan);

    // Any negative value besides -0 returns a nan.
    if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
      Known.knownNot(fcNan);

    // The only negative value that can be returned is -0 for -0 inputs.
    Known.knownNot(fcNegInf | fcNegSubnormal | fcNegNormal);
    break;
  }
  case TargetOpcode::G_FABS: {
    if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) {
      Register Val = MI.getOperand(1).getReg();
      // If we only care about the sign bit we don't need to inspect the
      // operand.
      computeKnownFPClass(Val, DemandedElts, InterestedClasses, Known,
                          Depth + 1);
    }
    Known.fabs();
    break;
  }
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSINCOS: {
    // Return NaN on infinite inputs.
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;

    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);
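    // sin/cos results always lie in [-1, 1], so the result can never be an
    // infinity.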
    Known.knownNot(fcInf);

    if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity())
      Known.knownNot(fcNan);
    break;
  }
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMAXIMUMNUM:
  case TargetOpcode::G_FMINIMUMNUM: {
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    KnownFPClass KnownLHS, KnownRHS;

    computeKnownFPClass(LHS, DemandedElts, InterestedClasses, KnownLHS,
                        Depth + 1);
    computeKnownFPClass(RHS, DemandedElts, InterestedClasses, KnownRHS,
                        Depth + 1);

    bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN();
    Known = KnownLHS | KnownRHS;

    // If either operand is not NaN, the result is not NaN.
    if (NeverNaN && (Opcode == TargetOpcode::G_FMINNUM ||
                     Opcode == TargetOpcode::G_FMAXNUM ||
                     Opcode == TargetOpcode::G_FMINIMUMNUM ||
                     Opcode == TargetOpcode::G_FMAXIMUMNUM))
      Known.knownNot(fcNan);

    if (Opcode == TargetOpcode::G_FMAXNUM ||
        Opcode == TargetOpcode::G_FMAXIMUMNUM ||
        Opcode == TargetOpcode::G_FMAXNUM_IEEE) {
      // If at least one operand is known to be positive, the result must be
      // positive.
      if ((KnownLHS.cannotBeOrderedLessThanZero() &&
           KnownLHS.isKnownNeverNaN()) ||
          (KnownRHS.cannotBeOrderedLessThanZero() &&
           KnownRHS.isKnownNeverNaN()))
        Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
    } else if (Opcode == TargetOpcode::G_FMAXIMUM) {
      // If at least one operand is known to be positive, the result must be
      // positive.
      if (KnownLHS.cannotBeOrderedLessThanZero() ||
          KnownRHS.cannotBeOrderedLessThanZero())
        Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
    } else if (Opcode == TargetOpcode::G_FMINNUM ||
               Opcode == TargetOpcode::G_FMINIMUMNUM ||
               Opcode == TargetOpcode::G_FMINNUM_IEEE) {
      // If at least one operand is known to be negative, the result must be
      // negative.
      if ((KnownLHS.cannotBeOrderedGreaterThanZero() &&
           KnownLHS.isKnownNeverNaN()) ||
          (KnownRHS.cannotBeOrderedGreaterThanZero() &&
           KnownRHS.isKnownNeverNaN()))
        Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
    } else if (Opcode == TargetOpcode::G_FMINIMUM) {
      // If at least one operand is known to be negative, the result must be
      // negative.
      if (KnownLHS.cannotBeOrderedGreaterThanZero() ||
          KnownRHS.cannotBeOrderedGreaterThanZero())
        Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
    } else {
      llvm_unreachable("unhandled intrinsic");
    }

    // Fixup zero handling if denormals could be returned as a zero.
    //
    // As there's no spec for denormal flushing, be conservative with the
    // treatment of denormals that could be flushed to zero. For older
    // subtargets on AMDGPU the min/max instructions would not flush the
    // output and return the original value.
    //
    if ((Known.KnownFPClasses & fcZero) != fcNone &&
        !Known.isKnownNeverSubnormal()) {
      DenormalMode Mode =
          MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType()));
      if (Mode != DenormalMode::getIEEE())
        Known.KnownFPClasses |= fcZero;
    }

    if (Known.isKnownNeverNaN()) {
      if (KnownLHS.SignBit && KnownRHS.SignBit &&
          *KnownLHS.SignBit == *KnownRHS.SignBit) {
        if (*KnownLHS.SignBit)
          Known.signBitMustBeOne();
        else
          Known.signBitMustBeZero();
      } else if ((Opcode == TargetOpcode::G_FMAXIMUM ||
                  Opcode == TargetOpcode::G_FMINIMUM) ||
                 Opcode == TargetOpcode::G_FMAXIMUMNUM ||
                 Opcode == TargetOpcode::G_FMINIMUMNUM ||
                 Opcode == TargetOpcode::G_FMAXNUM_IEEE ||
                 Opcode == TargetOpcode::G_FMINNUM_IEEE ||
                 // FIXME: Should be using logical zero versions
                 ((KnownLHS.isKnownNeverNegZero() ||
                   KnownRHS.isKnownNeverPosZero()) &&
                  (KnownLHS.isKnownNeverPosZero() ||
                   KnownRHS.isKnownNeverNegZero()))) {
        if ((Opcode == TargetOpcode::G_FMAXIMUM ||
             Opcode == TargetOpcode::G_FMAXNUM ||
             Opcode == TargetOpcode::G_FMAXIMUMNUM ||
             Opcode == TargetOpcode::G_FMAXNUM_IEEE) &&
            (KnownLHS.SignBit == false || KnownRHS.SignBit == false))
          Known.signBitMustBeZero();
        else if ((Opcode == TargetOpcode::G_FMINIMUM ||
                  Opcode == TargetOpcode::G_FMINNUM ||
                  Opcode == TargetOpcode::G_FMINIMUMNUM ||
                  Opcode == TargetOpcode::G_FMINNUM_IEEE) &&
                 (KnownLHS.SignBit == true || KnownRHS.SignBit == true))
          Known.signBitMustBeOne();
      }
    }
    break;
  }
  case TargetOpcode::G_FCANONICALIZE: {
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);

    // This is essentially a stronger form of
    // propagateCanonicalizingSrc. Other "canonicalizing" operations don't
    // actually have an IR canonicalization guarantee.

    // Canonicalize may flush denormals to zero, so we have to consider the
    // denormal mode to preserve known-not-0 knowledge.
    Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan;

    // Stronger version of propagateNaN
    // Canonicalize is guaranteed to quiet signaling nans.
    if (KnownSrc.isKnownNeverNaN())
      Known.knownNot(fcNan);
    else
      Known.knownNot(fcSNan);

    // If the parent function flushes denormals, the canonical output cannot
    // be a denormal.
    LLT Ty = MRI.getType(Val).getScalarType();
    const fltSemantics &FPType = getFltSemanticForLLT(Ty);
    DenormalMode DenormMode = MF->getDenormalMode(FPType);
    if (DenormMode == DenormalMode::getIEEE()) {
      if (KnownSrc.isKnownNever(fcPosZero))
        Known.knownNot(fcPosZero);
      if (KnownSrc.isKnownNever(fcNegZero))
        Known.knownNot(fcNegZero);
      break;
    }

    if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero())
      Known.knownNot(fcSubnormal);

    if (DenormMode.Input == DenormalMode::PositiveZero ||
        (DenormMode.Output == DenormalMode::PositiveZero &&
         DenormMode.Input == DenormalMode::IEEE))
      Known.knownNot(fcNegZero);

    break;
  }
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM:
  case TargetOpcode::G_VECREDUCE_FMINIMUM: {
    Register Val = MI.getOperand(1).getReg();
    // Reduce min/max will choose an element from one of the vector's lanes,
    // so we can infer any class information that is common to all elements.

    Known =
        computeKnownFPClass(Val, MI.getFlags(), InterestedClasses, Depth + 1);
    // Can only propagate sign if output is never NaN.
    if (!Known.isKnownNeverNaN())
      Known.SignBit.reset();
    break;
  }
  case TargetOpcode::G_TRUNC:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUND: {
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    FPClassTest InterestedSrcs = InterestedClasses;
    if (InterestedSrcs & fcPosFinite)
      InterestedSrcs |= fcPosFinite;
    if (InterestedSrcs & fcNegFinite)
      InterestedSrcs |= fcNegFinite;
    computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1);

    // Integer results cannot be subnormal.
    Known.knownNot(fcSubnormal);

    Known.propagateNaN(KnownSrc, /*PreserveSign=*/true);

    // TODO: handle multi unit FPTypes once LLT FPInfo lands
    // Negative values that round up to 0 produce -0.
    if (KnownSrc.isKnownNever(fcPosFinite))
      Known.knownNot(fcPosFinite);
    if (KnownSrc.isKnownNever(fcNegFinite))
      Known.knownNot(fcNegFinite);

    break;
  }
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FEXP10: {
    Known.knownNot(fcNegative);
    if ((InterestedClasses & fcNan) == fcNone)
      break;

    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);
    if (KnownSrc.isKnownNeverNaN()) {
      Known.knownNot(fcNan);
      Known.signBitMustBeZero();
    }

    break;
  }
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FLOG10: {
    // log(+inf) -> +inf
    // log([+-]0.0) -> -inf
    // log(-inf) -> nan
    // log(-x) -> nan
    if ((InterestedClasses & (fcNan | fcInf)) == fcNone)
      break;

    FPClassTest InterestedSrcs = InterestedClasses;
    if ((InterestedClasses & fcNegInf) != fcNone)
      InterestedSrcs |= fcZero | fcSubnormal;
    if ((InterestedClasses & fcNan) != fcNone)
      InterestedSrcs |= fcNan | (fcNegative & ~fcNan);

    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1);

    if (KnownSrc.isKnownNeverPosInfinity())
      Known.knownNot(fcPosInf);

    if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
      Known.knownNot(fcNan);

    LLT Ty = MRI.getType(Val).getScalarType();
    const fltSemantics &FltSem = getFltSemanticForLLT(Ty);
    DenormalMode Mode = MF->getDenormalMode(FltSem);

    if (KnownSrc.isKnownNeverLogicalZero(Mode))
      Known.knownNot(fcNegInf);

    break;
  }
  case TargetOpcode::G_FPOWI: {
    if ((InterestedClasses & fcNegative) == fcNone)
      break;

    Register Exp = MI.getOperand(2).getReg();
    LLT ExpTy = MRI.getType(Exp);
    KnownBits ExponentKnownBits = getKnownBits(
        Exp, ExpTy.isVector() ? DemandedElts : APInt(1, 1), Depth + 1);

    if (ExponentKnownBits.Zero[0]) { // Is even
      Known.knownNot(fcNegative);
      break;
    }

    // Given that exp is an integer, here are the
    // ways that pow can return a negative value:
    //
    // pow(-x, exp) --> negative if exp is odd and x is negative.
    // pow(-0, exp) --> -inf if exp is negative odd.
    // pow(-0, exp) --> -0 if exp is positive odd.
    // pow(-inf, exp) --> -0 if exp is negative odd.
    // pow(-inf, exp) --> -inf if exp is positive odd.
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, fcNegative, KnownSrc, Depth + 1);
    if (KnownSrc.isKnownNever(fcNegative))
      Known.knownNot(fcNegative);
    break;
  }
  case TargetOpcode::G_FLDEXP:
  case TargetOpcode::G_STRICT_FLDEXP: {
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);
    Known.propagateNaN(KnownSrc, /*PreserveSign=*/true);
1400
1401 // Sign is preserved, but underflows may produce zeroes.
1402 if (KnownSrc.isKnownNever(Mask: fcNegative))
1403 Known.knownNot(RuleOut: fcNegative);
1404 else if (KnownSrc.cannotBeOrderedLessThanZero())
1405 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
1406
1407 if (KnownSrc.isKnownNever(Mask: fcPositive))
1408 Known.knownNot(RuleOut: fcPositive);
1409 else if (KnownSrc.cannotBeOrderedGreaterThanZero())
1410 Known.knownNot(RuleOut: KnownFPClass::OrderedGreaterThanZeroMask);
1411
1412 // Can refine inf/zero handling based on the exponent operand.
1413 const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf;
1414 if ((InterestedClasses & ExpInfoMask) == fcNone)
1415 break;
1416 if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone)
1417 break;
1418
1419 // TODO: Handle constant range of Exp
1420
1421 break;
1422 }
1423 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1424 computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known,
1425 Depth);
1426 break;
1427 }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_STRICT_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_STRICT_FSUB: {
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    KnownFPClass KnownLHS, KnownRHS;
    bool WantNegative =
        (Opcode == TargetOpcode::G_FADD ||
         Opcode == TargetOpcode::G_STRICT_FADD) &&
        (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone;
    bool WantNaN = (InterestedClasses & fcNan) != fcNone;
    bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone;

    if (!WantNaN && !WantNegative && !WantNegZero)
      break;

    FPClassTest InterestedSrcs = InterestedClasses;
    if (WantNegative)
      InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
    if (InterestedClasses & fcNan)
      InterestedSrcs |= fcInf;
    computeKnownFPClass(RHS, DemandedElts, InterestedSrcs, KnownRHS, Depth + 1);

    if ((WantNaN && KnownRHS.isKnownNeverNaN()) ||
        (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) ||
        WantNegZero ||
        (Opcode == TargetOpcode::G_FSUB ||
         Opcode == TargetOpcode::G_STRICT_FSUB)) {

      // RHS is canonically cheaper to compute. Skip inspecting the LHS if
      // there's no point.
      computeKnownFPClass(LHS, DemandedElts, InterestedSrcs, KnownLHS,
                          Depth + 1);
      // Adding positive and negative infinity produces NaN.
      // TODO: Check sign of infinities.
      if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
          (KnownLHS.isKnownNeverInfinity() || KnownRHS.isKnownNeverInfinity()))
        Known.knownNot(fcNan);

      if (Opcode == TargetOpcode::G_FADD ||
          Opcode == TargetOpcode::G_STRICT_FADD) {
        if (KnownLHS.cannotBeOrderedLessThanZero() &&
            KnownRHS.cannotBeOrderedLessThanZero())
          Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);

        // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
        if ((KnownLHS.isKnownNeverLogicalNegZero(MF->getDenormalMode(
                 getFltSemanticForLLT(DstTy.getScalarType()))) ||
             KnownRHS.isKnownNeverLogicalNegZero(MF->getDenormalMode(
                 getFltSemanticForLLT(DstTy.getScalarType())))) &&
            // Make sure output negative denormal can't flush to -0
            outputDenormalIsIEEEOrPosZero(*MF, DstTy))
          Known.knownNot(fcNegZero);
      } else {
        // Only fsub -0, +0 can return -0
        if ((KnownLHS.isKnownNeverLogicalNegZero(MF->getDenormalMode(
                 getFltSemanticForLLT(DstTy.getScalarType()))) ||
             KnownRHS.isKnownNeverLogicalPosZero(MF->getDenormalMode(
                 getFltSemanticForLLT(DstTy.getScalarType())))) &&
            // Make sure output negative denormal can't flush to -0
            outputDenormalIsIEEEOrPosZero(*MF, DstTy))
          Known.knownNot(fcNegZero);
      }
    }

    break;
  }
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_STRICT_FMUL: {
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    // X * X is always non-negative or a NaN.
    if (LHS == RHS)
      Known.knownNot(fcNegative);

    if ((InterestedClasses & fcNan) != fcNan)
      break;

    // fcSubnormal is only needed in case of DAZ.
    const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal;

    KnownFPClass KnownLHS, KnownRHS;
    computeKnownFPClass(RHS, DemandedElts, NeedForNan, KnownRHS, Depth + 1);
    if (!KnownRHS.isKnownNeverNaN())
      break;

    computeKnownFPClass(LHS, DemandedElts, NeedForNan, KnownLHS, Depth + 1);
    if (!KnownLHS.isKnownNeverNaN())
      break;

    if (KnownLHS.SignBit && KnownRHS.SignBit) {
      if (*KnownLHS.SignBit == *KnownRHS.SignBit)
        Known.signBitMustBeZero();
      else
        Known.signBitMustBeOne();
    }

    // 0 * +/-inf produces NaN.
    if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) {
      Known.knownNot(fcNan);
      break;
    }

    if ((KnownRHS.isKnownNeverInfinity() ||
         KnownLHS.isKnownNeverLogicalZero(MF->getDenormalMode(
             getFltSemanticForLLT(DstTy.getScalarType())))) &&
        (KnownLHS.isKnownNeverInfinity() ||
         KnownRHS.isKnownNeverLogicalZero(
             MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))))
      Known.knownNot(fcNan);

    break;
  }
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM: {
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();

    if (LHS == RHS) {
      // TODO: Could filter out snan if we inspect the operand
      if (Opcode == TargetOpcode::G_FDIV) {
        // X / X is always exactly 1.0 or a NaN.
        Known.KnownFPClasses = fcNan | fcPosNormal;
      } else {
        // X % X is always exactly [+-]0.0 or a NaN.
        Known.KnownFPClasses = fcNan | fcZero;
      }

      break;
    }

    const bool WantNan = (InterestedClasses & fcNan) != fcNone;
    const bool WantNegative = (InterestedClasses & fcNegative) != fcNone;
    const bool WantPositive = Opcode == TargetOpcode::G_FREM &&
                              (InterestedClasses & fcPositive) != fcNone;
    if (!WantNan && !WantNegative && !WantPositive)
      break;

    KnownFPClass KnownLHS, KnownRHS;

    computeKnownFPClass(RHS, DemandedElts, fcNan | fcInf | fcZero | fcNegative,
                        KnownRHS, Depth + 1);

    bool KnowSomethingUseful =
        KnownRHS.isKnownNeverNaN() || KnownRHS.isKnownNever(fcNegative);

    if (KnowSomethingUseful || WantPositive) {
      const FPClassTest InterestedLHS =
          WantPositive ? fcAllFlags
                       : fcNan | fcInf | fcZero | fcSubnormal | fcNegative;

      computeKnownFPClass(LHS, DemandedElts, InterestedClasses & InterestedLHS,
                          KnownLHS, Depth + 1);
    }

    if (Opcode == TargetOpcode::G_FDIV) {
      // Only 0/0, Inf/Inf produce NaN.
      if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
          (KnownLHS.isKnownNeverInfinity() ||
           KnownRHS.isKnownNeverInfinity()) &&
          ((KnownLHS.isKnownNeverLogicalZero(MF->getDenormalMode(
               getFltSemanticForLLT(DstTy.getScalarType())))) ||
           (KnownRHS.isKnownNeverLogicalZero(MF->getDenormalMode(
               getFltSemanticForLLT(DstTy.getScalarType())))))) {
        Known.knownNot(fcNan);
      }

      // X / -0.0 is -Inf (or NaN).
      // +X / +X is +X
      if (KnownLHS.isKnownNever(fcNegative) &&
          KnownRHS.isKnownNever(fcNegative))
        Known.knownNot(fcNegative);
    } else {
      // Inf REM x and x REM 0 produce NaN.
      if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
          KnownLHS.isKnownNeverInfinity() &&
          KnownRHS.isKnownNeverLogicalZero(MF->getDenormalMode(
              getFltSemanticForLLT(DstTy.getScalarType())))) {
        Known.knownNot(fcNan);
      }

      // The sign for frem is the same as the first operand.
      if (KnownLHS.cannotBeOrderedLessThanZero())
        Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
      if (KnownLHS.cannotBeOrderedGreaterThanZero())
        Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);

      // See if we can be more aggressive about the sign of 0.
      if (KnownLHS.isKnownNever(fcNegative))
        Known.knownNot(fcNegative);
      if (KnownLHS.isKnownNever(fcPositive))
        Known.knownNot(fcPositive);
    }

    break;
  }
  case TargetOpcode::G_FPEXT: {
    Register Dst = MI.getOperand(0).getReg();
    Register Src = MI.getOperand(1).getReg();
    // Infinity, nan and zero propagate from source.
    computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1);

    LLT DstTy = MRI.getType(Dst).getScalarType();
    const fltSemantics &DstSem = getFltSemanticForLLT(DstTy);
    LLT SrcTy = MRI.getType(Src).getScalarType();
    const fltSemantics &SrcSem = getFltSemanticForLLT(SrcTy);

    // All subnormal inputs should be in the normal range in the result type.
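    // e.g. the smallest f16 subnormal (2^-24) is well above the smallest f32
    // normal (2^-126), so every f16 subnormal extends to an f32 normal.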
    if (APFloat::isRepresentableAsNormalIn(SrcSem, DstSem)) {
      if (Known.KnownFPClasses & fcPosSubnormal)
        Known.KnownFPClasses |= fcPosNormal;
      if (Known.KnownFPClasses & fcNegSubnormal)
        Known.KnownFPClasses |= fcNegNormal;
      Known.knownNot(fcSubnormal);
    }

    // Sign bit of a nan isn't guaranteed.
    if (!Known.isKnownNeverNaN())
      Known.SignBit = std::nullopt;
    break;
  }
  case TargetOpcode::G_FPTRUNC: {
    computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known,
                                  Depth);
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // Cannot produce nan
    Known.knownNot(fcNan);

    // Integers cannot be subnormal
    Known.knownNot(fcSubnormal);

    // sitofp and uitofp turn into +0.0 for zero.
    Known.knownNot(fcNegZero);
    if (Opcode == TargetOpcode::G_UITOFP)
      Known.signBitMustBeZero();

    Register Val = MI.getOperand(1).getReg();
    LLT Ty = MRI.getType(Val);

    if (InterestedClasses & fcInf) {
      // Get width of largest magnitude integer (remove a bit if signed).
      // This still works for a signed minimum value because the largest FP
      // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx).
      int IntSize = Ty.getScalarSizeInBits();
      if (Opcode == TargetOpcode::G_SITOFP)
        --IntSize;

      // If the exponent of the largest finite FP value can hold the largest
      // integer, the result of the cast must be finite.
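      // e.g. ilogb of the largest f32 is 127 >= 32, so any i32 converts to a
      // finite f32, while a u128 (IntSize 128) does not qualify; indeed
      // 2^128 - 1 rounds up past FLT_MAX to +inf in f32.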
      LLT FPTy = DstTy.getScalarType();
      const fltSemantics &FltSem = getFltSemanticForLLT(FPTy);
      if (ilogb(APFloat::getLargest(FltSem)) >= IntSize)
        Known.knownNot(fcInf);
    }

    break;
  }
  // case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_CONCAT_VECTORS: {
    GMergeLikeInstr &Merge = cast<GMergeLikeInstr>(MI);

    if (!DstTy.isFixedVector())
      break;

    bool First = true;
    for (unsigned Idx = 0; Idx < Merge.getNumSources(); ++Idx) {
      // Only sources feeding demanded lanes can affect the result.
      bool NeedsElt = DemandedElts[Idx];
      if (NeedsElt) {
        Register Src = Merge.getSourceReg(Idx);
        if (First) {
          computeKnownFPClass(Src, Known, InterestedClasses, Depth + 1);
          First = false;
        } else {
          KnownFPClass Known2;
          computeKnownFPClass(Src, Known2, InterestedClasses, Depth + 1);
          Known |= Known2;
        }

        // If we don't know any bits, early out.
        if (Known.isUnknown())
          break;
      }
    }

    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    // Look through extract element. If the index is non-constant or
    // out-of-range demand all elements, otherwise just the extracted
    // element.
    GExtractVectorElement &Extract = cast<GExtractVectorElement>(MI);
    Register Vec = Extract.getVectorReg();
    Register Idx = Extract.getIndexReg();

    auto CIdx = getIConstantVRegVal(Idx, MRI);

    LLT VecTy = MRI.getType(Vec);

    if (VecTy.isFixedVector()) {
      unsigned NumElts = VecTy.getNumElements();
      APInt DemandedVecElts = APInt::getAllOnes(NumElts);
      if (CIdx && CIdx->ult(NumElts))
        DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
      return computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known,
                                 Depth + 1);
    }

    break;
  }
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    GInsertVectorElement &Insert = cast<GInsertVectorElement>(MI);
    Register Vec = Insert.getVectorReg();
    Register Elt = Insert.getElementReg();
    Register Idx = Insert.getIndexReg();

    LLT VecTy = MRI.getType(Vec);

    if (VecTy.isScalableVector())
      return;

    auto CIdx = getIConstantVRegVal(Idx, MRI);

    unsigned NumElts = DemandedElts.getBitWidth();
    APInt DemandedVecElts = DemandedElts;
    bool NeedsElt = true;
    // If we know the index we are inserting to, clear it from Vec check.
    if (CIdx && CIdx->ult(NumElts)) {
      DemandedVecElts.clearBit(CIdx->getZExtValue());
      NeedsElt = DemandedElts[CIdx->getZExtValue()];
    }

    // Do we demand the inserted element?
    if (NeedsElt) {
      computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1);
      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    } else {
      Known.KnownFPClasses = fcNone;
    }

    // Do we need any more elements from Vec?
    if (!DemandedVecElts.isZero()) {
      KnownFPClass Known2;
      computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2,
                          Depth + 1);
      Known |= Known2;
    }

    break;
  }
  case TargetOpcode::G_SHUFFLE_VECTOR: {
    // For undef elements, we don't know anything about the common state of
    // the shuffle result.
    GShuffleVector &Shuf = cast<GShuffleVector>(MI);
    APInt DemandedLHS, DemandedRHS;
    if (DstTy.isScalableVector()) {
      assert(DemandedElts == APInt(1, 1));
      DemandedLHS = DemandedRHS = DemandedElts;
    } else {
      if (!llvm::getShuffleDemandedElts(DstTy.getNumElements(), Shuf.getMask(),
                                        DemandedElts, DemandedLHS,
                                        DemandedRHS)) {
        Known.resetAll();
        return;
      }
    }

    if (!!DemandedLHS) {
      Register LHS = Shuf.getSrc1Reg();
      computeKnownFPClass(LHS, DemandedLHS, InterestedClasses, Known,
                          Depth + 1);

      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    } else {
      Known.KnownFPClasses = fcNone;
    }

    if (!!DemandedRHS) {
      KnownFPClass Known2;
      Register RHS = Shuf.getSrc2Reg();
      computeKnownFPClass(RHS, DemandedRHS, InterestedClasses, Known2,
                          Depth + 1);
      Known |= Known2;
    }
    break;
  }
  case TargetOpcode::COPY: {
    Register Src = MI.getOperand(1).getReg();

    if (!Src.isVirtual())
      return;

    computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1);
    break;
  }
  }
}

KnownFPClass
GISelValueTracking::computeKnownFPClass(Register R, const APInt &DemandedElts,
                                        FPClassTest InterestedClasses,
                                        unsigned Depth) {
  KnownFPClass KnownClasses;
  computeKnownFPClass(R, DemandedElts, InterestedClasses, KnownClasses, Depth);
  return KnownClasses;
}

KnownFPClass GISelValueTracking::computeKnownFPClass(
    Register R, FPClassTest InterestedClasses, unsigned Depth) {
  KnownFPClass Known;
  computeKnownFPClass(R, Known, InterestedClasses, Depth);
  return Known;
}

KnownFPClass GISelValueTracking::computeKnownFPClass(
    Register R, const APInt &DemandedElts, uint32_t Flags,
    FPClassTest InterestedClasses, unsigned Depth) {
  if (Flags & MachineInstr::MIFlag::FmNoNans)
    InterestedClasses &= ~fcNan;
  if (Flags & MachineInstr::MIFlag::FmNoInfs)
    InterestedClasses &= ~fcInf;

  KnownFPClass Result =
      computeKnownFPClass(R, DemandedElts, InterestedClasses, Depth);

  if (Flags & MachineInstr::MIFlag::FmNoNans)
    Result.KnownFPClasses &= ~fcNan;
  if (Flags & MachineInstr::MIFlag::FmNoInfs)
    Result.KnownFPClasses &= ~fcInf;
  return Result;
}

KnownFPClass GISelValueTracking::computeKnownFPClass(
    Register R, uint32_t Flags, FPClassTest InterestedClasses, unsigned Depth) {
  LLT Ty = MRI.getType(R);
  APInt DemandedElts =
      Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  return computeKnownFPClass(R, DemandedElts, Flags, InterestedClasses, Depth);
}

/// Compute number of sign bits for the intersection of \p Src0 and \p Src1
unsigned GISelValueTracking::computeNumSignBitsMin(Register Src0, Register Src1,
                                                   const APInt &DemandedElts,
                                                   unsigned Depth) {
  // Test src1 first, since we canonicalize simpler expressions to the RHS.
  unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth);
  if (Src1SignBits == 1)
    return 1;
  return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
}

/// Compute the known number of sign bits with attached range metadata in the
/// memory operand. If this is an extending load, accounts for the behavior of
/// the high bits.
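///
/// For example, !range metadata of [-4, 4) on an i8 load means both the
/// signed minimum (0xFC) and the signed maximum (0x03) carry six leading
/// identical bits, so the loaded value has at least 6 sign bits.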
static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
                                                    unsigned TyBits) {
  const MDNode *Ranges = Ld->getRanges();
  if (!Ranges)
    return 1;

  ConstantRange CR = getConstantRangeFromMetadata(*Ranges);
  if (TyBits > CR.getBitWidth()) {
    switch (Ld->getOpcode()) {
    case TargetOpcode::G_SEXTLOAD:
      CR = CR.signExtend(TyBits);
      break;
    case TargetOpcode::G_ZEXTLOAD:
      CR = CR.zeroExtend(TyBits);
      break;
    default:
      break;
    }
  }

  return std::min(CR.getSignedMin().getNumSignBits(),
                  CR.getSignedMax().getNumSignBits());
}

unsigned GISelValueTracking::computeNumSignBits(Register R,
                                                const APInt &DemandedElts,
                                                unsigned Depth) {
  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();

  if (Opcode == TargetOpcode::G_CONSTANT)
    return MI.getOperand(1).getCImm()->getValue().getNumSignBits();

  if (Depth == getMaxDepth())
    return 1;

  if (!DemandedElts)
    return 1; // No demanded elts, better to assume we don't know anything.

  LLT DstTy = MRI.getType(R);
  const unsigned TyBits = DstTy.getScalarSizeInBits();

  // Handle the case where this is called on a register that does not have a
  // type constraint. This is unlikely to occur except by looking through
  // copies, but it is possible for the initial register being queried to be
  // in this state.
  if (!DstTy.isValid())
    return 1;

  unsigned FirstAnswer = 1;
  switch (Opcode) {
  case TargetOpcode::COPY: {
    MachineOperand &Src = MI.getOperand(1);
    if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
        MRI.getType(Src.getReg()).isValid()) {
      // Don't increment Depth for this one since we didn't do any work.
      return computeNumSignBits(Src.getReg(), DemandedElts, Depth);
    }

    return 1;
  }
  case TargetOpcode::G_SEXT: {
    Register Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(Src);
    unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
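    // e.g. a G_SEXT from s8 to s32 contributes 24 extra copies of the sign
    // bit on top of whatever the source already provides.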
    return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
  }
  case TargetOpcode::G_ASSERT_SEXT:
  case TargetOpcode::G_SEXT_INREG: {
    // Max of the input and what this extends.
    Register Src = MI.getOperand(1).getReg();
    unsigned SrcBits = MI.getOperand(2).getImm();
    unsigned InRegBits = TyBits - SrcBits + 1;
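    // e.g. G_SEXT_INREG %x, 8 on s32 guarantees at least 32 - 8 + 1 = 25
    // sign bits, regardless of what is known about %x.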
    return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1),
                    InRegBits);
  }
  case TargetOpcode::G_LOAD: {
    GLoad *Ld = cast<GLoad>(&MI);
    if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
      break;

    return computeNumSignBitsFromRangeMetadata(Ld, TyBits);
  }
  case TargetOpcode::G_SEXTLOAD: {
    GSExtLoad *Ld = cast<GSExtLoad>(&MI);

    // FIXME: We need an in-memory type representation.
    if (DstTy.isVector())
      return 1;

    unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
    if (NumBits != 1)
      return NumBits;

    // e.g. i16->i32 = '17' bits known.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return TyBits - MMO->getSizeInBits().getValue() + 1;
  }
  case TargetOpcode::G_ZEXTLOAD: {
    GZExtLoad *Ld = cast<GZExtLoad>(&MI);

    // FIXME: We need an in-memory type representation.
    if (DstTy.isVector())
      return 1;

    unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
    if (NumBits != 1)
      return NumBits;

    // e.g. i16->i32 = '16' bits known.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return TyBits - MMO->getSizeInBits().getValue();
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    Register Src1 = MI.getOperand(1).getReg();
    unsigned Src1NumSignBits =
        computeNumSignBits(Src1, DemandedElts, Depth + 1);
    if (Src1NumSignBits != 1) {
      Register Src2 = MI.getOperand(2).getReg();
      unsigned Src2NumSignBits =
          computeNumSignBits(Src2, DemandedElts, Depth + 1);
      FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits);
    }
    break;
  }
  case TargetOpcode::G_ASHR: {
    Register Src1 = MI.getOperand(1).getReg();
    Register Src2 = MI.getOperand(2).getReg();
    FirstAnswer = computeNumSignBits(Src1, DemandedElts, Depth + 1);
    if (auto C = getValidMinimumShiftAmount(Src2, DemandedElts, Depth + 1))
      FirstAnswer = std::min<uint64_t>(FirstAnswer + *C, TyBits);
    break;
  }
  case TargetOpcode::G_SHL: {
    Register Src1 = MI.getOperand(1).getReg();
    Register Src2 = MI.getOperand(2).getReg();
    if (std::optional<ConstantRange> ShAmtRange =
            getValidShiftAmountRange(Src2, DemandedElts, Depth + 1)) {
      uint64_t MaxShAmt = ShAmtRange->getUnsignedMax().getZExtValue();
      uint64_t MinShAmt = ShAmtRange->getUnsignedMin().getZExtValue();

      MachineInstr &ExtMI = *MRI.getVRegDef(Src1);
      unsigned ExtOpc = ExtMI.getOpcode();

      // Try to look through ZERO/SIGN/ANY_EXTEND. If all extended bits are
      // shifted out, then we can compute the number of sign bits for the
      // operand being extended. A future improvement could be to pass along
      // the "shifted left by" information in the recursive calls to
      // computeNumSignBits, allowing us to handle this more generically.
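      // e.g. for (G_SHL (G_SEXT s8 to s32), 24) all 24 extended bits are
      // shifted out (SizeDiff == MinShAmt == 24), so the result keeps exactly
      // the s8 source's own number of sign bits: (24 + N) - 24 = N.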
      if (ExtOpc == TargetOpcode::G_SEXT || ExtOpc == TargetOpcode::G_ZEXT ||
          ExtOpc == TargetOpcode::G_ANYEXT) {
        LLT ExtTy = MRI.getType(Src1);
        Register Extendee = ExtMI.getOperand(1).getReg();
        LLT ExtendeeTy = MRI.getType(Extendee);
        uint64_t SizeDiff =
            ExtTy.getScalarSizeInBits() - ExtendeeTy.getScalarSizeInBits();

        if (SizeDiff <= MinShAmt) {
          unsigned Tmp =
              SizeDiff + computeNumSignBits(Extendee, DemandedElts, Depth + 1);
          if (MaxShAmt < Tmp)
            return Tmp - MaxShAmt;
        }
      }
      // shl destroys sign bits, ensure it doesn't shift out all sign bits.
      unsigned Tmp = computeNumSignBits(Src1, DemandedElts, Depth + 1);
      if (MaxShAmt < Tmp)
        return Tmp - MaxShAmt;
    }
    break;
  }
  case TargetOpcode::G_TRUNC: {
    Register Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(Src);

    // Check if the sign bits of source go down as far as the truncated value.
    unsigned DstTyBits = DstTy.getScalarSizeInBits();
    unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
    unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1);
    if (NumSrcSignBits > (NumSrcBits - DstTyBits))
      return NumSrcSignBits - (NumSrcBits - DstTyBits);
    break;
  }
  case TargetOpcode::G_SELECT: {
    return computeNumSignBitsMin(MI.getOperand(2).getReg(),
                                 MI.getOperand(3).getReg(), DemandedElts,
                                 Depth + 1);
  }
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
    // TODO: Handle clamp pattern with number of sign bits for SMIN/SMAX.
    return computeNumSignBitsMin(MI.getOperand(1).getReg(),
                                 MI.getOperand(2).getReg(), DemandedElts,
                                 Depth + 1);
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // If compares return 0/-1, all bits are sign bits.
    // We know that we have an integer-based boolean since these operations
    // are only available for integer types.
    if (MI.getOperand(1).getReg() == R) {
      if (TL.getBooleanContents(DstTy.isVector(), false) ==
          TargetLowering::ZeroOrNegativeOneBooleanContent)
        return TyBits;
    }

    break;
  }
  case TargetOpcode::G_SUB: {
    Register Src2 = MI.getOperand(2).getReg();
    unsigned Src2NumSignBits =
        computeNumSignBits(Src2, DemandedElts, Depth + 1);
    if (Src2NumSignBits == 1)
      return 1; // Early out.

    // Handle NEG.
    Register Src1 = MI.getOperand(1).getReg();
    KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth);
    if (Known1.isZero()) {
      KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth);
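      // (Known2.Zero | 1).isAllOnes() means every bit above bit 0 is known
      // zero, i.e. the operand is either 0 or 1.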
      // If the input is known to be 0 or 1, the output is 0/-1, which is all
      // sign bits set.
      if ((Known2.Zero | 1).isAllOnes())
        return TyBits;

      // If the input is known to be positive (the sign bit is known clear),
      // the output of the NEG has, at worst, the same number of sign bits as
      // the input.
      if (Known2.isNonNegative()) {
        FirstAnswer = Src2NumSignBits;
        break;
      }

      // Otherwise, we treat this like a SUB.
    }

    unsigned Src1NumSignBits =
        computeNumSignBits(Src1, DemandedElts, Depth + 1);
    if (Src1NumSignBits == 1)
      return 1; // Early out.

    // Sub can have at most one carry bit. Thus we know that the output
    // is, at worst, one more bit than the inputs.
    FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1;
    break;
  }
  case TargetOpcode::G_ADD: {
    Register Src2 = MI.getOperand(2).getReg();
    unsigned Src2NumSignBits =
        computeNumSignBits(Src2, DemandedElts, Depth + 1);
    if (Src2NumSignBits <= 2)
      return 1; // Early out.

    Register Src1 = MI.getOperand(1).getReg();
    unsigned Src1NumSignBits =
        computeNumSignBits(Src1, DemandedElts, Depth + 1);
    if (Src1NumSignBits == 1)
      return 1; // Early out.

    // Special case decrementing a value (ADD X, -1):
    KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth);
    if (Known2.isAllOnes()) {
      KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth);
      // If the input is known to be 0 or 1, the output is 0/-1, which is all
      // sign bits set.
      if ((Known1.Zero | 1).isAllOnes())
        return TyBits;

      // If we are subtracting one from a positive number, there is no carry
      // out of the result.
      if (Known1.isNonNegative()) {
        FirstAnswer = Src1NumSignBits;
        break;
      }

      // Otherwise, we treat this like an ADD.
    }

    // Add can have at most one carry bit. Thus we know that the output
    // is, at worst, one more bit than the inputs.
    FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1;
    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    bool IsFP = Opcode == TargetOpcode::G_FCMP;
    if (TyBits == 1)
      break;
    auto BC = TL.getBooleanContents(DstTy.isVector(), IsFP);
    if (BC == TargetLoweringBase::ZeroOrNegativeOneBooleanContent)
      return TyBits; // All bits are sign bits.
    if (BC == TargetLowering::ZeroOrOneBooleanContent)
      return TyBits - 1; // Every always-zero bit is a sign bit.
    break;
  }
  case TargetOpcode::G_BUILD_VECTOR: {
    // Take the minimum number of sign bits across every demanded vector
    // element.
    FirstAnswer = TyBits;
    APInt SingleDemandedElt(1, 1);
    for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
      if (!DemandedElts[I])
        continue;

      unsigned Tmp2 =
          computeNumSignBits(MO.getReg(), SingleDemandedElt, Depth + 1);
      FirstAnswer = std::min(FirstAnswer, Tmp2);

      // If we don't know any bits, early out.
      if (FirstAnswer == 1)
        break;
    }
    break;
  }
  case TargetOpcode::G_CONCAT_VECTORS: {
    if (MRI.getType(MI.getOperand(0).getReg()).isScalableVector())
      break;
    FirstAnswer = TyBits;
    // Determine the minimum number of sign bits across all demanded
    // elts of the input vectors. Early out if the result is already 1.
    unsigned NumSubVectorElts =
        MRI.getType(MI.getOperand(1).getReg()).getNumElements();
    for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
      APInt DemandedSub =
          DemandedElts.extractBits(NumSubVectorElts, I * NumSubVectorElts);
      if (!DemandedSub)
        continue;
      unsigned Tmp2 = computeNumSignBits(MO.getReg(), DemandedSub, Depth + 1);

      FirstAnswer = std::min(FirstAnswer, Tmp2);

      // If we don't know any bits, early out.
      if (FirstAnswer == 1)
        break;
    }
    break;
  }
  case TargetOpcode::G_SHUFFLE_VECTOR: {
    // Collect the minimum number of sign bits that are shared by every vector
    // element referenced by the shuffle.
    APInt DemandedLHS, DemandedRHS;
    Register Src1 = MI.getOperand(1).getReg();
    unsigned NumElts = MRI.getType(Src1).getNumElements();
    if (!getShuffleDemandedElts(NumElts, MI.getOperand(3).getShuffleMask(),
                                DemandedElts, DemandedLHS, DemandedRHS))
      return 1;

    if (!!DemandedLHS)
      FirstAnswer = computeNumSignBits(Src1, DemandedLHS, Depth + 1);
    // If we don't know anything, early out and try computeKnownBits fall-back.
    if (FirstAnswer == 1)
      break;
    if (!!DemandedRHS) {
      unsigned Tmp2 =
          computeNumSignBits(MI.getOperand(2).getReg(), DemandedRHS, Depth + 1);
      FirstAnswer = std::min(FirstAnswer, Tmp2);
    }
    break;
  }
  case TargetOpcode::G_SPLAT_VECTOR: {
    // Check if the sign bits of source go down as far as the truncated value.
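    // e.g. splatting an s64 value with 40 sign bits into <4 x s32> leaves
    // 40 - (64 - 32) = 8 sign bits in each lane.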
    Register Src = MI.getOperand(1).getReg();
    unsigned NumSrcSignBits = computeNumSignBits(Src, APInt(1, 1), Depth + 1);
    unsigned NumSrcBits = MRI.getType(Src).getSizeInBits();
    if (NumSrcSignBits > (NumSrcBits - TyBits))
      return NumSrcSignBits - (NumSrcBits - TyBits);
    break;
  }
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  default: {
    unsigned NumBits =
        TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth);
    if (NumBits > 1)
      FirstAnswer = std::max(FirstAnswer, NumBits);
    break;
  }
  }

  // Finally, if we can prove that the top bits of the result are 0's or 1's,
  // use this information.
  KnownBits Known = getKnownBits(R, DemandedElts, Depth);
  APInt Mask;
  if (Known.isNonNegative()) { // sign bit is 0
    Mask = Known.Zero;
  } else if (Known.isNegative()) { // sign bit is 1
    Mask = Known.One;
  } else {
    // Nothing known.
    return FirstAnswer;
  }

  // Okay, we know that the sign bit in Mask is set. Use CLO to determine
  // the number of identical bits in the top of the input value.
  Mask <<= Mask.getBitWidth() - TyBits;
  return std::max(FirstAnswer, Mask.countl_one());
}

unsigned GISelValueTracking::computeNumSignBits(Register R, unsigned Depth) {
  LLT Ty = MRI.getType(R);
  APInt DemandedElts =
      Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  return computeNumSignBits(R, DemandedElts, Depth);
}

std::optional<ConstantRange> GISelValueTracking::getValidShiftAmountRange(
    Register R, const APInt &DemandedElts, unsigned Depth) {
  // Shifting more than the bitwidth is not valid.
  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();

  LLT Ty = MRI.getType(R);
  unsigned BitWidth = Ty.getScalarSizeInBits();

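  // A constant shift amount gives a singleton range, e.g. a shift of an s32
  // value by G_CONSTANT 3 yields the single-element range [3, 3].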
  if (Opcode == TargetOpcode::G_CONSTANT) {
    const APInt &ShAmt = MI.getOperand(1).getCImm()->getValue();
    if (ShAmt.uge(BitWidth))
      return std::nullopt;
    return ConstantRange(ShAmt);
  }

  if (Opcode == TargetOpcode::G_BUILD_VECTOR) {
    const APInt *MinAmt = nullptr, *MaxAmt = nullptr;
    for (unsigned I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
      if (!DemandedElts[I])
        continue;
      MachineInstr *Op = MRI.getVRegDef(MI.getOperand(I + 1).getReg());
      if (Op->getOpcode() != TargetOpcode::G_CONSTANT) {
        MinAmt = MaxAmt = nullptr;
        break;
      }

      const APInt &ShAmt = Op->getOperand(1).getCImm()->getValue();
      if (ShAmt.uge(BitWidth))
        return std::nullopt;
      if (!MinAmt || MinAmt->ugt(ShAmt))
        MinAmt = &ShAmt;
      if (!MaxAmt || MaxAmt->ult(ShAmt))
        MaxAmt = &ShAmt;
    }
    assert(((!MinAmt && !MaxAmt) || (MinAmt && MaxAmt)) &&
           "Failed to find matching min/max shift amounts");
    if (MinAmt && MaxAmt)
      return ConstantRange(*MinAmt, *MaxAmt + 1);
  }

  // Use computeKnownBits to find a hidden constant/knownbits (usually type
  // legalized). e.g. Hidden behind multiple bitcasts/build_vector/casts etc.
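  // For example, an amount produced by G_AND %amt, 7 on s32 has a known
  // maximum of 7 < 32, so the fallback below returns the range [0, 7].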
  KnownBits KnownAmt = getKnownBits(R, DemandedElts, Depth);
  if (KnownAmt.getMaxValue().ult(BitWidth))
    return ConstantRange::fromKnownBits(KnownAmt, /*IsSigned=*/false);

  return std::nullopt;
}

std::optional<uint64_t> GISelValueTracking::getValidMinimumShiftAmount(
    Register R, const APInt &DemandedElts, unsigned Depth) {
  if (std::optional<ConstantRange> AmtRange =
          getValidShiftAmountRange(R, DemandedElts, Depth))
    return AmtRange->getUnsignedMin().getZExtValue();
  return std::nullopt;
}

void GISelValueTrackingAnalysisLegacy::getAnalysisUsage(
    AnalysisUsage &AU) const {
  AU.setPreservesAll();
  MachineFunctionPass::getAnalysisUsage(AU);
}

bool GISelValueTrackingAnalysisLegacy::runOnMachineFunction(
    MachineFunction &MF) {
  return false;
}

GISelValueTracking &GISelValueTrackingAnalysisLegacy::get(MachineFunction &MF) {
  if (!Info) {
    unsigned MaxDepth =
        MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6;
    Info = std::make_unique<GISelValueTracking>(MF, MaxDepth);
  }
  return *Info;
}

AnalysisKey GISelValueTrackingAnalysis::Key;

GISelValueTracking
GISelValueTrackingAnalysis::run(MachineFunction &MF,
                                MachineFunctionAnalysisManager &MFAM) {
  return Result(MF);
}

PreservedAnalyses
GISelValueTrackingPrinterPass::run(MachineFunction &MF,
                                   MachineFunctionAnalysisManager &MFAM) {
  auto &VTA = MFAM.getResult<GISelValueTrackingAnalysis>(MF);
  const auto &MRI = MF.getRegInfo();
  OS << "name: ";
  MF.getFunction().printAsOperand(OS, /*PrintType=*/false);
  OS << '\n';

  for (MachineBasicBlock &BB : MF) {
    for (MachineInstr &MI : BB) {
      for (MachineOperand &MO : MI.defs()) {
        if (!MO.isReg() || MO.getReg().isPhysical())
          continue;
        Register Reg = MO.getReg();
        if (!MRI.getType(Reg).isValid())
          continue;
        KnownBits Known = VTA.getKnownBits(Reg);
        unsigned SignedBits = VTA.computeNumSignBits(Reg);
        OS << " " << MO << " KnownBits:" << Known << " SignBits:" << SignedBits
           << '\n';
      }
    }
  }
  return PreservedAnalyses::all();
}