//===- lib/CodeGen/GlobalISel/GISelValueTracking.cpp ------------*- C++ -*-===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9//
10/// Provides analysis for querying information about KnownBits during GISel
11/// passes.
12//
13//===----------------------------------------------------------------------===//
14#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
15#include "llvm/ADT/APFloat.h"
16#include "llvm/ADT/FloatingPointMode.h"
17#include "llvm/ADT/ScopeExit.h"
18#include "llvm/ADT/StringExtras.h"
19#include "llvm/Analysis/ValueTracking.h"
20#include "llvm/Analysis/VectorUtils.h"
21#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
22#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
23#include "llvm/CodeGen/GlobalISel/MachineFloatingPointPredicateUtils.h"
24#include "llvm/CodeGen/GlobalISel/Utils.h"
25#include "llvm/CodeGen/LowLevelTypeUtils.h"
26#include "llvm/CodeGen/MachineFrameInfo.h"
27#include "llvm/CodeGen/MachineInstr.h"
28#include "llvm/CodeGen/MachineOperand.h"
29#include "llvm/CodeGen/MachineRegisterInfo.h"
30#include "llvm/CodeGen/Register.h"
31#include "llvm/CodeGen/TargetLowering.h"
32#include "llvm/CodeGen/TargetOpcodes.h"
33#include "llvm/IR/ConstantRange.h"
34#include "llvm/IR/DerivedTypes.h"
35#include "llvm/IR/FMF.h"
36#include "llvm/InitializePasses.h"
37#include "llvm/MC/TargetRegistry.h"
38#include "llvm/Support/KnownBits.h"
39#include "llvm/Support/KnownFPClass.h"
40#include "llvm/Target/TargetMachine.h"
41
42#define DEBUG_TYPE "gisel-known-bits"
43
44using namespace llvm;
45using namespace MIPatternMatch;
46
// Pass identification token for the legacy pass manager.
char llvm::GISelValueTrackingAnalysisLegacy::ID = 0;

// Register the legacy analysis pass. It is not a CFG-only pass (false) and is
// an analysis (true), so it is preserved across transformations by default.
INITIALIZE_PASS(GISelValueTrackingAnalysisLegacy, DEBUG_TYPE,
                "Analysis for ComputingKnownBits", false, true)
51
/// Construct the analysis for \p MF, caching the per-function state the
/// known-bits queries need (register info, target lowering, data layout).
/// \p MaxDepth bounds how far computeKnownBitsImpl recurses through defs.
GISelValueTracking::GISelValueTracking(MachineFunction &MF, unsigned MaxDepth)
    : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
      DL(MF.getFunction().getDataLayout()), MaxDepth(MaxDepth) {}
55
56Align GISelValueTracking::computeKnownAlignment(Register R, unsigned Depth) {
57 const MachineInstr *MI = MRI.getVRegDef(Reg: R);
58 switch (MI->getOpcode()) {
59 case TargetOpcode::COPY:
60 return computeKnownAlignment(R: MI->getOperand(i: 1).getReg(), Depth);
61 case TargetOpcode::G_ASSERT_ALIGN: {
62 // TODO: Min with source
63 return Align(MI->getOperand(i: 2).getImm());
64 }
65 case TargetOpcode::G_FRAME_INDEX: {
66 int FrameIdx = MI->getOperand(i: 1).getIndex();
67 return MF.getFrameInfo().getObjectAlign(ObjectIdx: FrameIdx);
68 }
69 case TargetOpcode::G_INTRINSIC:
70 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
71 case TargetOpcode::G_INTRINSIC_CONVERGENT:
72 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
73 default:
74 return TL.computeKnownAlignForTargetInstr(Analysis&: *this, R, MRI, Depth: Depth + 1);
75 }
76}
77
78KnownBits GISelValueTracking::getKnownBits(MachineInstr &MI) {
79 assert(MI.getNumExplicitDefs() == 1 &&
80 "expected single return generic instruction");
81 return getKnownBits(R: MI.getOperand(i: 0).getReg());
82}
83
84KnownBits GISelValueTracking::getKnownBits(Register R) {
85 const LLT Ty = MRI.getType(Reg: R);
86 // Since the number of lanes in a scalable vector is unknown at compile time,
87 // we track one bit which is implicitly broadcast to all lanes. This means
88 // that all lanes in a scalable vector are considered demanded.
89 APInt DemandedElts =
90 Ty.isFixedVector() ? APInt::getAllOnes(numBits: Ty.getNumElements()) : APInt(1, 1);
91 return getKnownBits(R, DemandedElts);
92}
93
94KnownBits GISelValueTracking::getKnownBits(Register R,
95 const APInt &DemandedElts,
96 unsigned Depth) {
97 KnownBits Known;
98 computeKnownBitsImpl(R, Known, DemandedElts, Depth);
99 return Known;
100}
101
102bool GISelValueTracking::signBitIsZero(Register R) {
103 LLT Ty = MRI.getType(Reg: R);
104 unsigned BitWidth = Ty.getScalarSizeInBits();
105 return maskedValueIsZero(Val: R, Mask: APInt::getSignMask(BitWidth));
106}
107
108APInt GISelValueTracking::getKnownZeroes(Register R) {
109 return getKnownBits(R).Zero;
110}
111
112APInt GISelValueTracking::getKnownOnes(Register R) {
113 return getKnownBits(R).One;
114}
115
/// Debug-only helper: print \p MI together with the combined, zero, and one
/// masks (in hex) at recursion depth \p Depth. Only referenced under
/// LLVM_DEBUG, hence [[maybe_unused]].
[[maybe_unused]] static void
dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
  dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
         << "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
         << toString(I: Known.Zero | Known.One, Radix: 16, Signed: false) << "\n"
         << "[" << Depth << "] Zero: 0x" << toString(I: Known.Zero, Radix: 16, Signed: false)
         << "\n"
         << "[" << Depth << "] One: 0x" << toString(I: Known.One, Radix: 16, Signed: false)
         << "\n";
}
126
127/// Compute known bits for the intersection of \p Src0 and \p Src1
128void GISelValueTracking::computeKnownBitsMin(Register Src0, Register Src1,
129 KnownBits &Known,
130 const APInt &DemandedElts,
131 unsigned Depth) {
132 // Test src1 first, since we canonicalize simpler expressions to the RHS.
133 computeKnownBitsImpl(R: Src1, Known, DemandedElts, Depth);
134
135 // If we don't know any bits, early out.
136 if (Known.isUnknown())
137 return;
138
139 KnownBits Known2;
140 computeKnownBitsImpl(R: Src0, Known&: Known2, DemandedElts, Depth);
141
142 // Only known if known in both the LHS and RHS.
143 Known = Known.intersectWith(RHS: Known2);
144}
145
146// Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is
147// created using Width. Use this function when the inputs are KnownBits
148// objects. TODO: Move this KnownBits.h if this is usable in more cases.
149static KnownBits extractBits(unsigned BitWidth, const KnownBits &SrcOpKnown,
150 const KnownBits &OffsetKnown,
151 const KnownBits &WidthKnown) {
152 KnownBits Mask(BitWidth);
153 Mask.Zero = APInt::getBitsSetFrom(
154 numBits: BitWidth, loBit: WidthKnown.getMaxValue().getLimitedValue(Limit: BitWidth));
155 Mask.One = APInt::getLowBitsSet(
156 numBits: BitWidth, loBitsSet: WidthKnown.getMinValue().getLimitedValue(Limit: BitWidth));
157 return KnownBits::lshr(LHS: SrcOpKnown, RHS: OffsetKnown) & Mask;
158}
159
/// Determine which bits of \p R are known to be zero or one, considering only
/// the vector lanes selected by \p DemandedElts, and return the result in
/// \p Known. Recurses through the defining instruction of \p R, giving up
/// once \p Depth reaches getMaxDepth(); unknown opcodes defer to the
/// target-specific hook.
void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
                                              const APInt &DemandedElts,
                                              unsigned Depth) {
  MachineInstr &MI = *MRI.getVRegDef(Reg: R);
  unsigned Opcode = MI.getOpcode();
  LLT DstTy = MRI.getType(Reg: R);

  // Handle the case where this is called on a register that does not have a
  // type constraint. For example, it may be post-ISel or this target might not
  // preserve the type when early-selecting instructions.
  if (!DstTy.isValid()) {
    Known = KnownBits();
    return;
  }

#ifndef NDEBUG
  if (DstTy.isFixedVector()) {
    assert(
        DstTy.getNumElements() == DemandedElts.getBitWidth() &&
        "DemandedElt width should equal the fixed vector number of elements");
  } else {
    assert(DemandedElts.getBitWidth() == 1 && DemandedElts == APInt(1, 1) &&
           "DemandedElt width should be 1 for scalars or scalable vectors");
  }
#endif

  unsigned BitWidth = DstTy.getScalarSizeInBits();
  Known = KnownBits(BitWidth); // Don't know anything

  // Depth may get bigger than max depth if it gets passed to a different
  // GISelValueTracking object.
  // This may happen when say a generic part uses a GISelValueTracking object
  // with some max depth, but then we hit TL.computeKnownBitsForTargetInstr
  // which creates a new GISelValueTracking object with a different and smaller
  // depth. If we just check for equality, we would never exit if the depth
  // that is passed down to the target specific GISelValueTracking object is
  // already bigger than its max depth.
  if (Depth >= getMaxDepth())
    return;

  if (!DemandedElts)
    return; // No demanded elts, better to assume we don't know anything.

  KnownBits Known2;

  switch (Opcode) {
  default:
    TL.computeKnownBitsForTargetInstr(Analysis&: *this, R, Known, DemandedElts, MRI,
                                      Depth);
    break;
  case TargetOpcode::G_BUILD_VECTOR: {
    // Collect the known bits that are shared by every demanded vector element.
    // Start from the all-known conflict state; each intersectWith below can
    // only remove known bits.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    for (const auto &[I, MO] : enumerate(First: drop_begin(RangeOrContainer: MI.operands()))) {
      if (!DemandedElts[I])
        continue;

      computeKnownBitsImpl(R: MO.getReg(), Known&: Known2, DemandedElts: APInt(1, 1), Depth: Depth + 1);

      // Known bits are the values that are shared by every demanded element.
      Known = Known.intersectWith(RHS: Known2);

      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }
    break;
  }
  case TargetOpcode::G_SPLAT_VECTOR: {
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts: APInt(1, 1),
                         Depth: Depth + 1);
    // Implicitly truncate the bits to match the official semantics of
    // G_SPLAT_VECTOR.
    Known = Known.trunc(BitWidth);
    break;
  }
  case TargetOpcode::COPY:
  case TargetOpcode::G_PHI:
  case TargetOpcode::PHI: {
    // Start from the all-known conflict state so that intersecting with each
    // incoming value only ever removes known bits.
    Known.One = APInt::getAllOnes(numBits: BitWidth);
    Known.Zero = APInt::getAllOnes(numBits: BitWidth);
    // Destination registers should not have subregisters at this
    // point of the pipeline, otherwise the main live-range will be
    // defined more than once, which is against SSA.
    assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?");
    // PHI's operand are a mix of registers and basic blocks interleaved.
    // We only care about the register ones.
    for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
      const MachineOperand &Src = MI.getOperand(i: Idx);
      Register SrcReg = Src.getReg();
      LLT SrcTy = MRI.getType(Reg: SrcReg);
      // Look through trivial copies and phis but don't look through trivial
      // copies or phis of the form `%1:(s32) = OP %0:gpr32`, known-bits
      // analysis is currently unable to determine the bit width of a
      // register class.
      //
      // We can't use NoSubRegister by name as it's defined by each target but
      // it's always defined to be 0 by tablegen.
      if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
          SrcTy.isValid()) {
        APInt NowDemandedElts;
        if (!SrcTy.isFixedVector()) {
          NowDemandedElts = APInt(1, 1);
        } else if (DstTy.isFixedVector() &&
                   SrcTy.getNumElements() == DstTy.getNumElements()) {
          NowDemandedElts = DemandedElts;
        } else {
          // Lane counts differ; conservatively demand every source lane.
          NowDemandedElts = APInt::getAllOnes(numBits: SrcTy.getNumElements());
        }

        // For COPYs we don't do anything, don't increase the depth.
        computeKnownBitsImpl(R: SrcReg, Known&: Known2, DemandedElts: NowDemandedElts,
                             Depth: Depth + (Opcode != TargetOpcode::COPY));
        Known2 = Known2.anyextOrTrunc(BitWidth);
        Known = Known.intersectWith(RHS: Known2);
        // If we reach a point where we don't know anything
        // just stop looking through the operands.
        if (Known.isUnknown())
          break;
      } else {
        // We know nothing.
        Known = KnownBits(BitWidth);
        break;
      }
    }
    break;
  }
  case TargetOpcode::G_CONSTANT: {
    Known = KnownBits::makeConstant(C: MI.getOperand(i: 1).getCImm()->getValue());
    break;
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI.getOperand(i: 1).getIndex();
    TL.computeKnownBitsForFrameIndex(FIOp: FrameIdx, Known, MF);
    break;
  }
  case TargetOpcode::G_SUB: {
    // LHS - RHS, refined by the no-wrap flags on the instruction.
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: Known2, DemandedElts,
                         Depth: Depth + 1);
    Known = KnownBits::sub(LHS: Known, RHS: Known2, NSW: MI.getFlag(Flag: MachineInstr::NoSWrap),
                           NUW: MI.getFlag(Flag: MachineInstr::NoUWrap));
    break;
  }
  case TargetOpcode::G_XOR: {
    computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
                         Depth: Depth + 1);

    Known ^= Known2;
    break;
  }
  case TargetOpcode::G_PTR_ADD: {
    if (DstTy.isVector())
      break;
    // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets?
    LLT Ty = MRI.getType(Reg: MI.getOperand(i: 1).getReg());
    if (DL.isNonIntegralAddressSpace(AddrSpace: Ty.getAddressSpace()))
      break;
    [[fallthrough]];
  }
  case TargetOpcode::G_ADD: {
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: Known2, DemandedElts,
                         Depth: Depth + 1);
    // TODO(review): unlike G_SUB above, the NSW/NUW flags are not forwarded
    // here — confirm whether KnownBits::add should receive them for extra
    // precision.
    Known = KnownBits::add(LHS: Known, RHS: Known2);
    break;
  }
  case TargetOpcode::G_AND: {
    // If either the LHS or the RHS are Zero, the result is zero.
    computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
                         Depth: Depth + 1);

    Known &= Known2;
    break;
  }
  case TargetOpcode::G_OR: {
    // If either the LHS or the RHS are Zero, the result is zero.
    computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
                         Depth: Depth + 1);

    Known |= Known2;
    break;
  }
  case TargetOpcode::G_MUL: {
    computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
                         Depth: Depth + 1);
    Known = KnownBits::mul(LHS: Known, RHS: Known2);
    break;
  }
  case TargetOpcode::G_UMULH: {
    computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
                         Depth: Depth + 1);
    Known = KnownBits::mulhu(LHS: Known, RHS: Known2);
    break;
  }
  case TargetOpcode::G_SMULH: {
    computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
                         Depth: Depth + 1);
    Known = KnownBits::mulhs(LHS: Known, RHS: Known2);
    break;
  }
  case TargetOpcode::G_ABDU: {
    computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
                         Depth: Depth + 1);
    Known = KnownBits::abdu(LHS: Known, RHS: Known2);
    break;
  }
  case TargetOpcode::G_ABDS: {
    computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
                         Depth: Depth + 1);
    Known = KnownBits::abds(LHS: Known, RHS: Known2);

    // If both operands share sign bits, the absolute difference keeps them
    // (minus one). Query the cheaper RHS first and bail if it gives nothing.
    unsigned SignBits1 =
        computeNumSignBits(R: MI.getOperand(i: 2).getReg(), DemandedElts, Depth: Depth + 1);
    if (SignBits1 == 1) {
      break;
    }
    unsigned SignBits0 =
        computeNumSignBits(R: MI.getOperand(i: 1).getReg(), DemandedElts, Depth: Depth + 1);

    Known.Zero.setHighBits(std::min(a: SignBits0, b: SignBits1) - 1);
    break;
  }
  case TargetOpcode::G_UDIV: {
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: Known2, DemandedElts,
                         Depth: Depth + 1);
    Known = KnownBits::udiv(LHS: Known, RHS: Known2,
                            Exact: MI.getFlag(Flag: MachineInstr::MIFlag::IsExact));
    break;
  }
  case TargetOpcode::G_SDIV: {
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: Known2, DemandedElts,
                         Depth: Depth + 1);
    Known = KnownBits::sdiv(LHS: Known, RHS: Known2,
                            Exact: MI.getFlag(Flag: MachineInstr::MIFlag::IsExact));
    break;
  }
  case TargetOpcode::G_SELECT: {
    // Either arm may be chosen, so only bits common to both are known.
    computeKnownBitsMin(Src0: MI.getOperand(i: 2).getReg(), Src1: MI.getOperand(i: 3).getReg(),
                        Known, DemandedElts, Depth: Depth + 1);
    break;
  }
  case TargetOpcode::G_SMIN: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: KnownRHS, DemandedElts,
                         Depth: Depth + 1);
    Known = KnownBits::smin(LHS: Known, RHS: KnownRHS);
    break;
  }
  case TargetOpcode::G_SMAX: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: KnownRHS, DemandedElts,
                         Depth: Depth + 1);
    Known = KnownBits::smax(LHS: Known, RHS: KnownRHS);
    break;
  }
  case TargetOpcode::G_UMIN: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: KnownRHS, DemandedElts,
                         Depth: Depth + 1);
    Known = KnownBits::umin(LHS: Known, RHS: KnownRHS);
    break;
  }
  case TargetOpcode::G_UMAX: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: KnownRHS, DemandedElts,
                         Depth: Depth + 1);
    Known = KnownBits::umax(LHS: Known, RHS: KnownRHS);
    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    if (DstTy.isVector())
      break;
    // If the target produces 0/1 booleans, everything above bit 0 is zero.
    if (TL.getBooleanContents(isVec: DstTy.isVector(),
                              isFloat: Opcode == TargetOpcode::G_FCMP) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case TargetOpcode::G_SEXT: {
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
                         Depth: Depth + 1);
    // If the sign bit is known to be zero or one, then sext will extend
    // it to the top bits, else it will just zext.
    Known = Known.sext(BitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_SEXT:
  case TargetOpcode::G_SEXT_INREG: {
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
                         Depth: Depth + 1);
    Known = Known.sextInReg(SrcBitWidth: MI.getOperand(i: 2).getImm());
    break;
  }
  case TargetOpcode::G_ANYEXT: {
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts,
                         Depth: Depth + 1);
    Known = Known.anyext(BitWidth);
    break;
  }
  case TargetOpcode::G_LOAD: {
    // The only information available comes from !range metadata on the
    // memory operand, if present.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(Ranges: *Ranges, Known&: KnownRange);
    Known = KnownRange.anyext(BitWidth: Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    if (DstTy.isVector())
      break;
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(Ranges: *Ranges, Known&: KnownRange);
    // Extend the in-memory known bits according to the load's extension kind.
    Known = Opcode == TargetOpcode::G_SEXTLOAD
                ? KnownRange.sext(BitWidth: Known.getBitWidth())
                : KnownRange.zext(BitWidth: Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_ASHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: LHSKnown, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: RHSKnown, DemandedElts,
                         Depth: Depth + 1);
    Known = KnownBits::ashr(LHS: LHSKnown, RHS: RHSKnown);
    break;
  }
  case TargetOpcode::G_LSHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: LHSKnown, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: RHSKnown, DemandedElts,
                         Depth: Depth + 1);
    Known = KnownBits::lshr(LHS: LHSKnown, RHS: RHSKnown);
    break;
  }
  case TargetOpcode::G_SHL: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: LHSKnown, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: RHSKnown, DemandedElts,
                         Depth: Depth + 1);
    Known = KnownBits::shl(LHS: LHSKnown, RHS: RHSKnown);
    break;
  }
  case TargetOpcode::G_ROTL:
  case TargetOpcode::G_ROTR: {
    // Only handled when the rotate amount is a constant (or constant splat).
    MachineInstr *AmtOpMI = MRI.getVRegDef(Reg: MI.getOperand(i: 2).getReg());
    auto MaybeAmtOp = isConstantOrConstantSplatVector(MI&: *AmtOpMI, MRI);
    if (!MaybeAmtOp)
      break;

    Register SrcReg = MI.getOperand(i: 1).getReg();
    computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1);

    unsigned Amt = MaybeAmtOp->urem(RHS: BitWidth);

    // Canonicalize to ROTR.
    // (If Amt was 0, this yields BitWidth; APInt::rotr reduces the amount
    // modulo the bit width, so that is still a no-op rotate.)
    if (Opcode == TargetOpcode::G_ROTL)
      Amt = BitWidth - Amt;

    Known.Zero = Known.Zero.rotr(rotateAmt: Amt);
    Known.One = Known.One.rotr(rotateAmt: Amt);
    break;
  }
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_PTRTOINT:
    if (DstTy.isVector())
      break;
    // Fall through and handle them the same as zext/trunc.
    [[fallthrough]];
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_TRUNC: {
    Register SrcReg = MI.getOperand(i: 1).getReg();
    computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1);
    Known = Known.zextOrTrunc(BitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_ZEXT: {
    Register SrcReg = MI.getOperand(i: 1).getReg();
    computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1);

    // The assert guarantees everything above SrcBitWidth is zero.
    unsigned SrcBitWidth = MI.getOperand(i: 2).getImm();
    assert(SrcBitWidth && "SrcBitWidth can't be zero");
    APInt InMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: SrcBitWidth);
    Known.Zero |= (~InMask);
    Known.One &= (~Known.Zero);
    break;
  }
  case TargetOpcode::G_ASSERT_ALIGN: {
    int64_t LogOfAlign = Log2_64(Value: MI.getOperand(i: 2).getImm());

    // TODO: Should use maximum with source
    // If a node is guaranteed to be aligned, set low zero bits accordingly as
    // well as clearing one bits.
    Known.Zero.setLowBits(LogOfAlign);
    Known.One.clearLowBits(loBits: LogOfAlign);
    break;
  }
  case TargetOpcode::G_MERGE_VALUES: {
    unsigned NumOps = MI.getNumOperands();
    unsigned OpSize = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits();

    // Each source fills a disjoint OpSize-wide slice of the result.
    for (unsigned I = 0; I != NumOps - 1; ++I) {
      KnownBits SrcOpKnown;
      computeKnownBitsImpl(R: MI.getOperand(i: I + 1).getReg(), Known&: SrcOpKnown,
                           DemandedElts, Depth: Depth + 1);
      Known.insertBits(SubBits: SrcOpKnown, BitPosition: I * OpSize);
    }
    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    unsigned NumOps = MI.getNumOperands();
    Register SrcReg = MI.getOperand(i: NumOps - 1).getReg();
    LLT SrcTy = MRI.getType(Reg: SrcReg);

    if (SrcTy.isVector() && SrcTy.getScalarType() != DstTy.getScalarType())
      return; // TODO: Handle vector->subelement unmerges

    // Figure out the result operand index
    unsigned DstIdx = 0;
    for (; DstIdx != NumOps - 1 && MI.getOperand(i: DstIdx).getReg() != R;
         ++DstIdx)
      ;

    // Map the demanded lanes of this result back onto the source's lanes.
    APInt SubDemandedElts = DemandedElts;
    if (SrcTy.isVector()) {
      unsigned DstLanes = DstTy.isVector() ? DstTy.getNumElements() : 1;
      SubDemandedElts =
          DemandedElts.zext(width: SrcTy.getNumElements()).shl(shiftAmt: DstIdx * DstLanes);
    }

    KnownBits SrcOpKnown;
    computeKnownBitsImpl(R: SrcReg, Known&: SrcOpKnown, DemandedElts: SubDemandedElts, Depth: Depth + 1);

    if (SrcTy.isVector())
      Known = std::move(SrcOpKnown);
    else
      Known = SrcOpKnown.extractBits(NumBits: BitWidth, BitPosition: BitWidth * DstIdx);
    break;
  }
  case TargetOpcode::G_BSWAP: {
    Register SrcReg = MI.getOperand(i: 1).getReg();
    computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1);
    Known = Known.byteSwap();
    break;
  }
  case TargetOpcode::G_BITREVERSE: {
    Register SrcReg = MI.getOperand(i: 1).getReg();
    computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1);
    Known = Known.reverseBits();
    break;
  }
  case TargetOpcode::G_CTPOP: {
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts,
                         Depth: Depth + 1);
    // We can bound the space the count needs. Also, bits known to be zero
    // can't contribute to the population.
    unsigned BitsPossiblySet = Known2.countMaxPopulation();
    unsigned LowBits = llvm::bit_width(Value: BitsPossiblySet);
    Known.Zero.setBitsFrom(LowBits);
    // TODO: we could bound Known.One using the lower bound on the number of
    // bits which might be set provided by popcnt KnownOne2.
    break;
  }
  case TargetOpcode::G_UBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: SrcOpKnown, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: OffsetKnown, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 3).getReg(), Known&: WidthKnown, DemandedElts,
                         Depth: Depth + 1);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    break;
  }
  case TargetOpcode::G_SBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: SrcOpKnown, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: OffsetKnown, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 3).getReg(), Known&: WidthKnown, DemandedElts,
                         Depth: Depth + 1);
    OffsetKnown = OffsetKnown.sext(BitWidth);
    WidthKnown = WidthKnown.sext(BitWidth);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    // Sign extend the extracted value using shift left and arithmetic shift
    // right.
    KnownBits ExtKnown = KnownBits::makeConstant(C: APInt(BitWidth, BitWidth));
    KnownBits ShiftKnown = KnownBits::sub(LHS: ExtKnown, RHS: WidthKnown);
    Known = KnownBits::ashr(LHS: KnownBits::shl(LHS: Known, RHS: ShiftKnown), RHS: ShiftKnown);
    break;
  }
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE: {
    if (MI.getOperand(i: 1).getReg() == R) {
      // If we know the result of a compare has the top bits zero, use this
      // info.
      if (TL.getBooleanContents(isVec: DstTy.isVector(), isFloat: false) ==
              TargetLowering::ZeroOrOneBooleanContent &&
          BitWidth > 1)
        Known.Zero.setBitsFrom(1);
      break;
    }

    assert(MI.getOperand(0).getReg() == R &&
           "We only compute knownbits for the sum here.");
    // With [US]ADDE, a carry bit may be added in.
    KnownBits Carry(1);
    if (Opcode == TargetOpcode::G_UADDE || Opcode == TargetOpcode::G_SADDE) {
      computeKnownBitsImpl(R: MI.getOperand(i: 4).getReg(), Known&: Carry, DemandedElts,
                           Depth: Depth + 1);
      // Carry has bit width 1
      Carry = Carry.trunc(BitWidth: 1);
    } else {
      Carry.setAllZero();
    }

    computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts,
                         Depth: Depth + 1);
    computeKnownBitsImpl(R: MI.getOperand(i: 3).getReg(), Known&: Known2, DemandedElts,
                         Depth: Depth + 1);
    Known = KnownBits::computeForAddCarry(LHS: Known, RHS: Known2, Carry);
    break;
  }
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO: {
    // Only the overflow result (def 1) is handled here.
    if (MI.getOperand(i: 1).getReg() == R) {
      // If we know the result of a compare has the top bits zero, use this
      // info.
      if (TL.getBooleanContents(isVec: DstTy.isVector(), isFloat: false) ==
              TargetLowering::ZeroOrOneBooleanContent &&
          BitWidth > 1)
        Known.Zero.setBitsFrom(1);
    }
    break;
  }
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    KnownBits SrcOpKnown;
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: SrcOpKnown, DemandedElts,
                         Depth: Depth + 1);
    // If we have a known 1, its position is our upper bound
    unsigned PossibleTZ = SrcOpKnown.countMaxTrailingZeros();
    unsigned LowBits = llvm::bit_width(Value: PossibleTZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    KnownBits SrcOpKnown;
    computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: SrcOpKnown, DemandedElts,
                         Depth: Depth + 1);
    // If we have a known 1, its position is our upper bound.
    unsigned PossibleLZ = SrcOpKnown.countMaxLeadingZeros();
    unsigned LowBits = llvm::bit_width(Value: PossibleLZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case TargetOpcode::G_CTLS: {
    // NOTE(review): this treats G_CTLS as counting redundant sign bits, i.e.
    // the result lies in [numSignBits - 1, scalar size) — confirm against the
    // opcode's documented semantics.
    Register Reg = MI.getOperand(i: 1).getReg();
    unsigned MinRedundantSignBits = computeNumSignBits(R: Reg, Depth: Depth + 1) - 1;

    unsigned MaxUpperRedundantSignBits = MRI.getType(Reg).getScalarSizeInBits();

    ConstantRange Range(APInt(BitWidth, MinRedundantSignBits),
                        APInt(BitWidth, MaxUpperRedundantSignBits));

    Known = Range.toKnownBits();
    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    GExtractVectorElement &Extract = cast<GExtractVectorElement>(Val&: MI);
    Register InVec = Extract.getVectorReg();
    Register EltNo = Extract.getIndexReg();

    auto ConstEltNo = getIConstantVRegVal(VReg: EltNo, MRI);

    LLT VecVT = MRI.getType(Reg: InVec);
    // computeKnownBits not yet implemented for scalable vectors.
    if (VecVT.isScalableVector())
      break;

    const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
    const unsigned NumSrcElts = VecVT.getNumElements();
    // A return type different from the vector's element type may lead to
    // issues with pattern selection. Bail out to avoid that.
    if (BitWidth > EltBitWidth)
      break;

    Known.Zero.setAllBits();
    Known.One.setAllBits();

    // If we know the element index, just demand that vector element, else for
    // an unknown element index, ignore DemandedElts and demand them all.
    APInt DemandedSrcElts = APInt::getAllOnes(numBits: NumSrcElts);
    if (ConstEltNo && ConstEltNo->ult(RHS: NumSrcElts))
      DemandedSrcElts =
          APInt::getOneBitSet(numBits: NumSrcElts, BitNo: ConstEltNo->getZExtValue());

    computeKnownBitsImpl(R: InVec, Known, DemandedElts: DemandedSrcElts, Depth: Depth + 1);
    break;
  }
  case TargetOpcode::G_SHUFFLE_VECTOR: {
    APInt DemandedLHS, DemandedRHS;
    // Collect the known bits that are shared by every vector element referenced
    // by the shuffle.
    unsigned NumElts = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getNumElements();
    if (!getShuffleDemandedElts(SrcWidth: NumElts, Mask: MI.getOperand(i: 3).getShuffleMask(),
                                DemandedElts, DemandedLHS, DemandedRHS))
      break;

    // Known bits are the values that are shared by every demanded element.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    if (!!DemandedLHS) {
      computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts: DemandedLHS,
                           Depth: Depth + 1);
      Known = Known.intersectWith(RHS: Known2);
    }
    // If we don't know any bits, early out.
    if (Known.isUnknown())
      break;
    if (!!DemandedRHS) {
      computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: Known2, DemandedElts: DemandedRHS,
                           Depth: Depth + 1);
      Known = Known.intersectWith(RHS: Known2);
    }
    break;
  }
  case TargetOpcode::G_CONCAT_VECTORS: {
    if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isScalableVector())
      break;
    // Split DemandedElts and test each of the demanded subvectors.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    unsigned NumSubVectorElts =
        MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getNumElements();

    for (const auto &[I, MO] : enumerate(First: drop_begin(RangeOrContainer: MI.operands()))) {
      APInt DemandedSub =
          DemandedElts.extractBits(numBits: NumSubVectorElts, bitPosition: I * NumSubVectorElts);
      if (!!DemandedSub) {
        computeKnownBitsImpl(R: MO.getReg(), Known&: Known2, DemandedElts: DemandedSub, Depth: Depth + 1);

        Known = Known.intersectWith(RHS: Known2);
      }
      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }
    break;
  }
  case TargetOpcode::G_ABS: {
    Register SrcReg = MI.getOperand(i: 1).getReg();
    computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1);
    Known = Known.abs();
    // abs preserves all but one of the source's redundant sign bits as zeros.
    Known.Zero.setHighBits(computeNumSignBits(R: SrcReg, DemandedElts, Depth: Depth + 1) -
                           1);
    break;
  }
  }

  LLVM_DEBUG(dumpResult(MI, Known, Depth));
}
870
871static bool outputDenormalIsIEEEOrPosZero(const MachineFunction &MF, LLT Ty) {
872 Ty = Ty.getScalarType();
873 DenormalMode Mode = MF.getDenormalMode(FPType: getFltSemanticForLLT(Ty));
874 return Mode.Output == DenormalMode::IEEE ||
875 Mode.Output == DenormalMode::PositiveZero;
876}
877
878void GISelValueTracking::computeKnownFPClass(Register R, KnownFPClass &Known,
879 FPClassTest InterestedClasses,
880 unsigned Depth) {
881 LLT Ty = MRI.getType(Reg: R);
882 APInt DemandedElts =
883 Ty.isFixedVector() ? APInt::getAllOnes(numBits: Ty.getNumElements()) : APInt(1, 1);
884 computeKnownFPClass(R, DemandedElts, InterestedClasses, Known, Depth);
885}
886
887void GISelValueTracking::computeKnownFPClassForFPTrunc(
888 const MachineInstr &MI, const APInt &DemandedElts,
889 FPClassTest InterestedClasses, KnownFPClass &Known, unsigned Depth) {
890 if ((InterestedClasses & (KnownFPClass::OrderedLessThanZeroMask | fcNan)) ==
891 fcNone)
892 return;
893
894 Register Val = MI.getOperand(i: 1).getReg();
895 KnownFPClass KnownSrc;
896 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc,
897 Depth: Depth + 1);
898
899 // Sign should be preserved
900 // TODO: Handle cannot be ordered greater than zero
901 if (KnownSrc.cannotBeOrderedLessThanZero())
902 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
903
904 Known.propagateNaN(Src: KnownSrc, PreserveSign: true);
905
906 // Infinity needs a range check.
907}
908
909void GISelValueTracking::computeKnownFPClass(Register R,
910 const APInt &DemandedElts,
911 FPClassTest InterestedClasses,
912 KnownFPClass &Known,
913 unsigned Depth) {
914 assert(Known.isUnknown() && "should not be called with known information");
915
916 if (!DemandedElts) {
917 // No demanded elts, better to assume we don't know anything.
918 Known.resetAll();
919 return;
920 }
921
922 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
923
924 MachineInstr &MI = *MRI.getVRegDef(Reg: R);
925 unsigned Opcode = MI.getOpcode();
926 LLT DstTy = MRI.getType(Reg: R);
927
928 if (!DstTy.isValid()) {
929 Known.resetAll();
930 return;
931 }
932
933 if (auto Cst = GFConstant::getConstant(Const: R, MRI)) {
934 switch (Cst->getKind()) {
935 case GFConstant::GFConstantKind::Scalar: {
936 auto APF = Cst->getScalarValue();
937 Known.KnownFPClasses = APF.classify();
938 Known.SignBit = APF.isNegative();
939 break;
940 }
941 case GFConstant::GFConstantKind::FixedVector: {
942 Known.KnownFPClasses = fcNone;
943 bool SignBitAllZero = true;
944 bool SignBitAllOne = true;
945
946 for (auto C : *Cst) {
947 Known.KnownFPClasses |= C.classify();
948 if (C.isNegative())
949 SignBitAllZero = false;
950 else
951 SignBitAllOne = false;
952 }
953
954 if (SignBitAllOne != SignBitAllZero)
955 Known.SignBit = SignBitAllOne;
956
957 break;
958 }
959 case GFConstant::GFConstantKind::ScalableVector: {
960 Known.resetAll();
961 break;
962 }
963 }
964
965 return;
966 }
967
968 FPClassTest KnownNotFromFlags = fcNone;
969 if (MI.getFlag(Flag: MachineInstr::MIFlag::FmNoNans))
970 KnownNotFromFlags |= fcNan;
971 if (MI.getFlag(Flag: MachineInstr::MIFlag::FmNoInfs))
972 KnownNotFromFlags |= fcInf;
973
974 // We no longer need to find out about these bits from inputs if we can
975 // assume this from flags/attributes.
976 InterestedClasses &= ~KnownNotFromFlags;
977
978 llvm::scope_exit ClearClassesFromFlags(
979 [=, &Known] { Known.knownNot(RuleOut: KnownNotFromFlags); });
980
981 // All recursive calls that increase depth must come after this.
982 if (Depth == MaxAnalysisRecursionDepth)
983 return;
984
985 const MachineFunction *MF = MI.getMF();
986
987 switch (Opcode) {
988 default:
989 TL.computeKnownFPClassForTargetInstr(Analysis&: *this, R, Known, DemandedElts, MRI,
990 Depth);
991 break;
992 case TargetOpcode::G_FNEG: {
993 Register Val = MI.getOperand(i: 1).getReg();
994 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known, Depth: Depth + 1);
995 Known.fneg();
996 break;
997 }
998 case TargetOpcode::G_SELECT: {
999 GSelect &SelMI = cast<GSelect>(Val&: MI);
1000 Register Cond = SelMI.getCondReg();
1001 Register LHS = SelMI.getTrueReg();
1002 Register RHS = SelMI.getFalseReg();
1003
1004 FPClassTest FilterLHS = fcAllFlags;
1005 FPClassTest FilterRHS = fcAllFlags;
1006
1007 Register TestedValue;
1008 FPClassTest MaskIfTrue = fcAllFlags;
1009 FPClassTest MaskIfFalse = fcAllFlags;
1010 FPClassTest ClassVal = fcNone;
1011
1012 CmpInst::Predicate Pred;
1013 Register CmpLHS, CmpRHS;
1014 if (mi_match(R: Cond, MRI,
1015 P: m_GFCmp(P: m_Pred(P&: Pred), L: m_Reg(R&: CmpLHS), R: m_Reg(R&: CmpRHS)))) {
1016 // If the select filters out a value based on the class, it no longer
1017 // participates in the class of the result
1018
1019 // TODO: In some degenerate cases we can infer something if we try again
1020 // without looking through sign operations.
1021 bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS;
1022 std::tie(args&: TestedValue, args&: MaskIfTrue, args&: MaskIfFalse) =
1023 fcmpImpliesClass(Pred, MF: *MF, LHS: CmpLHS, RHS: CmpRHS, LookThroughSrc: LookThroughFAbsFNeg);
1024 } else if (mi_match(
1025 R: Cond, MRI,
1026 P: m_GIsFPClass(L: m_Reg(R&: TestedValue), T: m_FPClassTest(T&: ClassVal)))) {
1027 FPClassTest TestedMask = ClassVal;
1028 MaskIfTrue = TestedMask;
1029 MaskIfFalse = ~TestedMask;
1030 }
1031
1032 if (TestedValue == LHS) {
1033 // match !isnan(x) ? x : y
1034 FilterLHS = MaskIfTrue;
1035 } else if (TestedValue == RHS) { // && IsExactClass
1036 // match !isnan(x) ? y : x
1037 FilterRHS = MaskIfFalse;
1038 }
1039
1040 KnownFPClass Known2;
1041 computeKnownFPClass(R: LHS, DemandedElts, InterestedClasses: InterestedClasses & FilterLHS, Known,
1042 Depth: Depth + 1);
1043 Known.KnownFPClasses &= FilterLHS;
1044
1045 computeKnownFPClass(R: RHS, DemandedElts, InterestedClasses: InterestedClasses & FilterRHS,
1046 Known&: Known2, Depth: Depth + 1);
1047 Known2.KnownFPClasses &= FilterRHS;
1048
1049 Known |= Known2;
1050 break;
1051 }
1052 case TargetOpcode::G_FCOPYSIGN: {
1053 Register Magnitude = MI.getOperand(i: 1).getReg();
1054 Register Sign = MI.getOperand(i: 2).getReg();
1055
1056 KnownFPClass KnownSign;
1057
1058 computeKnownFPClass(R: Magnitude, DemandedElts, InterestedClasses, Known,
1059 Depth: Depth + 1);
1060 computeKnownFPClass(R: Sign, DemandedElts, InterestedClasses, Known&: KnownSign,
1061 Depth: Depth + 1);
1062 Known.copysign(Sign: KnownSign);
1063 break;
1064 }
1065 case TargetOpcode::G_FMA:
1066 case TargetOpcode::G_STRICT_FMA:
1067 case TargetOpcode::G_FMAD: {
1068 if ((InterestedClasses & fcNegative) == fcNone)
1069 break;
1070
1071 Register A = MI.getOperand(i: 1).getReg();
1072 Register B = MI.getOperand(i: 2).getReg();
1073 Register C = MI.getOperand(i: 3).getReg();
1074
1075 if (A != B)
1076 break;
1077
1078 // The multiply cannot be -0 and therefore the add can't be -0
1079 Known.knownNot(RuleOut: fcNegZero);
1080
1081 // x * x + y is non-negative if y is non-negative.
1082 KnownFPClass KnownAddend;
1083 computeKnownFPClass(R: C, DemandedElts, InterestedClasses, Known&: KnownAddend,
1084 Depth: Depth + 1);
1085
1086 if (KnownAddend.cannotBeOrderedLessThanZero())
1087 Known.knownNot(RuleOut: fcNegative);
1088 break;
1089 }
1090 case TargetOpcode::G_FSQRT:
1091 case TargetOpcode::G_STRICT_FSQRT: {
1092 KnownFPClass KnownSrc;
1093 FPClassTest InterestedSrcs = InterestedClasses;
1094 if (InterestedClasses & fcNan)
1095 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
1096
1097 Register Val = MI.getOperand(i: 1).getReg();
1098
1099 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses: InterestedSrcs, Known&: KnownSrc, Depth: Depth + 1);
1100
1101 if (KnownSrc.isKnownNeverPosInfinity())
1102 Known.knownNot(RuleOut: fcPosInf);
1103 if (KnownSrc.isKnownNever(Mask: fcSNan))
1104 Known.knownNot(RuleOut: fcSNan);
1105
1106 // Any negative value besides -0 returns a nan.
1107 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
1108 Known.knownNot(RuleOut: fcNan);
1109
1110 // The only negative value that can be returned is -0 for -0 inputs.
1111 Known.knownNot(RuleOut: fcNegInf | fcNegSubnormal | fcNegNormal);
1112 break;
1113 }
1114 case TargetOpcode::G_FABS: {
1115 if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) {
1116 Register Val = MI.getOperand(i: 1).getReg();
1117 // If we only care about the sign bit we don't need to inspect the
1118 // operand.
1119 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known,
1120 Depth: Depth + 1);
1121 }
1122 Known.fabs();
1123 break;
1124 }
1125 case TargetOpcode::G_FSIN:
1126 case TargetOpcode::G_FCOS:
1127 case TargetOpcode::G_FSINCOS: {
1128 // Return NaN on infinite inputs.
1129 Register Val = MI.getOperand(i: 1).getReg();
1130 KnownFPClass KnownSrc;
1131
1132 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc,
1133 Depth: Depth + 1);
1134 Known.knownNot(RuleOut: fcInf);
1135
1136 if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity())
1137 Known.knownNot(RuleOut: fcNan);
1138 break;
1139 }
1140 case TargetOpcode::G_FMAXNUM:
1141 case TargetOpcode::G_FMINNUM:
1142 case TargetOpcode::G_FMINNUM_IEEE:
1143 case TargetOpcode::G_FMAXIMUM:
1144 case TargetOpcode::G_FMINIMUM:
1145 case TargetOpcode::G_FMAXNUM_IEEE:
1146 case TargetOpcode::G_FMAXIMUMNUM:
1147 case TargetOpcode::G_FMINIMUMNUM: {
1148 Register LHS = MI.getOperand(i: 1).getReg();
1149 Register RHS = MI.getOperand(i: 2).getReg();
1150 KnownFPClass KnownLHS, KnownRHS;
1151
1152 computeKnownFPClass(R: LHS, DemandedElts, InterestedClasses, Known&: KnownLHS,
1153 Depth: Depth + 1);
1154 computeKnownFPClass(R: RHS, DemandedElts, InterestedClasses, Known&: KnownRHS,
1155 Depth: Depth + 1);
1156
1157 bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN();
1158 Known = KnownLHS | KnownRHS;
1159
1160 // If either operand is not NaN, the result is not NaN.
1161 if (NeverNaN && (Opcode == TargetOpcode::G_FMINNUM ||
1162 Opcode == TargetOpcode::G_FMAXNUM ||
1163 Opcode == TargetOpcode::G_FMINIMUMNUM ||
1164 Opcode == TargetOpcode::G_FMAXIMUMNUM))
1165 Known.knownNot(RuleOut: fcNan);
1166
1167 if (Opcode == TargetOpcode::G_FMAXNUM ||
1168 Opcode == TargetOpcode::G_FMAXIMUMNUM ||
1169 Opcode == TargetOpcode::G_FMAXNUM_IEEE) {
1170 // If at least one operand is known to be positive, the result must be
1171 // positive.
1172 if ((KnownLHS.cannotBeOrderedLessThanZero() &&
1173 KnownLHS.isKnownNeverNaN()) ||
1174 (KnownRHS.cannotBeOrderedLessThanZero() &&
1175 KnownRHS.isKnownNeverNaN()))
1176 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
1177 } else if (Opcode == TargetOpcode::G_FMAXIMUM) {
1178 // If at least one operand is known to be positive, the result must be
1179 // positive.
1180 if (KnownLHS.cannotBeOrderedLessThanZero() ||
1181 KnownRHS.cannotBeOrderedLessThanZero())
1182 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
1183 } else if (Opcode == TargetOpcode::G_FMINNUM ||
1184 Opcode == TargetOpcode::G_FMINIMUMNUM ||
1185 Opcode == TargetOpcode::G_FMINNUM_IEEE) {
1186 // If at least one operand is known to be negative, the result must be
1187 // negative.
1188 if ((KnownLHS.cannotBeOrderedGreaterThanZero() &&
1189 KnownLHS.isKnownNeverNaN()) ||
1190 (KnownRHS.cannotBeOrderedGreaterThanZero() &&
1191 KnownRHS.isKnownNeverNaN()))
1192 Known.knownNot(RuleOut: KnownFPClass::OrderedGreaterThanZeroMask);
1193 } else if (Opcode == TargetOpcode::G_FMINIMUM) {
1194 // If at least one operand is known to be negative, the result must be
1195 // negative.
1196 if (KnownLHS.cannotBeOrderedGreaterThanZero() ||
1197 KnownRHS.cannotBeOrderedGreaterThanZero())
1198 Known.knownNot(RuleOut: KnownFPClass::OrderedGreaterThanZeroMask);
1199 } else {
1200 llvm_unreachable("unhandled intrinsic");
1201 }
1202
1203 // Fixup zero handling if denormals could be returned as a zero.
1204 //
1205 // As there's no spec for denormal flushing, be conservative with the
1206 // treatment of denormals that could be flushed to zero. For older
1207 // subtargets on AMDGPU the min/max instructions would not flush the
1208 // output and return the original value.
1209 //
1210 if ((Known.KnownFPClasses & fcZero) != fcNone &&
1211 !Known.isKnownNeverSubnormal()) {
1212 DenormalMode Mode =
1213 MF->getDenormalMode(FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType()));
1214 if (Mode != DenormalMode::getIEEE())
1215 Known.KnownFPClasses |= fcZero;
1216 }
1217
1218 if (Known.isKnownNeverNaN()) {
1219 if (KnownLHS.SignBit && KnownRHS.SignBit &&
1220 *KnownLHS.SignBit == *KnownRHS.SignBit) {
1221 if (*KnownLHS.SignBit)
1222 Known.signBitMustBeOne();
1223 else
1224 Known.signBitMustBeZero();
1225 } else if ((Opcode == TargetOpcode::G_FMAXIMUM ||
1226 Opcode == TargetOpcode::G_FMINIMUM) ||
1227 Opcode == TargetOpcode::G_FMAXIMUMNUM ||
1228 Opcode == TargetOpcode::G_FMINIMUMNUM ||
1229 Opcode == TargetOpcode::G_FMAXNUM_IEEE ||
1230 Opcode == TargetOpcode::G_FMINNUM_IEEE ||
1231 // FIXME: Should be using logical zero versions
1232 ((KnownLHS.isKnownNeverNegZero() ||
1233 KnownRHS.isKnownNeverPosZero()) &&
1234 (KnownLHS.isKnownNeverPosZero() ||
1235 KnownRHS.isKnownNeverNegZero()))) {
1236 if ((Opcode == TargetOpcode::G_FMAXIMUM ||
1237 Opcode == TargetOpcode::G_FMAXNUM ||
1238 Opcode == TargetOpcode::G_FMAXIMUMNUM ||
1239 Opcode == TargetOpcode::G_FMAXNUM_IEEE) &&
1240 (KnownLHS.SignBit == false || KnownRHS.SignBit == false))
1241 Known.signBitMustBeZero();
1242 else if ((Opcode == TargetOpcode::G_FMINIMUM ||
1243 Opcode == TargetOpcode::G_FMINNUM ||
1244 Opcode == TargetOpcode::G_FMINIMUMNUM ||
1245 Opcode == TargetOpcode::G_FMINNUM_IEEE) &&
1246 (KnownLHS.SignBit == true || KnownRHS.SignBit == true))
1247 Known.signBitMustBeOne();
1248 }
1249 }
1250 break;
1251 }
1252 case TargetOpcode::G_FCANONICALIZE: {
1253 Register Val = MI.getOperand(i: 1).getReg();
1254 KnownFPClass KnownSrc;
1255 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc,
1256 Depth: Depth + 1);
1257
1258 // This is essentially a stronger form of
1259 // propagateCanonicalizingSrc. Other "canonicalizing" operations don't
1260 // actually have an IR canonicalization guarantee.
1261
1262 // Canonicalize may flush denormals to zero, so we have to consider the
1263 // denormal mode to preserve known-not-0 knowledge.
1264 Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan;
1265
1266 // Stronger version of propagateNaN
1267 // Canonicalize is guaranteed to quiet signaling nans.
1268 if (KnownSrc.isKnownNeverNaN())
1269 Known.knownNot(RuleOut: fcNan);
1270 else
1271 Known.knownNot(RuleOut: fcSNan);
1272
1273 // If the parent function flushes denormals, the canonical output cannot
1274 // be a denormal.
1275 LLT Ty = MRI.getType(Reg: Val).getScalarType();
1276 const fltSemantics &FPType = getFltSemanticForLLT(Ty);
1277 DenormalMode DenormMode = MF->getDenormalMode(FPType);
1278 if (DenormMode == DenormalMode::getIEEE()) {
1279 if (KnownSrc.isKnownNever(Mask: fcPosZero))
1280 Known.knownNot(RuleOut: fcPosZero);
1281 if (KnownSrc.isKnownNever(Mask: fcNegZero))
1282 Known.knownNot(RuleOut: fcNegZero);
1283 break;
1284 }
1285
1286 if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero())
1287 Known.knownNot(RuleOut: fcSubnormal);
1288
1289 if (DenormMode.Input == DenormalMode::PositiveZero ||
1290 (DenormMode.Output == DenormalMode::PositiveZero &&
1291 DenormMode.Input == DenormalMode::IEEE))
1292 Known.knownNot(RuleOut: fcNegZero);
1293
1294 break;
1295 }
1296 case TargetOpcode::G_VECREDUCE_FMAX:
1297 case TargetOpcode::G_VECREDUCE_FMIN:
1298 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
1299 case TargetOpcode::G_VECREDUCE_FMINIMUM: {
1300 Register Val = MI.getOperand(i: 1).getReg();
1301 // reduce min/max will choose an element from one of the vector elements,
1302 // so we can infer and class information that is common to all elements.
1303
1304 Known =
1305 computeKnownFPClass(R: Val, Flags: MI.getFlags(), InterestedClasses, Depth: Depth + 1);
1306 // Can only propagate sign if output is never NaN.
1307 if (!Known.isKnownNeverNaN())
1308 Known.SignBit.reset();
1309 break;
1310 }
1311 case TargetOpcode::G_TRUNC:
1312 case TargetOpcode::G_FFLOOR:
1313 case TargetOpcode::G_FCEIL:
1314 case TargetOpcode::G_FRINT:
1315 case TargetOpcode::G_FNEARBYINT:
1316 case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND:
1317 case TargetOpcode::G_INTRINSIC_ROUND: {
1318 Register Val = MI.getOperand(i: 1).getReg();
1319 KnownFPClass KnownSrc;
1320 FPClassTest InterestedSrcs = InterestedClasses;
1321 if (InterestedSrcs & fcPosFinite)
1322 InterestedSrcs |= fcPosFinite;
1323 if (InterestedSrcs & fcNegFinite)
1324 InterestedSrcs |= fcNegFinite;
1325 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses: InterestedSrcs, Known&: KnownSrc, Depth: Depth + 1);
1326
1327 // Integer results cannot be subnormal.
1328 Known.knownNot(RuleOut: fcSubnormal);
1329
1330 Known.propagateNaN(Src: KnownSrc, PreserveSign: true);
1331
1332 // TODO: handle multi unit FPTypes once LLT FPInfo lands
1333
1334 // Negative round ups to 0 produce -0
1335 if (KnownSrc.isKnownNever(Mask: fcPosFinite))
1336 Known.knownNot(RuleOut: fcPosFinite);
1337 if (KnownSrc.isKnownNever(Mask: fcNegFinite))
1338 Known.knownNot(RuleOut: fcNegFinite);
1339
1340 break;
1341 }
1342 case TargetOpcode::G_FEXP:
1343 case TargetOpcode::G_FEXP2:
1344 case TargetOpcode::G_FEXP10: {
1345 Known.knownNot(RuleOut: fcNegative);
1346 if ((InterestedClasses & fcNan) == fcNone)
1347 break;
1348
1349 Register Val = MI.getOperand(i: 1).getReg();
1350 KnownFPClass KnownSrc;
1351 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc,
1352 Depth: Depth + 1);
1353 if (KnownSrc.isKnownNeverNaN()) {
1354 Known.knownNot(RuleOut: fcNan);
1355 Known.signBitMustBeZero();
1356 }
1357
1358 break;
1359 }
1360 case TargetOpcode::G_FLOG:
1361 case TargetOpcode::G_FLOG2:
1362 case TargetOpcode::G_FLOG10: {
1363 // log(+inf) -> +inf
1364 // log([+-]0.0) -> -inf
1365 // log(-inf) -> nan
1366 // log(-x) -> nan
1367 if ((InterestedClasses & (fcNan | fcInf)) == fcNone)
1368 break;
1369
1370 FPClassTest InterestedSrcs = InterestedClasses;
1371 if ((InterestedClasses & fcNegInf) != fcNone)
1372 InterestedSrcs |= fcZero | fcSubnormal;
1373 if ((InterestedClasses & fcNan) != fcNone)
1374 InterestedSrcs |= fcNan | (fcNegative & ~fcNan);
1375
1376 Register Val = MI.getOperand(i: 1).getReg();
1377 KnownFPClass KnownSrc;
1378 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses: InterestedSrcs, Known&: KnownSrc, Depth: Depth + 1);
1379
1380 if (KnownSrc.isKnownNeverPosInfinity())
1381 Known.knownNot(RuleOut: fcPosInf);
1382
1383 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
1384 Known.knownNot(RuleOut: fcNan);
1385
1386 LLT Ty = MRI.getType(Reg: Val).getScalarType();
1387 const fltSemantics &FltSem = getFltSemanticForLLT(Ty);
1388 DenormalMode Mode = MF->getDenormalMode(FPType: FltSem);
1389
1390 if (KnownSrc.isKnownNeverLogicalZero(Mode))
1391 Known.knownNot(RuleOut: fcNegInf);
1392
1393 break;
1394 }
1395 case TargetOpcode::G_FPOWI: {
1396 if ((InterestedClasses & fcNegative) == fcNone)
1397 break;
1398
1399 Register Exp = MI.getOperand(i: 2).getReg();
1400 LLT ExpTy = MRI.getType(Reg: Exp);
1401 KnownBits ExponentKnownBits = getKnownBits(
1402 R: Exp, DemandedElts: ExpTy.isVector() ? DemandedElts : APInt(1, 1), Depth: Depth + 1);
1403
1404 if (ExponentKnownBits.Zero[0]) { // Is even
1405 Known.knownNot(RuleOut: fcNegative);
1406 break;
1407 }
1408
1409 // Given that exp is an integer, here are the
1410 // ways that pow can return a negative value:
1411 //
1412 // pow(-x, exp) --> negative if exp is odd and x is negative.
1413 // pow(-0, exp) --> -inf if exp is negative odd.
1414 // pow(-0, exp) --> -0 if exp is positive odd.
1415 // pow(-inf, exp) --> -0 if exp is negative odd.
1416 // pow(-inf, exp) --> -inf if exp is positive odd.
1417 Register Val = MI.getOperand(i: 1).getReg();
1418 KnownFPClass KnownSrc;
1419 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses: fcNegative, Known&: KnownSrc, Depth: Depth + 1);
1420 if (KnownSrc.isKnownNever(Mask: fcNegative))
1421 Known.knownNot(RuleOut: fcNegative);
1422 break;
1423 }
1424 case TargetOpcode::G_FLDEXP:
1425 case TargetOpcode::G_STRICT_FLDEXP: {
1426 Register Val = MI.getOperand(i: 1).getReg();
1427 KnownFPClass KnownSrc;
1428 computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc,
1429 Depth: Depth + 1);
1430 Known.propagateNaN(Src: KnownSrc, /*PropagateSign=*/PreserveSign: true);
1431
1432 // Sign is preserved, but underflows may produce zeroes.
1433 if (KnownSrc.isKnownNever(Mask: fcNegative))
1434 Known.knownNot(RuleOut: fcNegative);
1435 else if (KnownSrc.cannotBeOrderedLessThanZero())
1436 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
1437
1438 if (KnownSrc.isKnownNever(Mask: fcPositive))
1439 Known.knownNot(RuleOut: fcPositive);
1440 else if (KnownSrc.cannotBeOrderedGreaterThanZero())
1441 Known.knownNot(RuleOut: KnownFPClass::OrderedGreaterThanZeroMask);
1442
1443 // Can refine inf/zero handling based on the exponent operand.
1444 const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf;
1445 if ((InterestedClasses & ExpInfoMask) == fcNone)
1446 break;
1447 if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone)
1448 break;
1449
1450 // TODO: Handle constant range of Exp
1451
1452 break;
1453 }
1454 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1455 computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known,
1456 Depth);
1457 break;
1458 }
1459 case TargetOpcode::G_FADD:
1460 case TargetOpcode::G_STRICT_FADD:
1461 case TargetOpcode::G_FSUB:
1462 case TargetOpcode::G_STRICT_FSUB: {
1463 Register LHS = MI.getOperand(i: 1).getReg();
1464 Register RHS = MI.getOperand(i: 2).getReg();
1465 KnownFPClass KnownLHS, KnownRHS;
1466 bool WantNegative =
1467 (Opcode == TargetOpcode::G_FADD ||
1468 Opcode == TargetOpcode::G_STRICT_FADD) &&
1469 (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone;
1470 bool WantNaN = (InterestedClasses & fcNan) != fcNone;
1471 bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone;
1472
1473 if (!WantNaN && !WantNegative && !WantNegZero)
1474 break;
1475
1476 FPClassTest InterestedSrcs = InterestedClasses;
1477 if (WantNegative)
1478 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
1479 if (InterestedClasses & fcNan)
1480 InterestedSrcs |= fcInf;
1481 computeKnownFPClass(R: RHS, DemandedElts, InterestedClasses: InterestedSrcs, Known&: KnownRHS, Depth: Depth + 1);
1482
1483 if ((WantNaN && KnownRHS.isKnownNeverNaN()) ||
1484 (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) ||
1485 WantNegZero ||
1486 (Opcode == TargetOpcode::G_FSUB ||
1487 Opcode == TargetOpcode::G_STRICT_FSUB)) {
1488
1489 // RHS is canonically cheaper to compute. Skip inspecting the LHS if
1490 // there's no point.
1491 computeKnownFPClass(R: LHS, DemandedElts, InterestedClasses: InterestedSrcs, Known&: KnownLHS,
1492 Depth: Depth + 1);
1493 // Adding positive and negative infinity produces NaN.
1494 // TODO: Check sign of infinities.
1495 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
1496 (KnownLHS.isKnownNeverInfinity() || KnownRHS.isKnownNeverInfinity()))
1497 Known.knownNot(RuleOut: fcNan);
1498
1499 if (Opcode == TargetOpcode::G_FADD ||
1500 Opcode == TargetOpcode::G_STRICT_FADD) {
1501 if (KnownLHS.cannotBeOrderedLessThanZero() &&
1502 KnownRHS.cannotBeOrderedLessThanZero())
1503 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
1504
1505 // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
1506 if ((KnownLHS.isKnownNeverLogicalNegZero(Mode: MF->getDenormalMode(
1507 FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType()))) ||
1508 KnownRHS.isKnownNeverLogicalNegZero(Mode: MF->getDenormalMode(
1509 FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType())))) &&
1510 // Make sure output negative denormal can't flush to -0
1511 outputDenormalIsIEEEOrPosZero(MF: *MF, Ty: DstTy))
1512 Known.knownNot(RuleOut: fcNegZero);
1513 } else {
1514 // Only fsub -0, +0 can return -0
1515 if ((KnownLHS.isKnownNeverLogicalNegZero(Mode: MF->getDenormalMode(
1516 FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType()))) ||
1517 KnownRHS.isKnownNeverLogicalPosZero(Mode: MF->getDenormalMode(
1518 FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType())))) &&
1519 // Make sure output negative denormal can't flush to -0
1520 outputDenormalIsIEEEOrPosZero(MF: *MF, Ty: DstTy))
1521 Known.knownNot(RuleOut: fcNegZero);
1522 }
1523 }
1524
1525 break;
1526 }
1527 case TargetOpcode::G_FMUL:
1528 case TargetOpcode::G_STRICT_FMUL: {
1529 Register LHS = MI.getOperand(i: 1).getReg();
1530 Register RHS = MI.getOperand(i: 2).getReg();
1531 // X * X is always non-negative or a NaN.
1532 if (LHS == RHS)
1533 Known.knownNot(RuleOut: fcNegative);
1534
1535 if ((InterestedClasses & fcNan) != fcNan)
1536 break;
1537
1538 // fcSubnormal is only needed in case of DAZ.
1539 const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal;
1540
1541 KnownFPClass KnownLHS, KnownRHS;
1542 computeKnownFPClass(R: RHS, DemandedElts, InterestedClasses: NeedForNan, Known&: KnownRHS, Depth: Depth + 1);
1543 if (!KnownRHS.isKnownNeverNaN())
1544 break;
1545
1546 computeKnownFPClass(R: LHS, DemandedElts, InterestedClasses: NeedForNan, Known&: KnownLHS, Depth: Depth + 1);
1547 if (!KnownLHS.isKnownNeverNaN())
1548 break;
1549
1550 if (KnownLHS.SignBit && KnownRHS.SignBit) {
1551 if (*KnownLHS.SignBit == *KnownRHS.SignBit)
1552 Known.signBitMustBeZero();
1553 else
1554 Known.signBitMustBeOne();
1555 }
1556
1557 // If 0 * +/-inf produces NaN.
1558 if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) {
1559 Known.knownNot(RuleOut: fcNan);
1560 break;
1561 }
1562
1563 if ((KnownRHS.isKnownNeverInfinity() ||
1564 KnownLHS.isKnownNeverLogicalZero(Mode: MF->getDenormalMode(
1565 FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType())))) &&
1566 (KnownLHS.isKnownNeverInfinity() ||
1567 KnownRHS.isKnownNeverLogicalZero(
1568 Mode: MF->getDenormalMode(FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType())))))
1569 Known.knownNot(RuleOut: fcNan);
1570
1571 break;
1572 }
1573 case TargetOpcode::G_FDIV:
1574 case TargetOpcode::G_FREM: {
1575 Register LHS = MI.getOperand(i: 1).getReg();
1576 Register RHS = MI.getOperand(i: 2).getReg();
1577
1578 if (LHS == RHS) {
1579 // TODO: Could filter out snan if we inspect the operand
1580 if (Opcode == TargetOpcode::G_FDIV) {
1581 // X / X is always exactly 1.0 or a NaN.
1582 Known.KnownFPClasses = fcNan | fcPosNormal;
1583 } else {
1584 // X % X is always exactly [+-]0.0 or a NaN.
1585 Known.KnownFPClasses = fcNan | fcZero;
1586 }
1587
1588 break;
1589 }
1590
1591 const bool WantNan = (InterestedClasses & fcNan) != fcNone;
1592 const bool WantNegative = (InterestedClasses & fcNegative) != fcNone;
1593 const bool WantPositive = Opcode == TargetOpcode::G_FREM &&
1594 (InterestedClasses & fcPositive) != fcNone;
1595 if (!WantNan && !WantNegative && !WantPositive)
1596 break;
1597
1598 KnownFPClass KnownLHS, KnownRHS;
1599
1600 computeKnownFPClass(R: RHS, DemandedElts, InterestedClasses: fcNan | fcInf | fcZero | fcNegative,
1601 Known&: KnownRHS, Depth: Depth + 1);
1602
1603 bool KnowSomethingUseful =
1604 KnownRHS.isKnownNeverNaN() || KnownRHS.isKnownNever(Mask: fcNegative);
1605
1606 if (KnowSomethingUseful || WantPositive) {
1607 const FPClassTest InterestedLHS =
1608 WantPositive ? fcAllFlags
1609 : fcNan | fcInf | fcZero | fcSubnormal | fcNegative;
1610
1611 computeKnownFPClass(R: LHS, DemandedElts, InterestedClasses: InterestedClasses & InterestedLHS,
1612 Known&: KnownLHS, Depth: Depth + 1);
1613 }
1614
1615 if (Opcode == TargetOpcode::G_FDIV) {
1616 // Only 0/0, Inf/Inf produce NaN.
1617 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
1618 (KnownLHS.isKnownNeverInfinity() ||
1619 KnownRHS.isKnownNeverInfinity()) &&
1620 ((KnownLHS.isKnownNeverLogicalZero(Mode: MF->getDenormalMode(
1621 FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType())))) ||
1622 (KnownRHS.isKnownNeverLogicalZero(Mode: MF->getDenormalMode(
1623 FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType())))))) {
1624 Known.knownNot(RuleOut: fcNan);
1625 }
1626
1627 // X / -0.0 is -Inf (or NaN).
1628 // +X / +X is +X
1629 if (KnownLHS.isKnownNever(Mask: fcNegative) &&
1630 KnownRHS.isKnownNever(Mask: fcNegative))
1631 Known.knownNot(RuleOut: fcNegative);
1632 } else {
1633 // Inf REM x and x REM 0 produce NaN.
1634 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
1635 KnownLHS.isKnownNeverInfinity() &&
1636 KnownRHS.isKnownNeverLogicalZero(Mode: MF->getDenormalMode(
1637 FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType())))) {
1638 Known.knownNot(RuleOut: fcNan);
1639 }
1640
1641 // The sign for frem is the same as the first operand.
1642 if (KnownLHS.cannotBeOrderedLessThanZero())
1643 Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask);
1644 if (KnownLHS.cannotBeOrderedGreaterThanZero())
1645 Known.knownNot(RuleOut: KnownFPClass::OrderedGreaterThanZeroMask);
1646
1647 // See if we can be more aggressive about the sign of 0.
1648 if (KnownLHS.isKnownNever(Mask: fcNegative))
1649 Known.knownNot(RuleOut: fcNegative);
1650 if (KnownLHS.isKnownNever(Mask: fcPositive))
1651 Known.knownNot(RuleOut: fcPositive);
1652 }
1653
1654 break;
1655 }
1656 case TargetOpcode::G_FPEXT: {
1657 Register Dst = MI.getOperand(i: 0).getReg();
1658 Register Src = MI.getOperand(i: 1).getReg();
1659 // Infinity, nan and zero propagate from source.
1660 computeKnownFPClass(R, DemandedElts, InterestedClasses, Known, Depth: Depth + 1);
1661
1662 LLT DstTy = MRI.getType(Reg: Dst).getScalarType();
1663 const fltSemantics &DstSem = getFltSemanticForLLT(Ty: DstTy);
1664 LLT SrcTy = MRI.getType(Reg: Src).getScalarType();
1665 const fltSemantics &SrcSem = getFltSemanticForLLT(Ty: SrcTy);
1666
1667 // All subnormal inputs should be in the normal range in the result type.
1668 if (APFloat::isRepresentableAsNormalIn(Src: SrcSem, Dst: DstSem)) {
1669 if (Known.KnownFPClasses & fcPosSubnormal)
1670 Known.KnownFPClasses |= fcPosNormal;
1671 if (Known.KnownFPClasses & fcNegSubnormal)
1672 Known.KnownFPClasses |= fcNegNormal;
1673 Known.knownNot(RuleOut: fcSubnormal);
1674 }
1675
1676 // Sign bit of a nan isn't guaranteed.
1677 if (!Known.isKnownNeverNaN())
1678 Known.SignBit = std::nullopt;
1679 break;
1680 }
1681 case TargetOpcode::G_FPTRUNC: {
1682 computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known,
1683 Depth);
1684 break;
1685 }
1686 case TargetOpcode::G_SITOFP:
1687 case TargetOpcode::G_UITOFP: {
1688 // Cannot produce nan
1689 Known.knownNot(RuleOut: fcNan);
1690
1691 // Integers cannot be subnormal
1692 Known.knownNot(RuleOut: fcSubnormal);
1693
1694 // sitofp and uitofp turn into +0.0 for zero.
1695 Known.knownNot(RuleOut: fcNegZero);
1696 if (Opcode == TargetOpcode::G_UITOFP)
1697 Known.signBitMustBeZero();
1698
1699 Register Val = MI.getOperand(i: 1).getReg();
1700 LLT Ty = MRI.getType(Reg: Val);
1701
1702 if (InterestedClasses & fcInf) {
1703 // Get width of largest magnitude integer (remove a bit if signed).
1704 // This still works for a signed minimum value because the largest FP
1705 // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx).;
1706 int IntSize = Ty.getScalarSizeInBits();
1707 if (Opcode == TargetOpcode::G_SITOFP)
1708 --IntSize;
1709
1710 // If the exponent of the largest finite FP value can hold the largest
1711 // integer, the result of the cast must be finite.
1712 LLT FPTy = DstTy.getScalarType();
1713 const fltSemantics &FltSem = getFltSemanticForLLT(Ty: FPTy);
1714 if (ilogb(Arg: APFloat::getLargest(Sem: FltSem)) >= IntSize)
1715 Known.knownNot(RuleOut: fcInf);
1716 }
1717
1718 break;
1719 }
1720 // case TargetOpcode::G_MERGE_VALUES:
1721 case TargetOpcode::G_BUILD_VECTOR:
1722 case TargetOpcode::G_CONCAT_VECTORS: {
1723 GMergeLikeInstr &Merge = cast<GMergeLikeInstr>(Val&: MI);
1724
1725 if (!DstTy.isFixedVector())
1726 break;
1727
1728 bool First = true;
1729 for (unsigned Idx = 0; Idx < Merge.getNumSources(); ++Idx) {
1730 // We know the index we are inserting to, so clear it from Vec check.
1731 bool NeedsElt = DemandedElts[Idx];
1732
1733 // Do we demand the inserted element?
1734 if (NeedsElt) {
1735 Register Src = Merge.getSourceReg(I: Idx);
1736 if (First) {
1737 computeKnownFPClass(R: Src, Known, InterestedClasses, Depth: Depth + 1);
1738 First = false;
1739 } else {
1740 KnownFPClass Known2;
1741 computeKnownFPClass(R: Src, Known&: Known2, InterestedClasses, Depth: Depth + 1);
1742 Known |= Known2;
1743 }
1744
1745 // If we don't know any bits, early out.
1746 if (Known.isUnknown())
1747 break;
1748 }
1749 }
1750
1751 break;
1752 }
1753 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
1754 // Look through extract element. If the index is non-constant or
1755 // out-of-range demand all elements, otherwise just the extracted
1756 // element.
1757 GExtractVectorElement &Extract = cast<GExtractVectorElement>(Val&: MI);
1758 Register Vec = Extract.getVectorReg();
1759 Register Idx = Extract.getIndexReg();
1760
1761 auto CIdx = getIConstantVRegVal(VReg: Idx, MRI);
1762
1763 LLT VecTy = MRI.getType(Reg: Vec);
1764
1765 if (VecTy.isFixedVector()) {
1766 unsigned NumElts = VecTy.getNumElements();
1767 APInt DemandedVecElts = APInt::getAllOnes(numBits: NumElts);
1768 if (CIdx && CIdx->ult(RHS: NumElts))
1769 DemandedVecElts = APInt::getOneBitSet(numBits: NumElts, BitNo: CIdx->getZExtValue());
1770 return computeKnownFPClass(R: Vec, DemandedElts: DemandedVecElts, InterestedClasses, Known,
1771 Depth: Depth + 1);
1772 }
1773
1774 break;
1775 }
1776 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1777 GInsertVectorElement &Insert = cast<GInsertVectorElement>(Val&: MI);
1778 Register Vec = Insert.getVectorReg();
1779 Register Elt = Insert.getElementReg();
1780 Register Idx = Insert.getIndexReg();
1781
1782 LLT VecTy = MRI.getType(Reg: Vec);
1783
1784 if (VecTy.isScalableVector())
1785 return;
1786
1787 auto CIdx = getIConstantVRegVal(VReg: Idx, MRI);
1788
1789 unsigned NumElts = DemandedElts.getBitWidth();
1790 APInt DemandedVecElts = DemandedElts;
1791 bool NeedsElt = true;
1792 // If we know the index we are inserting to, clear it from Vec check.
1793 if (CIdx && CIdx->ult(RHS: NumElts)) {
1794 DemandedVecElts.clearBit(BitPosition: CIdx->getZExtValue());
1795 NeedsElt = DemandedElts[CIdx->getZExtValue()];
1796 }
1797
1798 // Do we demand the inserted element?
1799 if (NeedsElt) {
1800 computeKnownFPClass(R: Elt, Known, InterestedClasses, Depth: Depth + 1);
1801 // If we don't know any bits, early out.
1802 if (Known.isUnknown())
1803 break;
1804 } else {
1805 Known.KnownFPClasses = fcNone;
1806 }
1807
1808 // Do we need anymore elements from Vec?
1809 if (!DemandedVecElts.isZero()) {
1810 KnownFPClass Known2;
1811 computeKnownFPClass(R: Vec, DemandedElts: DemandedVecElts, InterestedClasses, Known&: Known2,
1812 Depth: Depth + 1);
1813 Known |= Known2;
1814 }
1815
1816 break;
1817 }
1818 case TargetOpcode::G_SHUFFLE_VECTOR: {
1819 // For undef elements, we don't know anything about the common state of
1820 // the shuffle result.
1821 GShuffleVector &Shuf = cast<GShuffleVector>(Val&: MI);
1822 APInt DemandedLHS, DemandedRHS;
1823 if (DstTy.isScalableVector()) {
1824 assert(DemandedElts == APInt(1, 1));
1825 DemandedLHS = DemandedRHS = DemandedElts;
1826 } else {
1827 if (!llvm::getShuffleDemandedElts(SrcWidth: DstTy.getNumElements(), Mask: Shuf.getMask(),
1828 DemandedElts, DemandedLHS,
1829 DemandedRHS)) {
1830 Known.resetAll();
1831 return;
1832 }
1833 }
1834
1835 if (!!DemandedLHS) {
1836 Register LHS = Shuf.getSrc1Reg();
1837 computeKnownFPClass(R: LHS, DemandedElts: DemandedLHS, InterestedClasses, Known,
1838 Depth: Depth + 1);
1839
1840 // If we don't know any bits, early out.
1841 if (Known.isUnknown())
1842 break;
1843 } else {
1844 Known.KnownFPClasses = fcNone;
1845 }
1846
1847 if (!!DemandedRHS) {
1848 KnownFPClass Known2;
1849 Register RHS = Shuf.getSrc2Reg();
1850 computeKnownFPClass(R: RHS, DemandedElts: DemandedRHS, InterestedClasses, Known&: Known2,
1851 Depth: Depth + 1);
1852 Known |= Known2;
1853 }
1854 break;
1855 }
1856 case TargetOpcode::COPY: {
1857 Register Src = MI.getOperand(i: 1).getReg();
1858
1859 if (!Src.isVirtual())
1860 return;
1861
1862 computeKnownFPClass(R: Src, DemandedElts, InterestedClasses, Known, Depth: Depth + 1);
1863 break;
1864 }
1865 }
1866}
1867
1868KnownFPClass
1869GISelValueTracking::computeKnownFPClass(Register R, const APInt &DemandedElts,
1870 FPClassTest InterestedClasses,
1871 unsigned Depth) {
1872 KnownFPClass KnownClasses;
1873 computeKnownFPClass(R, DemandedElts, InterestedClasses, Known&: KnownClasses, Depth);
1874 return KnownClasses;
1875}
1876
1877KnownFPClass GISelValueTracking::computeKnownFPClass(
1878 Register R, FPClassTest InterestedClasses, unsigned Depth) {
1879 KnownFPClass Known;
1880 computeKnownFPClass(R, Known, InterestedClasses, Depth);
1881 return Known;
1882}
1883
1884KnownFPClass GISelValueTracking::computeKnownFPClass(
1885 Register R, const APInt &DemandedElts, uint32_t Flags,
1886 FPClassTest InterestedClasses, unsigned Depth) {
1887 if (Flags & MachineInstr::MIFlag::FmNoNans)
1888 InterestedClasses &= ~fcNan;
1889 if (Flags & MachineInstr::MIFlag::FmNoInfs)
1890 InterestedClasses &= ~fcInf;
1891
1892 KnownFPClass Result =
1893 computeKnownFPClass(R, DemandedElts, InterestedClasses, Depth);
1894
1895 if (Flags & MachineInstr::MIFlag::FmNoNans)
1896 Result.KnownFPClasses &= ~fcNan;
1897 if (Flags & MachineInstr::MIFlag::FmNoInfs)
1898 Result.KnownFPClasses &= ~fcInf;
1899 return Result;
1900}
1901
1902KnownFPClass GISelValueTracking::computeKnownFPClass(
1903 Register R, uint32_t Flags, FPClassTest InterestedClasses, unsigned Depth) {
1904 LLT Ty = MRI.getType(Reg: R);
1905 APInt DemandedElts =
1906 Ty.isFixedVector() ? APInt::getAllOnes(numBits: Ty.getNumElements()) : APInt(1, 1);
1907 return computeKnownFPClass(R, DemandedElts, Flags, InterestedClasses, Depth);
1908}
1909
1910/// Compute number of sign bits for the intersection of \p Src0 and \p Src1
1911unsigned GISelValueTracking::computeNumSignBitsMin(Register Src0, Register Src1,
1912 const APInt &DemandedElts,
1913 unsigned Depth) {
1914 // Test src1 first, since we canonicalize simpler expressions to the RHS.
1915 unsigned Src1SignBits = computeNumSignBits(R: Src1, DemandedElts, Depth);
1916 if (Src1SignBits == 1)
1917 return 1;
1918 return std::min(a: computeNumSignBits(R: Src0, DemandedElts, Depth), b: Src1SignBits);
1919}
1920
1921/// Compute the known number of sign bits with attached range metadata in the
1922/// memory operand. If this is an extending load, accounts for the behavior of
1923/// the high bits.
1924static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
1925 unsigned TyBits) {
1926 const MDNode *Ranges = Ld->getRanges();
1927 if (!Ranges)
1928 return 1;
1929
1930 ConstantRange CR = getConstantRangeFromMetadata(RangeMD: *Ranges);
1931 if (TyBits > CR.getBitWidth()) {
1932 switch (Ld->getOpcode()) {
1933 case TargetOpcode::G_SEXTLOAD:
1934 CR = CR.signExtend(BitWidth: TyBits);
1935 break;
1936 case TargetOpcode::G_ZEXTLOAD:
1937 CR = CR.zeroExtend(BitWidth: TyBits);
1938 break;
1939 default:
1940 break;
1941 }
1942 }
1943
1944 return std::min(a: CR.getSignedMin().getNumSignBits(),
1945 b: CR.getSignedMax().getNumSignBits());
1946}
1947
/// Return the minimum number of known sign bits of \p R over the demanded
/// vector elements. A result of N means the top N bits of every demanded
/// element are guaranteed to equal the sign bit; the answer is always
/// conservative (at least 1).
unsigned GISelValueTracking::computeNumSignBits(Register R,
                                                const APInt &DemandedElts,
                                                unsigned Depth) {
  MachineInstr &MI = *MRI.getVRegDef(Reg: R);
  unsigned Opcode = MI.getOpcode();

  // Constants have an exact answer; no recursion or depth budget needed.
  if (Opcode == TargetOpcode::G_CONSTANT)
    return MI.getOperand(i: 1).getCImm()->getValue().getNumSignBits();

  // Recursion budget exhausted: the conservative answer is a single sign bit.
  if (Depth == getMaxDepth())
    return 1;

  if (!DemandedElts)
    return 1; // No demanded elts, better to assume we don't know anything.

  LLT DstTy = MRI.getType(Reg: R);
  const unsigned TyBits = DstTy.getScalarSizeInBits();

  // Handle the case where this is called on a register that does not have a
  // type constraint. This is unlikely to occur except by looking through copies
  // but it is possible for the initial register being queried to be in this
  // state.
  if (!DstTy.isValid())
    return 1;

  // Per-opcode lower bound; refined by the known-bits fallback at the end.
  unsigned FirstAnswer = 1;
  switch (Opcode) {
  case TargetOpcode::COPY: {
    MachineOperand &Src = MI.getOperand(i: 1);
    // Only look through full-register copies of typed virtual registers.
    if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
        MRI.getType(Reg: Src.getReg()).isValid()) {
      // Don't increment Depth for this one since we didn't do any work.
      return computeNumSignBits(R: Src.getReg(), DemandedElts, Depth);
    }

    return 1;
  }
  case TargetOpcode::G_SEXT: {
    // Sign extension contributes exactly (DstBits - SrcBits) additional
    // copies of the source's sign bit.
    Register Src = MI.getOperand(i: 1).getReg();
    LLT SrcTy = MRI.getType(Reg: Src);
    unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
    return computeNumSignBits(R: Src, DemandedElts, Depth: Depth + 1) + Tmp;
  }
  case TargetOpcode::G_ASSERT_SEXT:
  case TargetOpcode::G_SEXT_INREG: {
    // Max of the input and what this extends.
    Register Src = MI.getOperand(i: 1).getReg();
    unsigned SrcBits = MI.getOperand(i: 2).getImm();
    // The in-reg extension guarantees TyBits - SrcBits + 1 sign bits by
    // itself; the source may guarantee more.
    unsigned InRegBits = TyBits - SrcBits + 1;
    return std::max(a: computeNumSignBits(R: Src, DemandedElts, Depth: Depth + 1),
                    b: InRegBits);
  }
  case TargetOpcode::G_LOAD: {
    GLoad *Ld = cast<GLoad>(Val: &MI);
    // NOTE(review): range metadata is only consulted for the scalar
    // little-endian case here — presumably because the metadata describes
    // the in-memory value layout; confirm before extending.
    if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
      break;

    return computeNumSignBitsFromRangeMetadata(Ld, TyBits);
  }
  case TargetOpcode::G_SEXTLOAD: {
    GSExtLoad *Ld = cast<GSExtLoad>(Val: &MI);

    // FIXME: We need an in-memory type representation.
    if (DstTy.isVector())
      return 1;

    // Range metadata may give a tighter bound than the extension itself.
    unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
    if (NumBits != 1)
      return NumBits;

    // e.g. i16->i32 = '17' bits known.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return TyBits - MMO->getSizeInBits().getValue() + 1;
  }
  case TargetOpcode::G_ZEXTLOAD: {
    GZExtLoad *Ld = cast<GZExtLoad>(Val: &MI);

    // FIXME: We need an in-memory type representation.
    if (DstTy.isVector())
      return 1;

    unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
    if (NumBits != 1)
      return NumBits;

    // Zero extension guarantees the high bits are zero, hence sign bits.
    // e.g. i16->i32 = '16' bits known.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return TyBits - MMO->getSizeInBits().getValue();
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Bitwise logic preserves at least the sign bits common to both
    // operands.
    Register Src1 = MI.getOperand(i: 1).getReg();
    unsigned Src1NumSignBits =
        computeNumSignBits(R: Src1, DemandedElts, Depth: Depth + 1);
    if (Src1NumSignBits != 1) {
      Register Src2 = MI.getOperand(i: 2).getReg();
      unsigned Src2NumSignBits =
          computeNumSignBits(R: Src2, DemandedElts, Depth: Depth + 1);
      FirstAnswer = std::min(a: Src1NumSignBits, b: Src2NumSignBits);
    }
    break;
  }
  case TargetOpcode::G_ASHR: {
    // Arithmetic shift right replicates the sign bit: each shifted position
    // adds one known sign bit, saturating at the type width.
    Register Src1 = MI.getOperand(i: 1).getReg();
    Register Src2 = MI.getOperand(i: 2).getReg();
    FirstAnswer = computeNumSignBits(R: Src1, DemandedElts, Depth: Depth + 1);
    if (auto C = getValidMinimumShiftAmount(R: Src2, DemandedElts, Depth: Depth + 1))
      FirstAnswer = std::min<uint64_t>(a: FirstAnswer + *C, b: TyBits);
    break;
  }
  case TargetOpcode::G_SHL: {
    Register Src1 = MI.getOperand(i: 1).getReg();
    Register Src2 = MI.getOperand(i: 2).getReg();
    if (std::optional<ConstantRange> ShAmtRange =
            getValidShiftAmountRange(R: Src2, DemandedElts, Depth: Depth + 1)) {
      uint64_t MaxShAmt = ShAmtRange->getUnsignedMax().getZExtValue();
      uint64_t MinShAmt = ShAmtRange->getUnsignedMin().getZExtValue();

      MachineInstr &ExtMI = *MRI.getVRegDef(Reg: Src1);
      unsigned ExtOpc = ExtMI.getOpcode();

      // Try to look through ZERO/SIGN/ANY_EXTEND. If all extended bits are
      // shifted out, then we can compute the number of sign bits for the
      // operand being extended. A future improvement could be to pass along the
      // "shifted left by" information in the recursive calls to
      // ComputeKnownSignBits. Allowing us to handle this more generically.
      if (ExtOpc == TargetOpcode::G_SEXT || ExtOpc == TargetOpcode::G_ZEXT ||
          ExtOpc == TargetOpcode::G_ANYEXT) {
        LLT ExtTy = MRI.getType(Reg: Src1);
        Register Extendee = ExtMI.getOperand(i: 1).getReg();
        LLT ExtendeeTy = MRI.getType(Reg: Extendee);
        uint64_t SizeDiff =
            ExtTy.getScalarSizeInBits() - ExtendeeTy.getScalarSizeInBits();

        // Only valid when every extended bit is guaranteed shifted out.
        if (SizeDiff <= MinShAmt) {
          unsigned Tmp =
              SizeDiff + computeNumSignBits(R: Extendee, DemandedElts, Depth: Depth + 1);
          if (MaxShAmt < Tmp)
            return Tmp - MaxShAmt;
        }
      }
      // shl destroys sign bits, ensure it doesn't shift out all sign bits.
      unsigned Tmp = computeNumSignBits(R: Src1, DemandedElts, Depth: Depth + 1);
      if (MaxShAmt < Tmp)
        return Tmp - MaxShAmt;
    }
    break;
  }
  case TargetOpcode::G_TRUNC: {
    Register Src = MI.getOperand(i: 1).getReg();
    LLT SrcTy = MRI.getType(Reg: Src);

    // Check if the sign bits of source go down as far as the truncated value.
    unsigned DstTyBits = DstTy.getScalarSizeInBits();
    unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
    unsigned NumSrcSignBits = computeNumSignBits(R: Src, DemandedElts, Depth: Depth + 1);
    if (NumSrcSignBits > (NumSrcBits - DstTyBits))
      return NumSrcSignBits - (NumSrcBits - DstTyBits);
    break;
  }
  case TargetOpcode::G_SELECT: {
    // The result may be either arm, so only bits common to both count.
    return computeNumSignBitsMin(Src0: MI.getOperand(i: 2).getReg(),
                                 Src1: MI.getOperand(i: 3).getReg(), DemandedElts,
                                 Depth: Depth + 1);
  }
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
    // TODO: Handle clamp pattern with number of sign bits for SMIN/SMAX.
    return computeNumSignBitsMin(Src0: MI.getOperand(i: 1).getReg(),
                                 Src1: MI.getOperand(i: 2).getReg(), DemandedElts,
                                 Depth: Depth + 1);
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // If compares returns 0/-1, all bits are sign bits.
    // We know that we have an integer-based boolean since these operations
    // are only available for integer.
    // Note: only the overflow result (def 1) is a boolean; the value result
    // (def 0) falls through with no special handling.
    if (MI.getOperand(i: 1).getReg() == R) {
      if (TL.getBooleanContents(isVec: DstTy.isVector(), isFloat: false) ==
          TargetLowering::ZeroOrNegativeOneBooleanContent)
        return TyBits;
    }

    break;
  }
  case TargetOpcode::G_SUB: {
    Register Src2 = MI.getOperand(i: 2).getReg();
    unsigned Src2NumSignBits =
        computeNumSignBits(R: Src2, DemandedElts, Depth: Depth + 1);
    if (Src2NumSignBits == 1)
      return 1; // Early out.

    // Handle NEG.
    Register Src1 = MI.getOperand(i: 1).getReg();
    KnownBits Known1 = getKnownBits(R: Src1, DemandedElts, Depth);
    if (Known1.isZero()) {
      KnownBits Known2 = getKnownBits(R: Src2, DemandedElts, Depth);
      // If the input is known to be 0 or 1, the output is 0/-1, which is all
      // sign bits set.
      if ((Known2.Zero | 1).isAllOnes())
        return TyBits;

      // If the input is known to be positive (the sign bit is known clear),
      // the output of the NEG has, at worst, the same number of sign bits as
      // the input.
      if (Known2.isNonNegative()) {
        FirstAnswer = Src2NumSignBits;
        break;
      }

      // Otherwise, we treat this like a SUB.
    }

    unsigned Src1NumSignBits =
        computeNumSignBits(R: Src1, DemandedElts, Depth: Depth + 1);
    if (Src1NumSignBits == 1)
      return 1; // Early Out.

    // Sub can have at most one carry bit. Thus we know that the output
    // is, at worst, one more bit than the inputs.
    FirstAnswer = std::min(a: Src1NumSignBits, b: Src2NumSignBits) - 1;
    break;
  }
  case TargetOpcode::G_ADD: {
    Register Src2 = MI.getOperand(i: 2).getReg();
    unsigned Src2NumSignBits =
        computeNumSignBits(R: Src2, DemandedElts, Depth: Depth + 1);
    // With <= 2 sign bits the min-1 fallthrough below cannot beat 1.
    if (Src2NumSignBits <= 2)
      return 1; // Early out.

    Register Src1 = MI.getOperand(i: 1).getReg();
    unsigned Src1NumSignBits =
        computeNumSignBits(R: Src1, DemandedElts, Depth: Depth + 1);
    if (Src1NumSignBits == 1)
      return 1; // Early Out.

    // Special case decrementing a value (ADD X, -1):
    KnownBits Known2 = getKnownBits(R: Src2, DemandedElts, Depth);
    if (Known2.isAllOnes()) {
      KnownBits Known1 = getKnownBits(R: Src1, DemandedElts, Depth);
      // If the input is known to be 0 or 1, the output is 0/-1, which is all
      // sign bits set.
      if ((Known1.Zero | 1).isAllOnes())
        return TyBits;

      // If we are subtracting one from a positive number, there is no carry
      // out of the result.
      if (Known1.isNonNegative()) {
        FirstAnswer = Src1NumSignBits;
        break;
      }

      // Otherwise, we treat this like an ADD.
    }

    // Add can have at most one carry bit. Thus we know that the output
    // is, at worst, one more bit than the inputs.
    FirstAnswer = std::min(a: Src1NumSignBits, b: Src2NumSignBits) - 1;
    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    bool IsFP = Opcode == TargetOpcode::G_FCMP;
    // An i1 result carries no extra information.
    if (TyBits == 1)
      break;
    auto BC = TL.getBooleanContents(isVec: DstTy.isVector(), isFloat: IsFP);
    if (BC == TargetLoweringBase::ZeroOrNegativeOneBooleanContent)
      return TyBits; // All bits are sign bits.
    if (BC == TargetLowering::ZeroOrOneBooleanContent)
      return TyBits - 1; // Every always-zero bit is a sign bit.
    break;
  }
  case TargetOpcode::G_BUILD_VECTOR: {
    // Collect the known bits that are shared by every demanded vector element.
    FirstAnswer = TyBits;
    APInt SingleDemandedElt(1, 1);
    for (const auto &[I, MO] : enumerate(First: drop_begin(RangeOrContainer: MI.operands()))) {
      if (!DemandedElts[I])
        continue;

      unsigned Tmp2 =
          computeNumSignBits(R: MO.getReg(), DemandedElts: SingleDemandedElt, Depth: Depth + 1);
      FirstAnswer = std::min(a: FirstAnswer, b: Tmp2);

      // If we don't know any bits, early out.
      if (FirstAnswer == 1)
        break;
    }
    break;
  }
  case TargetOpcode::G_CONCAT_VECTORS: {
    if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isScalableVector())
      break;
    FirstAnswer = TyBits;
    // Determine the minimum number of sign bits across all demanded
    // elts of the input vectors. Early out if the result is already 1.
    unsigned NumSubVectorElts =
        MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getNumElements();
    for (const auto &[I, MO] : enumerate(First: drop_begin(RangeOrContainer: MI.operands()))) {
      // Each source covers a contiguous slice of the demanded-elements mask.
      APInt DemandedSub =
          DemandedElts.extractBits(numBits: NumSubVectorElts, bitPosition: I * NumSubVectorElts);
      if (!DemandedSub)
        continue;
      unsigned Tmp2 = computeNumSignBits(R: MO.getReg(), DemandedElts: DemandedSub, Depth: Depth + 1);

      FirstAnswer = std::min(a: FirstAnswer, b: Tmp2);

      // If we don't know any bits, early out.
      if (FirstAnswer == 1)
        break;
    }
    break;
  }
  case TargetOpcode::G_SHUFFLE_VECTOR: {
    // Collect the minimum number of sign bits that are shared by every vector
    // element referenced by the shuffle.
    APInt DemandedLHS, DemandedRHS;
    Register Src1 = MI.getOperand(i: 1).getReg();
    unsigned NumElts = MRI.getType(Reg: Src1).getNumElements();
    if (!getShuffleDemandedElts(SrcWidth: NumElts, Mask: MI.getOperand(i: 3).getShuffleMask(),
                                DemandedElts, DemandedLHS, DemandedRHS))
      return 1;

    if (!!DemandedLHS)
      FirstAnswer = computeNumSignBits(R: Src1, DemandedElts: DemandedLHS, Depth: Depth + 1);
    // If we don't know anything, early out and try computeKnownBits fall-back.
    if (FirstAnswer == 1)
      break;
    if (!!DemandedRHS) {
      unsigned Tmp2 =
          computeNumSignBits(R: MI.getOperand(i: 2).getReg(), DemandedElts: DemandedRHS, Depth: Depth + 1);
      FirstAnswer = std::min(a: FirstAnswer, b: Tmp2);
    }
    break;
  }
  case TargetOpcode::G_SPLAT_VECTOR: {
    // Check if the sign bits of source go down as far as the truncated value.
    Register Src = MI.getOperand(i: 1).getReg();
    unsigned NumSrcSignBits = computeNumSignBits(R: Src, DemandedElts: APInt(1, 1), Depth: Depth + 1);
    unsigned NumSrcBits = MRI.getType(Reg: Src).getSizeInBits();
    if (NumSrcSignBits > (NumSrcBits - TyBits))
      return NumSrcSignBits - (NumSrcBits - TyBits);
    break;
  }
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  default: {
    // Give the target a chance to answer for target-specific or intrinsic
    // instructions.
    unsigned NumBits =
        TL.computeNumSignBitsForTargetInstr(Analysis&: *this, R, DemandedElts, MRI, Depth);
    if (NumBits > 1)
      FirstAnswer = std::max(a: FirstAnswer, b: NumBits);
    break;
  }
  }

  // Finally, if we can prove that the top bits of the result are 0's or 1's,
  // use this information.
  KnownBits Known = getKnownBits(R, DemandedElts, Depth);
  APInt Mask;
  if (Known.isNonNegative()) { // sign bit is 0
    Mask = Known.Zero;
  } else if (Known.isNegative()) { // sign bit is 1;
    Mask = Known.One;
  } else {
    // Nothing known.
    return FirstAnswer;
  }

  // Okay, we know that the sign bit in Mask is set. Use CLO to determine
  // the number of identical bits in the top of the input value.
  Mask <<= Mask.getBitWidth() - TyBits;
  return std::max(a: FirstAnswer, b: Mask.countl_one());
}
2333
2334unsigned GISelValueTracking::computeNumSignBits(Register R, unsigned Depth) {
2335 LLT Ty = MRI.getType(Reg: R);
2336 APInt DemandedElts =
2337 Ty.isFixedVector() ? APInt::getAllOnes(numBits: Ty.getNumElements()) : APInt(1, 1);
2338 return computeNumSignBits(R, DemandedElts, Depth);
2339}
2340
/// Return the range of possible shift amounts of \p R over the demanded
/// elements, provided every amount is known to be strictly less than the bit
/// width of the shifted type; otherwise return std::nullopt.
std::optional<ConstantRange> GISelValueTracking::getValidShiftAmountRange(
    Register R, const APInt &DemandedElts, unsigned Depth) {
  // Shifting more than the bitwidth is not valid.
  MachineInstr &MI = *MRI.getVRegDef(Reg: R);
  unsigned Opcode = MI.getOpcode();

  LLT Ty = MRI.getType(Reg: R);
  unsigned BitWidth = Ty.getScalarSizeInBits();

  // A scalar constant yields a single-element range (if in bounds).
  if (Opcode == TargetOpcode::G_CONSTANT) {
    const APInt &ShAmt = MI.getOperand(i: 1).getCImm()->getValue();
    if (ShAmt.uge(RHS: BitWidth))
      return std::nullopt;
    return ConstantRange(ShAmt);
  }

  // For a build_vector of constants, take the min/max over the demanded
  // lanes. MinAmt/MaxAmt point into the operands' APInts; both are reset to
  // null if any demanded lane is not a constant.
  if (Opcode == TargetOpcode::G_BUILD_VECTOR) {
    const APInt *MinAmt = nullptr, *MaxAmt = nullptr;
    for (unsigned I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
      if (!DemandedElts[I])
        continue;
      MachineInstr *Op = MRI.getVRegDef(Reg: MI.getOperand(i: I + 1).getReg());
      if (Op->getOpcode() != TargetOpcode::G_CONSTANT) {
        MinAmt = MaxAmt = nullptr;
        break;
      }

      // Any single out-of-range lane invalidates the whole shift.
      const APInt &ShAmt = Op->getOperand(i: 1).getCImm()->getValue();
      if (ShAmt.uge(RHS: BitWidth))
        return std::nullopt;
      if (!MinAmt || MinAmt->ugt(RHS: ShAmt))
        MinAmt = &ShAmt;
      if (!MaxAmt || MaxAmt->ult(RHS: ShAmt))
        MaxAmt = &ShAmt;
    }
    assert(((!MinAmt && !MaxAmt) || (MinAmt && MaxAmt)) &&
           "Failed to find matching min/max shift amounts");
    if (MinAmt && MaxAmt)
      return ConstantRange(*MinAmt, *MaxAmt + 1);
    // Fall through to the known-bits query below if a lane wasn't constant.
  }

  // Use computeKnownBits to find a hidden constant/knownbits (usually type
  // legalized). e.g. Hidden behind multiple bitcasts/build_vector/casts etc.
  KnownBits KnownAmt = getKnownBits(R, DemandedElts, Depth);
  if (KnownAmt.getMaxValue().ult(RHS: BitWidth))
    return ConstantRange::fromKnownBits(Known: KnownAmt, /*IsSigned=*/false);

  return std::nullopt;
}
2390
2391std::optional<uint64_t> GISelValueTracking::getValidMinimumShiftAmount(
2392 Register R, const APInt &DemandedElts, unsigned Depth) {
2393 if (std::optional<ConstantRange> AmtRange =
2394 getValidShiftAmountRange(R, DemandedElts, Depth))
2395 return AmtRange->getUnsignedMin().getZExtValue();
2396 return std::nullopt;
2397}
2398
/// Declare pass dependencies: this analysis modifies nothing, so all other
/// analyses remain valid.
void GISelValueTrackingAnalysisLegacy::getAnalysisUsage(
    AnalysisUsage &AU) const {
  AU.setPreservesAll();
  MachineFunctionPass::getAnalysisUsage(AU);
}
2404
/// No eager work is done per function; the GISelValueTracking object is
/// constructed lazily in get(). Returns false: the function is unmodified.
bool GISelValueTrackingAnalysisLegacy::runOnMachineFunction(
    MachineFunction &MF) {
  return false;
}
2409
2410GISelValueTracking &GISelValueTrackingAnalysisLegacy::get(MachineFunction &MF) {
2411 if (!Info) {
2412 unsigned MaxDepth =
2413 MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6;
2414 Info = std::make_unique<GISelValueTracking>(args&: MF, args&: MaxDepth);
2415 }
2416 return *Info;
2417}
2418
// Unique key identifying GISelValueTrackingAnalysis to the new pass manager.
AnalysisKey GISelValueTrackingAnalysis::Key;
2420
/// New pass manager entry point: build a fresh tracker for \p MF. All
/// answers are computed on demand by subsequent queries.
GISelValueTracking
GISelValueTrackingAnalysis::run(MachineFunction &MF,
                                MachineFunctionAnalysisManager &MFAM) {
  return Result(MF);
}
2426
2427PreservedAnalyses
2428GISelValueTrackingPrinterPass::run(MachineFunction &MF,
2429 MachineFunctionAnalysisManager &MFAM) {
2430 auto &VTA = MFAM.getResult<GISelValueTrackingAnalysis>(IR&: MF);
2431 const auto &MRI = MF.getRegInfo();
2432 OS << "name: ";
2433 MF.getFunction().printAsOperand(O&: OS, /*PrintType=*/false);
2434 OS << '\n';
2435
2436 for (MachineBasicBlock &BB : MF) {
2437 for (MachineInstr &MI : BB) {
2438 for (MachineOperand &MO : MI.defs()) {
2439 if (!MO.isReg() || MO.getReg().isPhysical())
2440 continue;
2441 Register Reg = MO.getReg();
2442 if (!MRI.getType(Reg).isValid())
2443 continue;
2444 KnownBits Known = VTA.getKnownBits(R: Reg);
2445 unsigned SignedBits = VTA.computeNumSignBits(R: Reg);
2446 OS << " " << MO << " KnownBits:" << Known << " SignBits:" << SignedBits
2447 << '\n';
2448 };
2449 }
2450 }
2451 return PreservedAnalyses::all();
2452}
2453