//===- lib/CodeGen/GlobalISel/GISelValueTracking.cpp --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Provides analysis for querying information about KnownBits during GISel
/// passes.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineFloatingPointPredicateUtils.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/FMF.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/KnownFPClass.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "gisel-known-bits"

using namespace llvm;
using namespace MIPatternMatch;

char llvm::GISelValueTrackingAnalysisLegacy::ID = 0;

INITIALIZE_PASS(GISelValueTrackingAnalysisLegacy, DEBUG_TYPE,
                "Analysis for ComputingKnownBits", false, true)

GISelValueTracking::GISelValueTracking(MachineFunction &MF, unsigned MaxDepth)
    : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
      DL(MF.getFunction().getDataLayout()), MaxDepth(MaxDepth) {}
Align GISelValueTracking::computeKnownAlignment(Register R, unsigned Depth) {
  const MachineInstr *MI = MRI.getVRegDef(R);
  switch (MI->getOpcode()) {
  case TargetOpcode::COPY:
    return computeKnownAlignment(MI->getOperand(1).getReg(), Depth);
  case TargetOpcode::G_ASSERT_ALIGN: {
    // TODO: Min with source
    return Align(MI->getOperand(2).getImm());
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI->getOperand(1).getIndex();
    return MF.getFrameInfo().getObjectAlign(FrameIdx);
  }
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  default:
    return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
  }
}

KnownBits GISelValueTracking::getKnownBits(MachineInstr &MI) {
  assert(MI.getNumExplicitDefs() == 1 &&
         "expected single return generic instruction");
  return getKnownBits(MI.getOperand(0).getReg());
}

KnownBits GISelValueTracking::getKnownBits(Register R) {
  const LLT Ty = MRI.getType(R);
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
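  // For example (illustrative): a fixed <4 x s32> vector uses DemandedElts =
  // APInt(4, 0b1111), while <vscale x 4 x s32> uses the single broadcast bit
  // APInt(1, 1).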
  APInt DemandedElts =
      Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  return getKnownBits(R, DemandedElts);
}

KnownBits GISelValueTracking::getKnownBits(Register R,
                                           const APInt &DemandedElts,
                                           unsigned Depth) {
  KnownBits Known;
  computeKnownBitsImpl(R, Known, DemandedElts, Depth);
  return Known;
}

bool GISelValueTracking::signBitIsZero(Register R) {
  LLT Ty = MRI.getType(R);
  unsigned BitWidth = Ty.getScalarSizeInBits();
  return maskedValueIsZero(R, APInt::getSignMask(BitWidth));
}

APInt GISelValueTracking::getKnownZeroes(Register R) {
  return getKnownBits(R).Zero;
}

APInt GISelValueTracking::getKnownOnes(Register R) {
  return getKnownBits(R).One;
}

[[maybe_unused]] static void
dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
  dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
         << "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
         << toString(Known.Zero | Known.One, 16, false) << "\n"
         << "[" << Depth << "] Zero: 0x" << toString(Known.Zero, 16, false)
         << "\n"
         << "[" << Depth << "] One: 0x" << toString(Known.One, 16, false)
         << "\n";
}

/// Compute known bits for the intersection of \p Src0 and \p Src1
void GISelValueTracking::computeKnownBitsMin(Register Src0, Register Src1,
                                             KnownBits &Known,
                                             const APInt &DemandedElts,
                                             unsigned Depth) {
  // Test src1 first, since we canonicalize simpler expressions to the RHS.
  computeKnownBitsImpl(Src1, Known, DemandedElts, Depth);

  // If we don't know any bits, early out.
  if (Known.isUnknown())
    return;

  KnownBits Known2;
  computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);

  // Only known if known in both the LHS and RHS.
  Known = Known.intersectWith(Known2);
}

// Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is
// created using Width. Use this function when the inputs are KnownBits
// objects. TODO: Move this to KnownBits.h if this is usable in more cases.
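// For example (illustrative): with BitWidth = 8, an Offset known to be
// exactly 2, and a Width known to be exactly 3, the mask is known to be
// exactly 0b00000111, so the result is lshr(Src, 2) & 0b111.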
static KnownBits extractBits(unsigned BitWidth, const KnownBits &SrcOpKnown,
                             const KnownBits &OffsetKnown,
                             const KnownBits &WidthKnown) {
  KnownBits Mask(BitWidth);
  Mask.Zero = APInt::getBitsSetFrom(
      BitWidth, WidthKnown.getMaxValue().getLimitedValue(BitWidth));
  Mask.One = APInt::getLowBitsSet(
      BitWidth, WidthKnown.getMinValue().getLimitedValue(BitWidth));
  return KnownBits::lshr(SrcOpKnown, OffsetKnown) & Mask;
}

void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
                                              const APInt &DemandedElts,
                                              unsigned Depth) {
  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();
  LLT DstTy = MRI.getType(R);

  // Handle the case where this is called on a register that does not have a
  // type constraint. For example, it may be post-ISel or this target might not
  // preserve the type when early-selecting instructions.
  if (!DstTy.isValid()) {
    Known = KnownBits();
    return;
  }

#ifndef NDEBUG
  if (DstTy.isFixedVector()) {
    assert(
        DstTy.getNumElements() == DemandedElts.getBitWidth() &&
        "DemandedElt width should equal the fixed vector number of elements");
  } else {
    assert(DemandedElts.getBitWidth() == 1 && DemandedElts == APInt(1, 1) &&
           "DemandedElt width should be 1 for scalars or scalable vectors");
  }
#endif

  unsigned BitWidth = DstTy.getScalarSizeInBits();
  Known = KnownBits(BitWidth); // Don't know anything

  // Depth may get bigger than max depth if it gets passed to a different
  // GISelValueTracking object.
  // This may happen when, say, a generic part uses a GISelValueTracking object
  // with some max depth, but then we hit TL.computeKnownBitsForTargetInstr
  // which creates a new GISelValueTracking object with a different and smaller
  // depth. If we just check for equality, we would never exit if the depth
  // that is passed down to the target specific GISelValueTracking object is
  // already bigger than its max depth.
  if (Depth >= getMaxDepth())
    return;

  if (!DemandedElts)
    return; // No demanded elts, better to assume we don't know anything.

  KnownBits Known2;

  switch (Opcode) {
  default:
    TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI,
                                      Depth);
    break;
  case TargetOpcode::G_BUILD_VECTOR: {
    // Collect the known bits that are shared by every demanded vector element.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
      if (!DemandedElts[I])
        continue;

      computeKnownBitsImpl(MO.getReg(), Known2, APInt(1, 1), Depth + 1);

      // Known bits are the values that are shared by every demanded element.
      Known = Known.intersectWith(Known2);

      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }
    break;
  }
  case TargetOpcode::G_SPLAT_VECTOR: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, APInt(1, 1),
                         Depth + 1);
    // Implicitly truncate the bits to match the official semantics of
    // G_SPLAT_VECTOR.
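    // E.g. (illustrative) splatting an s64 source into <4 x s32> keeps only
    // the low 32 known bits of the source.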
    Known = Known.trunc(BitWidth);
    break;
  }
  case TargetOpcode::COPY:
  case TargetOpcode::G_PHI:
  case TargetOpcode::PHI: {
    Known.One = APInt::getAllOnes(BitWidth);
    Known.Zero = APInt::getAllOnes(BitWidth);
    // Destination registers should not have subregisters at this
    // point of the pipeline, otherwise the main live-range will be
    // defined more than once, which is against SSA.
    assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?");
    // A PHI's operands are registers and basic blocks, interleaved. We only
    // care about the register ones.
    for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
      const MachineOperand &Src = MI.getOperand(Idx);
      Register SrcReg = Src.getReg();
      LLT SrcTy = MRI.getType(SrcReg);
      // Look through trivial copies and phis, but don't look through trivial
      // copies or phis of the form `%1:(s32) = OP %0:gpr32`, because
      // known-bits analysis is currently unable to determine the bit width of
      // a register class.
      //
      // We can't use NoSubRegister by name as it's defined by each target but
      // it's always defined to be 0 by tablegen.
      if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
          SrcTy.isValid()) {
        APInt NowDemandedElts;
        if (!SrcTy.isFixedVector()) {
          NowDemandedElts = APInt(1, 1);
        } else if (DstTy.isFixedVector() &&
                   SrcTy.getNumElements() == DstTy.getNumElements()) {
          NowDemandedElts = DemandedElts;
        } else {
          NowDemandedElts = APInt::getAllOnes(SrcTy.getNumElements());
        }

        // For COPYs we don't do anything, don't increase the depth.
        computeKnownBitsImpl(SrcReg, Known2, NowDemandedElts,
                             Depth + (Opcode != TargetOpcode::COPY));
        Known2 = Known2.anyextOrTrunc(BitWidth);
        Known = Known.intersectWith(Known2);
        // If we reach a point where we don't know anything, just stop looking
        // through the operands.
        if (Known.isUnknown())
          break;
      } else {
        // We know nothing.
        Known = KnownBits(BitWidth);
        break;
      }
    }
    break;
  }
  case TargetOpcode::G_CONSTANT: {
    Known = KnownBits::makeConstant(MI.getOperand(1).getCImm()->getValue());
    break;
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI.getOperand(1).getIndex();
    TL.computeKnownBitsForFrameIndex(FrameIdx, Known, MF);
    break;
  }
  case TargetOpcode::G_SUB: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::sub(Known, Known2);
    break;
  }
  case TargetOpcode::G_XOR: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known ^= Known2;
    break;
  }
  case TargetOpcode::G_PTR_ADD: {
    if (DstTy.isVector())
      break;
    // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets?
    LLT Ty = MRI.getType(MI.getOperand(1).getReg());
    if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
      break;
    [[fallthrough]];
  }
  case TargetOpcode::G_ADD: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::add(Known, Known2);
    break;
  }
  case TargetOpcode::G_AND: {
    // If either the LHS or the RHS is zero, the result is zero.
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known &= Known2;
    break;
  }
  case TargetOpcode::G_OR: {
    // If either the LHS or the RHS is one, the result is one.
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known |= Known2;
    break;
  }
  case TargetOpcode::G_MUL: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::mul(Known, Known2);
    break;
  }
  case TargetOpcode::G_UMULH: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::mulhu(Known, Known2);
    break;
  }
  case TargetOpcode::G_SMULH: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::mulhs(Known, Known2);
    break;
  }
  case TargetOpcode::G_UDIV: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::udiv(Known, Known2,
                            MI.getFlag(MachineInstr::MIFlag::IsExact));
    break;
  }
  case TargetOpcode::G_SDIV: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::sdiv(Known, Known2,
                            MI.getFlag(MachineInstr::MIFlag::IsExact));
    break;
  }
  case TargetOpcode::G_SELECT: {
    computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
                        Known, DemandedElts, Depth + 1);
    break;
  }
  case TargetOpcode::G_SMIN: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_SMAX: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMIN: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::umin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMAX: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::umax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    if (DstTy.isVector())
      break;
    if (TL.getBooleanContents(DstTy.isVector(),
                              Opcode == TargetOpcode::G_FCMP) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case TargetOpcode::G_SEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    // If the sign bit is known to be zero or one, then sext will extend
    // it to the top bits, else it will just zext.
    Known = Known.sext(BitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_SEXT:
  case TargetOpcode::G_SEXT_INREG: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.sextInReg(MI.getOperand(2).getImm());
    break;
  }
  case TargetOpcode::G_ANYEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.anyext(BitWidth);
    break;
  }
  case TargetOpcode::G_LOAD: {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = KnownRange.anyext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    if (DstTy.isVector())
      break;
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = Opcode == TargetOpcode::G_SEXTLOAD
                ? KnownRange.sext(Known.getBitWidth())
                : KnownRange.zext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_ASHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::ashr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_LSHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::lshr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_SHL: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::shl(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_ROTL:
  case TargetOpcode::G_ROTR: {
    MachineInstr *AmtOpMI = MRI.getVRegDef(MI.getOperand(2).getReg());
    auto MaybeAmtOp = isConstantOrConstantSplatVector(*AmtOpMI, MRI);
    if (!MaybeAmtOp)
      break;

    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);

    unsigned Amt = MaybeAmtOp->urem(BitWidth);

    // Canonicalize to ROTR.
    if (Opcode == TargetOpcode::G_ROTL)
      Amt = BitWidth - Amt;

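    // For instance, on an 8-bit value rotl(x, 3) is equivalent to
    // rotr(x, 8 - 3) = rotr(x, 5), so a single rotr of the known bits covers
    // both opcodes.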
    Known.Zero = Known.Zero.rotr(Amt);
    Known.One = Known.One.rotr(Amt);
    break;
  }
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_PTRTOINT:
    if (DstTy.isVector())
      break;
    // Fall through and handle them the same as zext/trunc.
    [[fallthrough]];
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_TRUNC: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.zextOrTrunc(BitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_ZEXT: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);

    unsigned SrcBitWidth = MI.getOperand(2).getImm();
    assert(SrcBitWidth && "SrcBitWidth can't be zero");
    APInt InMask = APInt::getLowBitsSet(BitWidth, SrcBitWidth);
    Known.Zero |= (~InMask);
    Known.One &= (~Known.Zero);
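    // For example (illustrative): G_ASSERT_ZEXT with SrcBitWidth = 8 on an
    // s32 value gives InMask = 0xFF, so the upper 24 bits become known zero.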
    break;
  }
  case TargetOpcode::G_ASSERT_ALIGN: {
    int64_t LogOfAlign = Log2_64(MI.getOperand(2).getImm());

    // TODO: Should use maximum with source
    // If a node is guaranteed to be aligned, set low zero bits accordingly as
    // well as clearing one bits.
    Known.Zero.setLowBits(LogOfAlign);
    Known.One.clearLowBits(LogOfAlign);
    break;
  }
  case TargetOpcode::G_MERGE_VALUES: {
    unsigned NumOps = MI.getNumOperands();
    unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

    for (unsigned I = 0; I != NumOps - 1; ++I) {
      KnownBits SrcOpKnown;
      computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown,
                           DemandedElts, Depth + 1);
      Known.insertBits(SrcOpKnown, I * OpSize);
    }
    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    unsigned NumOps = MI.getNumOperands();
    Register SrcReg = MI.getOperand(NumOps - 1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);

    if (SrcTy.isVector() && SrcTy.getScalarType() != DstTy.getScalarType())
      return; // TODO: Handle vector->subelement unmerges

    // Figure out the result operand index
    unsigned DstIdx = 0;
    for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R;
         ++DstIdx)
      ;

    APInt SubDemandedElts = DemandedElts;
    if (SrcTy.isVector()) {
      unsigned DstLanes = DstTy.isVector() ? DstTy.getNumElements() : 1;
      SubDemandedElts =
          DemandedElts.zext(SrcTy.getNumElements()).shl(DstIdx * DstLanes);
    }

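    // E.g. (illustrative) when unmerging <4 x s32> into two <2 x s32>
    // results, DstIdx = 1 maps demanded result lanes {0,1} onto source lanes
    // {2,3}.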
    KnownBits SrcOpKnown;
    computeKnownBitsImpl(SrcReg, SrcOpKnown, SubDemandedElts, Depth + 1);

    if (SrcTy.isVector())
      Known = std::move(SrcOpKnown);
    else
      Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
    break;
  }
  case TargetOpcode::G_BSWAP: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.byteSwap();
    break;
  }
  case TargetOpcode::G_BITREVERSE: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.reverseBits();
    break;
  }
  case TargetOpcode::G_CTPOP: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    // We can bound the space the count needs. Also, bits known to be zero
    // can't contribute to the population.
    unsigned BitsPossiblySet = Known2.countMaxPopulation();
    unsigned LowBits = llvm::bit_width(BitsPossiblySet);
    Known.Zero.setBitsFrom(LowBits);
    // TODO: we could bound Known.One using the lower bound on the number of
    // bits which might be set provided by popcnt KnownOne2.
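    // E.g. if at most 5 source bits can possibly be set, the count is at most
    // 5, which fits in bit_width(5) = 3 bits, so bits 3 and up are known zero.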
    break;
  }
  case TargetOpcode::G_UBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    break;
  }
  case TargetOpcode::G_SBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    OffsetKnown = OffsetKnown.sext(BitWidth);
    WidthKnown = WidthKnown.sext(BitWidth);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    // Sign extend the extracted value using shift left and arithmetic shift
    // right.
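    // E.g. (illustrative) for BitWidth = 32 and a width known to be exactly
    // 8, ShiftKnown is exactly 24, and ashr(shl(x, 24), 24) sign-extends bit
    // 7 into the high bits.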
    KnownBits ExtKnown = KnownBits::makeConstant(APInt(BitWidth, BitWidth));
    KnownBits ShiftKnown = KnownBits::sub(ExtKnown, WidthKnown);
    Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown);
    break;
  }
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE: {
    if (MI.getOperand(1).getReg() == R) {
      // If we know the result of a compare has the top bits zero, use this
      // info.
      if (TL.getBooleanContents(DstTy.isVector(), /*isFloat=*/false) ==
              TargetLowering::ZeroOrOneBooleanContent &&
          BitWidth > 1)
        Known.Zero.setBitsFrom(1);
      break;
    }

    assert(MI.getOperand(0).getReg() == R &&
           "We only compute knownbits for the sum here.");
    // With [US]ADDE, a carry bit may be added in.
    KnownBits Carry(1);
    if (Opcode == TargetOpcode::G_UADDE || Opcode == TargetOpcode::G_SADDE) {
      computeKnownBitsImpl(MI.getOperand(4).getReg(), Carry, DemandedElts,
                           Depth + 1);
      // Carry has bit width 1
      Carry = Carry.trunc(1);
    } else {
      Carry.setAllZero();
    }

    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::computeForAddCarry(Known, Known2, Carry);
    break;
  }
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO: {
    if (MI.getOperand(1).getReg() == R) {
      // If we know the result of a compare has the top bits zero, use this
      // info.
      if (TL.getBooleanContents(DstTy.isVector(), /*isFloat=*/false) ==
              TargetLowering::ZeroOrOneBooleanContent &&
          BitWidth > 1)
        Known.Zero.setBitsFrom(1);
    }
    break;
  }
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    KnownBits SrcOpKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    // If we have a known 1, its position is our upper bound
    unsigned PossibleTZ = SrcOpKnown.countMaxTrailingZeros();
    unsigned LowBits = llvm::bit_width(PossibleTZ);
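    // E.g. if bit 4 is the lowest bit known to be one, PossibleTZ = 4, so the
    // count is at most 4 and fits in bit_width(4) = 3 bits.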
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    KnownBits SrcOpKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    // If we have a known 1, its position is our upper bound.
    unsigned PossibleLZ = SrcOpKnown.countMaxLeadingZeros();
    unsigned LowBits = llvm::bit_width(PossibleLZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case TargetOpcode::G_CTLS: {
    Register Reg = MI.getOperand(1).getReg();
    unsigned MinRedundantSignBits = computeNumSignBits(Reg, Depth + 1) - 1;

    unsigned MaxUpperRedundantSignBits = MRI.getType(Reg).getScalarSizeInBits();

    ConstantRange Range(APInt(BitWidth, MinRedundantSignBits),
                        APInt(BitWidth, MaxUpperRedundantSignBits));

    Known = Range.toKnownBits();
    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    GExtractVectorElement &Extract = cast<GExtractVectorElement>(MI);
    Register InVec = Extract.getVectorReg();
    Register EltNo = Extract.getIndexReg();

    auto ConstEltNo = getIConstantVRegVal(EltNo, MRI);

    LLT VecVT = MRI.getType(InVec);
    // computeKnownBits not yet implemented for scalable vectors.
    if (VecVT.isScalableVector())
      break;

    const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
    const unsigned NumSrcElts = VecVT.getNumElements();
    // A return type different from the vector's element type may lead to
    // issues with pattern selection. Bail out to avoid that.
    if (BitWidth > EltBitWidth)
      break;

    Known.Zero.setAllBits();
    Known.One.setAllBits();

    // If we know the element index, just demand that vector element, else for
    // an unknown element index, ignore DemandedElts and demand them all.
    APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
    if (ConstEltNo && ConstEltNo->ult(NumSrcElts))
      DemandedSrcElts =
          APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());

    computeKnownBitsImpl(InVec, Known, DemandedSrcElts, Depth + 1);
    break;
  }
  case TargetOpcode::G_SHUFFLE_VECTOR: {
    APInt DemandedLHS, DemandedRHS;
    // Collect the known bits that are shared by every vector element
    // referenced by the shuffle.
    unsigned NumElts = MRI.getType(MI.getOperand(1).getReg()).getNumElements();
    if (!getShuffleDemandedElts(NumElts, MI.getOperand(3).getShuffleMask(),
                                DemandedElts, DemandedLHS, DemandedRHS))
      break;

    // Known bits are the values that are shared by every demanded element.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    if (!!DemandedLHS) {
      computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedLHS,
                           Depth + 1);
      Known = Known.intersectWith(Known2);
    }
    // If we don't know any bits, early out.
    if (Known.isUnknown())
      break;
    if (!!DemandedRHS) {
      computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedRHS,
                           Depth + 1);
      Known = Known.intersectWith(Known2);
    }
    break;
  }
  case TargetOpcode::G_CONCAT_VECTORS: {
    if (MRI.getType(MI.getOperand(0).getReg()).isScalableVector())
      break;
    // Split DemandedElts and test each of the demanded subvectors.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    unsigned NumSubVectorElts =
        MRI.getType(MI.getOperand(1).getReg()).getNumElements();

    for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
      APInt DemandedSub =
          DemandedElts.extractBits(NumSubVectorElts, I * NumSubVectorElts);
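      // E.g. (illustrative) when concatenating two <2 x s32> sources, source
      // I covers result lanes [2*I, 2*I+1], so take that 2-bit slice of
      // DemandedElts.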
      if (!!DemandedSub) {
        computeKnownBitsImpl(MO.getReg(), Known2, DemandedSub, Depth + 1);

        Known = Known.intersectWith(Known2);
      }
      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }
    break;
  }
  case TargetOpcode::G_ABS: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.abs();
    Known.Zero.setHighBits(
        computeNumSignBits(SrcReg, DemandedElts, Depth + 1) - 1);
    break;
  }
  }

  LLVM_DEBUG(dumpResult(MI, Known, Depth));
}

static bool outputDenormalIsIEEEOrPosZero(const MachineFunction &MF, LLT Ty) {
  Ty = Ty.getScalarType();
  DenormalMode Mode = MF.getDenormalMode(getFltSemanticForLLT(Ty));
  return Mode.Output == DenormalMode::IEEE ||
         Mode.Output == DenormalMode::PositiveZero;
}

void GISelValueTracking::computeKnownFPClass(Register R, KnownFPClass &Known,
                                             FPClassTest InterestedClasses,
                                             unsigned Depth) {
  LLT Ty = MRI.getType(R);
  APInt DemandedElts =
      Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  computeKnownFPClass(R, DemandedElts, InterestedClasses, Known, Depth);
}

void GISelValueTracking::computeKnownFPClassForFPTrunc(
    const MachineInstr &MI, const APInt &DemandedElts,
    FPClassTest InterestedClasses, KnownFPClass &Known, unsigned Depth) {
  if ((InterestedClasses & (KnownFPClass::OrderedLessThanZeroMask | fcNan)) ==
      fcNone)
    return;

  Register Val = MI.getOperand(1).getReg();
  KnownFPClass KnownSrc;
  computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                      Depth + 1);

  // Sign should be preserved
  // TODO: Handle cannot be ordered greater than zero
  if (KnownSrc.cannotBeOrderedLessThanZero())
    Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);

  Known.propagateNaN(KnownSrc, /*PreserveSign=*/true);

  // Infinity needs a range check.
}

void GISelValueTracking::computeKnownFPClass(Register R,
                                             const APInt &DemandedElts,
                                             FPClassTest InterestedClasses,
                                             KnownFPClass &Known,
                                             unsigned Depth) {
  assert(Known.isUnknown() && "should not be called with known information");

  if (!DemandedElts) {
    // No demanded elts, better to assume we don't know anything.
    Known.resetAll();
    return;
  }

  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");

  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();
  LLT DstTy = MRI.getType(R);

  if (!DstTy.isValid()) {
    Known.resetAll();
    return;
  }

  if (auto Cst = GFConstant::getConstant(R, MRI)) {
    switch (Cst->getKind()) {
    case GFConstant::GFConstantKind::Scalar: {
      auto APF = Cst->getScalarValue();
      Known.KnownFPClasses = APF.classify();
      Known.SignBit = APF.isNegative();
      break;
    }
    case GFConstant::GFConstantKind::FixedVector: {
      Known.KnownFPClasses = fcNone;
      bool SignBitAllZero = true;
      bool SignBitAllOne = true;

      for (auto C : *Cst) {
        Known.KnownFPClasses |= C.classify();
        if (C.isNegative())
          SignBitAllZero = false;
        else
          SignBitAllOne = false;
      }

      if (SignBitAllOne != SignBitAllZero)
        Known.SignBit = SignBitAllOne;

      break;
    }
    case GFConstant::GFConstantKind::ScalableVector: {
      Known.resetAll();
      break;
    }
    }

    return;
  }

  FPClassTest KnownNotFromFlags = fcNone;
  if (MI.getFlag(MachineInstr::MIFlag::FmNoNans))
    KnownNotFromFlags |= fcNan;
  if (MI.getFlag(MachineInstr::MIFlag::FmNoInfs))
    KnownNotFromFlags |= fcInf;

  // We no longer need to find out about these bits from inputs if we can
  // assume this from flags/attributes.
  InterestedClasses &= ~KnownNotFromFlags;

  llvm::scope_exit ClearClassesFromFlags(
      [=, &Known] { Known.knownNot(KnownNotFromFlags); });

  // All recursive calls that increase depth must come after this.
  if (Depth == MaxAnalysisRecursionDepth)
    return;

  const MachineFunction *MF = MI.getMF();

  switch (Opcode) {
  default:
    TL.computeKnownFPClassForTargetInstr(*this, R, Known, DemandedElts, MRI,
                                         Depth);
    break;
  case TargetOpcode::G_FNEG: {
    Register Val = MI.getOperand(1).getReg();
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, Known, Depth + 1);
    Known.fneg();
    break;
  }
  case TargetOpcode::G_SELECT: {
    GSelect &SelMI = cast<GSelect>(MI);
    Register Cond = SelMI.getCondReg();
    Register LHS = SelMI.getTrueReg();
    Register RHS = SelMI.getFalseReg();

    FPClassTest FilterLHS = fcAllFlags;
    FPClassTest FilterRHS = fcAllFlags;

    Register TestedValue;
    FPClassTest MaskIfTrue = fcAllFlags;
    FPClassTest MaskIfFalse = fcAllFlags;
    FPClassTest ClassVal = fcNone;

    CmpInst::Predicate Pred;
    Register CmpLHS, CmpRHS;
    if (mi_match(Cond, MRI,
                 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) {
      // If the select filters out a value based on the class, it no longer
      // participates in the class of the result

      // TODO: In some degenerate cases we can infer something if we try again
      // without looking through sign operations.
      bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS;
      std::tie(TestedValue, MaskIfTrue, MaskIfFalse) =
          fcmpImpliesClass(Pred, *MF, CmpLHS, CmpRHS, LookThroughFAbsFNeg);
    } else if (mi_match(
                   Cond, MRI,
                   m_GIsFPClass(m_Reg(TestedValue), m_FPClassTest(ClassVal)))) {
      FPClassTest TestedMask = ClassVal;
      MaskIfTrue = TestedMask;
      MaskIfFalse = ~TestedMask;
    }

    if (TestedValue == LHS) {
      // match !isnan(x) ? x : y
      FilterLHS = MaskIfTrue;
    } else if (TestedValue == RHS) { // && IsExactClass
      // match !isnan(x) ? y : x
      FilterRHS = MaskIfFalse;
    }

    KnownFPClass Known2;
    computeKnownFPClass(LHS, DemandedElts, InterestedClasses & FilterLHS,
                        Known, Depth + 1);
    Known.KnownFPClasses &= FilterLHS;

    computeKnownFPClass(RHS, DemandedElts, InterestedClasses & FilterRHS,
                        Known2, Depth + 1);
    Known2.KnownFPClasses &= FilterRHS;

    Known |= Known2;
    break;
  }
  case TargetOpcode::G_FCOPYSIGN: {
    Register Magnitude = MI.getOperand(1).getReg();
    Register Sign = MI.getOperand(2).getReg();

    KnownFPClass KnownSign;

    computeKnownFPClass(Magnitude, DemandedElts, InterestedClasses, Known,
                        Depth + 1);
    computeKnownFPClass(Sign, DemandedElts, InterestedClasses, KnownSign,
                        Depth + 1);
    Known.copysign(KnownSign);
    break;
  }
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_STRICT_FMA:
  case TargetOpcode::G_FMAD: {
    if ((InterestedClasses & fcNegative) == fcNone)
      break;

    Register A = MI.getOperand(1).getReg();
    Register B = MI.getOperand(2).getReg();
    Register C = MI.getOperand(3).getReg();

    if (A != B)
      break;

    // The multiply cannot be -0 and therefore the add can't be -0
    Known.knownNot(fcNegZero);

    // x * x + y is non-negative if y is non-negative.
    KnownFPClass KnownAddend;
    computeKnownFPClass(C, DemandedElts, InterestedClasses, KnownAddend,
                        Depth + 1);

    if (KnownAddend.cannotBeOrderedLessThanZero())
      Known.knownNot(fcNegative);
    break;
  }
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_STRICT_FSQRT: {
    KnownFPClass KnownSrc;
    FPClassTest InterestedSrcs = InterestedClasses;
    if (InterestedClasses & fcNan)
      InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;

    Register Val = MI.getOperand(1).getReg();

    computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1);

    if (KnownSrc.isKnownNeverPosInfinity())
      Known.knownNot(fcPosInf);
    if (KnownSrc.isKnownNever(fcSNan))
      Known.knownNot(fcSNan);

    // Any negative value besides -0 returns a nan.
    if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
      Known.knownNot(fcNan);

    // The only negative value that can be returned is -0 for -0 inputs.
    Known.knownNot(fcNegInf | fcNegSubnormal | fcNegNormal);
    break;
  }
  case TargetOpcode::G_FABS: {
    if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) {
      Register Val = MI.getOperand(1).getReg();
      // If we only care about the sign bit we don't need to inspect the
      // operand.
      computeKnownFPClass(Val, DemandedElts, InterestedClasses, Known,
                          Depth + 1);
    }
    Known.fabs();
    break;
  }
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSINCOS: {
    // Return NaN on infinite inputs.
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;

    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);
    Known.knownNot(fcInf);

    if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity())
      Known.knownNot(fcNan);
    break;
  }
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMAXIMUMNUM:
  case TargetOpcode::G_FMINIMUMNUM: {
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    KnownFPClass KnownLHS, KnownRHS;

    computeKnownFPClass(LHS, DemandedElts, InterestedClasses, KnownLHS,
                        Depth + 1);
    computeKnownFPClass(RHS, DemandedElts, InterestedClasses, KnownRHS,
                        Depth + 1);

    bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN();
    Known = KnownLHS | KnownRHS;

    // If either operand is not NaN, the result is not NaN.
    if (NeverNaN && (Opcode == TargetOpcode::G_FMINNUM ||
                     Opcode == TargetOpcode::G_FMAXNUM ||
                     Opcode == TargetOpcode::G_FMINIMUMNUM ||
                     Opcode == TargetOpcode::G_FMAXIMUMNUM))
      Known.knownNot(fcNan);

    if (Opcode == TargetOpcode::G_FMAXNUM ||
        Opcode == TargetOpcode::G_FMAXIMUMNUM ||
        Opcode == TargetOpcode::G_FMAXNUM_IEEE) {
      // If at least one operand is known to be positive, the result must be
      // positive.
      if ((KnownLHS.cannotBeOrderedLessThanZero() &&
           KnownLHS.isKnownNeverNaN()) ||
          (KnownRHS.cannotBeOrderedLessThanZero() &&
           KnownRHS.isKnownNeverNaN()))
        Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
    } else if (Opcode == TargetOpcode::G_FMAXIMUM) {
      // If at least one operand is known to be positive, the result must be
      // positive.
      if (KnownLHS.cannotBeOrderedLessThanZero() ||
          KnownRHS.cannotBeOrderedLessThanZero())
        Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
    } else if (Opcode == TargetOpcode::G_FMINNUM ||
               Opcode == TargetOpcode::G_FMINIMUMNUM ||
               Opcode == TargetOpcode::G_FMINNUM_IEEE) {
      // If at least one operand is known to be negative, the result must be
      // negative.
      if ((KnownLHS.cannotBeOrderedGreaterThanZero() &&
           KnownLHS.isKnownNeverNaN()) ||
          (KnownRHS.cannotBeOrderedGreaterThanZero() &&
           KnownRHS.isKnownNeverNaN()))
        Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
    } else if (Opcode == TargetOpcode::G_FMINIMUM) {
      // If at least one operand is known to be negative, the result must be
      // negative.
      if (KnownLHS.cannotBeOrderedGreaterThanZero() ||
          KnownRHS.cannotBeOrderedGreaterThanZero())
        Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
    } else {
      llvm_unreachable("unhandled intrinsic");
    }

    // Fixup zero handling if denormals could be returned as a zero.
    //
    // As there's no spec for denormal flushing, be conservative with the
    // treatment of denormals that could be flushed to zero. For older
    // subtargets on AMDGPU the min/max instructions would not flush the
    // output and return the original value.
    //
    if ((Known.KnownFPClasses & fcZero) != fcNone &&
        !Known.isKnownNeverSubnormal()) {
      DenormalMode Mode =
          MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType()));
      if (Mode != DenormalMode::getIEEE())
        Known.KnownFPClasses |= fcZero;
    }

    if (Known.isKnownNeverNaN()) {
      if (KnownLHS.SignBit && KnownRHS.SignBit &&
          *KnownLHS.SignBit == *KnownRHS.SignBit) {
        if (*KnownLHS.SignBit)
          Known.signBitMustBeOne();
        else
          Known.signBitMustBeZero();
      } else if ((Opcode == TargetOpcode::G_FMAXIMUM ||
                  Opcode == TargetOpcode::G_FMINIMUM) ||
                 Opcode == TargetOpcode::G_FMAXIMUMNUM ||
                 Opcode == TargetOpcode::G_FMINIMUMNUM ||
                 Opcode == TargetOpcode::G_FMAXNUM_IEEE ||
                 Opcode == TargetOpcode::G_FMINNUM_IEEE ||
                 // FIXME: Should be using logical zero versions
                 ((KnownLHS.isKnownNeverNegZero() ||
                   KnownRHS.isKnownNeverPosZero()) &&
                  (KnownLHS.isKnownNeverPosZero() ||
                   KnownRHS.isKnownNeverNegZero()))) {
        if ((Opcode == TargetOpcode::G_FMAXIMUM ||
             Opcode == TargetOpcode::G_FMAXNUM ||
             Opcode == TargetOpcode::G_FMAXIMUMNUM ||
             Opcode == TargetOpcode::G_FMAXNUM_IEEE) &&
            (KnownLHS.SignBit == false || KnownRHS.SignBit == false))
          Known.signBitMustBeZero();
        else if ((Opcode == TargetOpcode::G_FMINIMUM ||
                  Opcode == TargetOpcode::G_FMINNUM ||
                  Opcode == TargetOpcode::G_FMINIMUMNUM ||
                  Opcode == TargetOpcode::G_FMINNUM_IEEE) &&
                 (KnownLHS.SignBit == true || KnownRHS.SignBit == true))
          Known.signBitMustBeOne();
      }
    }
    break;
  }
  case TargetOpcode::G_FCANONICALIZE: {
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);

    // This is essentially a stronger form of
    // propagateCanonicalizingSrc. Other "canonicalizing" operations don't
    // actually have an IR canonicalization guarantee.

    // Canonicalize may flush denormals to zero, so we have to consider the
    // denormal mode to preserve known-not-0 knowledge.
    Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan;

    // Stronger version of propagateNaN
    // Canonicalize is guaranteed to quiet signaling nans.
    if (KnownSrc.isKnownNeverNaN())
      Known.knownNot(fcNan);
    else
      Known.knownNot(fcSNan);

    // If the parent function flushes denormals, the canonical output cannot
    // be a denormal.
    LLT Ty = MRI.getType(Val).getScalarType();
    const fltSemantics &FPType = getFltSemanticForLLT(Ty);
    DenormalMode DenormMode = MF->getDenormalMode(FPType);
    if (DenormMode == DenormalMode::getIEEE()) {
      if (KnownSrc.isKnownNever(fcPosZero))
        Known.knownNot(fcPosZero);
      if (KnownSrc.isKnownNever(fcNegZero))
        Known.knownNot(fcNegZero);
      break;
    }

    if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero())
      Known.knownNot(fcSubnormal);

    if (DenormMode.Input == DenormalMode::PositiveZero ||
        (DenormMode.Output == DenormalMode::PositiveZero &&
         DenormMode.Input == DenormalMode::IEEE))
      Known.knownNot(fcNegZero);

    break;
  }
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM:
  case TargetOpcode::G_VECREDUCE_FMINIMUM: {
    Register Val = MI.getOperand(1).getReg();
    // reduce min/max will choose one of the vector elements, so we can infer
    // any class information that is common to all elements.

    Known =
        computeKnownFPClass(Val, MI.getFlags(), InterestedClasses, Depth + 1);
    // Can only propagate sign if output is never NaN.
    if (!Known.isKnownNeverNaN())
      Known.SignBit.reset();
    break;
  }
  case TargetOpcode::G_TRUNC:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUND: {
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    FPClassTest InterestedSrcs = InterestedClasses;
    if (InterestedSrcs & fcPosFinite)
      InterestedSrcs |= fcPosFinite;
    if (InterestedSrcs & fcNegFinite)
      InterestedSrcs |= fcNegFinite;
    computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1);

    // Integer results cannot be subnormal.
    Known.knownNot(fcSubnormal);

    Known.propagateNaN(KnownSrc, /*PreserveSign=*/true);

    // TODO: handle multi unit FPTypes once LLT FPInfo lands

    // Negative values that round up to 0 produce -0.
    if (KnownSrc.isKnownNever(fcPosFinite))
      Known.knownNot(fcPosFinite);
    if (KnownSrc.isKnownNever(fcNegFinite))
      Known.knownNot(fcNegFinite);

    break;
  }
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FEXP10: {
    Known.knownNot(fcNegative);
    if ((InterestedClasses & fcNan) == fcNone)
      break;

    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);
    if (KnownSrc.isKnownNeverNaN()) {
      Known.knownNot(fcNan);
      Known.signBitMustBeZero();
    }

    break;
  }
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FLOG10: {
    // log(+inf) -> +inf
    // log([+-]0.0) -> -inf
    // log(-inf) -> nan
    // log(-x) -> nan
    if ((InterestedClasses & (fcNan | fcInf)) == fcNone)
      break;

    FPClassTest InterestedSrcs = InterestedClasses;
    if ((InterestedClasses & fcNegInf) != fcNone)
      InterestedSrcs |= fcZero | fcSubnormal;
    if ((InterestedClasses & fcNan) != fcNone)
      InterestedSrcs |= fcNan | (fcNegative & ~fcNan);

    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1);

    if (KnownSrc.isKnownNeverPosInfinity())
      Known.knownNot(fcPosInf);

    if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
      Known.knownNot(fcNan);

    LLT Ty = MRI.getType(Val).getScalarType();
    const fltSemantics &FltSem = getFltSemanticForLLT(Ty);
    DenormalMode Mode = MF->getDenormalMode(FltSem);

    if (KnownSrc.isKnownNeverLogicalZero(Mode))
      Known.knownNot(fcNegInf);

    break;
  }
  case TargetOpcode::G_FPOWI: {
    if ((InterestedClasses & fcNegative) == fcNone)
      break;

    Register Exp = MI.getOperand(2).getReg();
    LLT ExpTy = MRI.getType(Exp);
    KnownBits ExponentKnownBits = getKnownBits(
        Exp, ExpTy.isVector() ? DemandedElts : APInt(1, 1), Depth + 1);

    if (ExponentKnownBits.Zero[0]) { // Is even
      Known.knownNot(fcNegative);
      break;
    }

    // Given that exp is an integer, here are the
    // ways that pow can return a negative value:
    //
    // pow(-x, exp) --> negative if exp is odd and x is negative.
    // pow(-0, exp) --> -inf if exp is negative odd.
    // pow(-0, exp) --> -0 if exp is positive odd.
    // pow(-inf, exp) --> -0 if exp is negative odd.
    // pow(-inf, exp) --> -inf if exp is positive odd.
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, fcNegative, KnownSrc, Depth + 1);
    if (KnownSrc.isKnownNever(fcNegative))
      Known.knownNot(fcNegative);
    break;
  }
  case TargetOpcode::G_FLDEXP:
  case TargetOpcode::G_STRICT_FLDEXP: {
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);
    Known.propagateNaN(KnownSrc, /*PreserveSign=*/true);

    // Sign is preserved, but underflows may produce zeroes.
    if (KnownSrc.isKnownNever(fcNegative))
      Known.knownNot(fcNegative);
    else if (KnownSrc.cannotBeOrderedLessThanZero())
      Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);

    if (KnownSrc.isKnownNever(fcPositive))
      Known.knownNot(fcPositive);
    else if (KnownSrc.cannotBeOrderedGreaterThanZero())
      Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);

    // Can refine inf/zero handling based on the exponent operand.
    const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf;
    if ((InterestedClasses & ExpInfoMask) == fcNone)
      break;
    if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone)
      break;

    // TODO: Handle constant range of Exp

    break;
  }
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known,
                                  Depth);
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_STRICT_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_STRICT_FSUB: {
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    KnownFPClass KnownLHS, KnownRHS;
    bool WantNegative =
        (Opcode == TargetOpcode::G_FADD ||
         Opcode == TargetOpcode::G_STRICT_FADD) &&
        (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone;
    bool WantNaN = (InterestedClasses & fcNan) != fcNone;
    bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone;

    if (!WantNaN && !WantNegative && !WantNegZero)
      break;

    FPClassTest InterestedSrcs = InterestedClasses;
    if (WantNegative)
      InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
    if (InterestedClasses & fcNan)
      InterestedSrcs |= fcInf;
    computeKnownFPClass(RHS, DemandedElts, InterestedSrcs, KnownRHS, Depth + 1);

    if ((WantNaN && KnownRHS.isKnownNeverNaN()) ||
        (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) ||
        WantNegZero ||
        (Opcode == TargetOpcode::G_FSUB ||
         Opcode == TargetOpcode::G_STRICT_FSUB)) {

      // RHS is canonically cheaper to compute. Skip inspecting the LHS if
      // there's no point.
      computeKnownFPClass(LHS, DemandedElts, InterestedSrcs, KnownLHS,
                          Depth + 1);
      // Adding positive and negative infinity produces NaN.
      // TODO: Check sign of infinities.
      if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
          (KnownLHS.isKnownNeverInfinity() || KnownRHS.isKnownNeverInfinity()))
        Known.knownNot(fcNan);

      if (Opcode == TargetOpcode::G_FADD ||
          Opcode == TargetOpcode::G_STRICT_FADD) {
        if (KnownLHS.cannotBeOrderedLessThanZero() &&
            KnownRHS.cannotBeOrderedLessThanZero())
          Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);

        // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
        if ((KnownLHS.isKnownNeverLogicalNegZero(MF->getDenormalMode(
                 getFltSemanticForLLT(DstTy.getScalarType()))) ||
             KnownRHS.isKnownNeverLogicalNegZero(MF->getDenormalMode(
                 getFltSemanticForLLT(DstTy.getScalarType())))) &&
            // Make sure output negative denormal can't flush to -0
            outputDenormalIsIEEEOrPosZero(*MF, DstTy))
          Known.knownNot(fcNegZero);
      } else {
        // Only fsub -0, +0 can return -0
        if ((KnownLHS.isKnownNeverLogicalNegZero(MF->getDenormalMode(
                 getFltSemanticForLLT(DstTy.getScalarType()))) ||
             KnownRHS.isKnownNeverLogicalPosZero(MF->getDenormalMode(
                 getFltSemanticForLLT(DstTy.getScalarType())))) &&
            // Make sure output negative denormal can't flush to -0
            outputDenormalIsIEEEOrPosZero(*MF, DstTy))
          Known.knownNot(fcNegZero);
      }
    }

    break;
  }
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_STRICT_FMUL: {
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    // X * X is always non-negative or a NaN.
    if (LHS == RHS)
      Known.knownNot(fcNegative);

    if ((InterestedClasses & fcNan) != fcNan)
      break;

    // fcSubnormal is only needed in case of DAZ.
    const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal;

    KnownFPClass KnownLHS, KnownRHS;
    computeKnownFPClass(RHS, DemandedElts, NeedForNan, KnownRHS, Depth + 1);
    if (!KnownRHS.isKnownNeverNaN())
      break;

    computeKnownFPClass(LHS, DemandedElts, NeedForNan, KnownLHS, Depth + 1);
    if (!KnownLHS.isKnownNeverNaN())
      break;

    if (KnownLHS.SignBit && KnownRHS.SignBit) {
      if (*KnownLHS.SignBit == *KnownRHS.SignBit)
        Known.signBitMustBeZero();
      else
        Known.signBitMustBeOne();
    }
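
    // e.g. if both operands are known to have their sign bit set (and, per
    // the checks above, neither can be NaN), the product's sign bit is known
    // to be zero.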

    // Since neither operand can be NaN here, the only remaining way for the
    // product to be NaN is 0 * +/-inf.
    if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) {
      Known.knownNot(fcNan);
      break;
    }

    if ((KnownRHS.isKnownNeverInfinity() ||
         KnownLHS.isKnownNeverLogicalZero(MF->getDenormalMode(
             getFltSemanticForLLT(DstTy.getScalarType())))) &&
        (KnownLHS.isKnownNeverInfinity() ||
         KnownRHS.isKnownNeverLogicalZero(MF->getDenormalMode(
             getFltSemanticForLLT(DstTy.getScalarType())))))
      Known.knownNot(fcNan);

    break;
  }
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM: {
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();

    if (LHS == RHS) {
      // TODO: Could filter out snan if we inspect the operand.
      if (Opcode == TargetOpcode::G_FDIV) {
        // X / X is always exactly 1.0 or a NaN.
        Known.KnownFPClasses = fcNan | fcPosNormal;
      } else {
        // X % X is always exactly [+-]0.0 or a NaN.
        Known.KnownFPClasses = fcNan | fcZero;
      }

      break;
    }

    const bool WantNan = (InterestedClasses & fcNan) != fcNone;
    const bool WantNegative = (InterestedClasses & fcNegative) != fcNone;
    const bool WantPositive = Opcode == TargetOpcode::G_FREM &&
                              (InterestedClasses & fcPositive) != fcNone;
    if (!WantNan && !WantNegative && !WantPositive)
      break;

    KnownFPClass KnownLHS, KnownRHS;

    computeKnownFPClass(RHS, DemandedElts, fcNan | fcInf | fcZero | fcNegative,
                        KnownRHS, Depth + 1);

    bool KnowSomethingUseful =
        KnownRHS.isKnownNeverNaN() || KnownRHS.isKnownNever(fcNegative);

    if (KnowSomethingUseful || WantPositive) {
      const FPClassTest InterestedLHS =
          WantPositive ? fcAllFlags
                       : fcNan | fcInf | fcZero | fcSubnormal | fcNegative;

      computeKnownFPClass(LHS, DemandedElts, InterestedClasses & InterestedLHS,
                          KnownLHS, Depth + 1);
    }

    if (Opcode == TargetOpcode::G_FDIV) {
      // Only 0/0, Inf/Inf produce NaN.
      if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
          (KnownLHS.isKnownNeverInfinity() ||
           KnownRHS.isKnownNeverInfinity()) &&
          ((KnownLHS.isKnownNeverLogicalZero(MF->getDenormalMode(
               getFltSemanticForLLT(DstTy.getScalarType())))) ||
           (KnownRHS.isKnownNeverLogicalZero(MF->getDenormalMode(
               getFltSemanticForLLT(DstTy.getScalarType())))))) {
        Known.knownNot(fcNan);
      }

      // X / -0.0 is -Inf (or NaN).
      // +X / +X is +X.
      if (KnownLHS.isKnownNever(fcNegative) &&
          KnownRHS.isKnownNever(fcNegative))
        Known.knownNot(fcNegative);
    } else {
      // Inf REM x and x REM 0 produce NaN.
      if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
          KnownLHS.isKnownNeverInfinity() &&
          KnownRHS.isKnownNeverLogicalZero(MF->getDenormalMode(
              getFltSemanticForLLT(DstTy.getScalarType())))) {
        Known.knownNot(fcNan);
      }

      // The sign for frem is the same as the first operand.
      if (KnownLHS.cannotBeOrderedLessThanZero())
        Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
      if (KnownLHS.cannotBeOrderedGreaterThanZero())
        Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);

      // See if we can be more aggressive about the sign of 0.
      if (KnownLHS.isKnownNever(fcNegative))
        Known.knownNot(fcNegative);
      if (KnownLHS.isKnownNever(fcPositive))
        Known.knownNot(fcPositive);
    }

    break;
  }
  case TargetOpcode::G_FPEXT: {
    Register Dst = MI.getOperand(0).getReg();
    Register Src = MI.getOperand(1).getReg();
    // Infinity, NaN and zero propagate from the source.
    computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known,
                        Depth + 1);

    LLT DstTy = MRI.getType(Dst).getScalarType();
    const fltSemantics &DstSem = getFltSemanticForLLT(DstTy);
    LLT SrcTy = MRI.getType(Src).getScalarType();
    const fltSemantics &SrcSem = getFltSemanticForLLT(SrcTy);

    // All subnormal inputs should be in the normal range in the result type.
    if (APFloat::isRepresentableAsNormalIn(SrcSem, DstSem)) {
      if (Known.KnownFPClasses & fcPosSubnormal)
        Known.KnownFPClasses |= fcPosNormal;
      if (Known.KnownFPClasses & fcNegSubnormal)
        Known.KnownFPClasses |= fcNegNormal;
      Known.knownNot(fcSubnormal);
    }
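
    // e.g. the smallest f16 subnormal (2^-24) lies well inside f32's normal
    // range (2^-126 and above), so a G_FPEXT from f16 to f32 can never
    // produce a subnormal result.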

    // Sign bit of a NaN isn't guaranteed.
    if (!Known.isKnownNeverNaN())
      Known.SignBit = std::nullopt;
    break;
  }
  case TargetOpcode::G_FPTRUNC: {
    computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known,
                                  Depth);
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // Cannot produce NaN.
    Known.knownNot(fcNan);

    // Integers cannot be subnormal.
    Known.knownNot(fcSubnormal);

    // sitofp and uitofp turn into +0.0 for zero.
    Known.knownNot(fcNegZero);
    if (Opcode == TargetOpcode::G_UITOFP)
      Known.signBitMustBeZero();

    Register Val = MI.getOperand(1).getReg();
    LLT Ty = MRI.getType(Val);

    if (InterestedClasses & fcInf) {
      // Get width of largest magnitude integer (remove a bit if signed).
      // This still works for a signed minimum value because the largest FP
      // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx).
      int IntSize = Ty.getScalarSizeInBits();
      if (Opcode == TargetOpcode::G_SITOFP)
        --IntSize;

      // If the exponent of the largest finite FP value can hold the largest
      // integer, the result of the cast must be finite.
      LLT FPTy = DstTy.getScalarType();
      const fltSemantics &FltSem = getFltSemanticForLLT(FPTy);
      if (ilogb(APFloat::getLargest(FltSem)) >= IntSize)
        Known.knownNot(fcInf);
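      // e.g. for f32, ilogb(FLT_MAX) == 127, so any i32 source (IntSize <= 32
      // even when unsigned) converts to a finite value, while an unsigned
      // i128 source (IntSize == 128) can round up to +inf.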
    }

    break;
  }
  // case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_CONCAT_VECTORS: {
    GMergeLikeInstr &Merge = cast<GMergeLikeInstr>(MI);

    if (!DstTy.isFixedVector())
      break;

    bool First = true;
    for (unsigned Idx = 0; Idx < Merge.getNumSources(); ++Idx) {
      // Do we demand the element contributed by this source?
      bool NeedsElt = DemandedElts[Idx];

      if (NeedsElt) {
        Register Src = Merge.getSourceReg(Idx);
        if (First) {
          computeKnownFPClass(Src, Known, InterestedClasses, Depth + 1);
          First = false;
        } else {
          KnownFPClass Known2;
          computeKnownFPClass(Src, Known2, InterestedClasses, Depth + 1);
          Known |= Known2;
        }

        // If we don't know any bits, early out.
        if (Known.isUnknown())
          break;
      }
    }

    break;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    // Look through extract element. If the index is non-constant or
    // out-of-range demand all elements, otherwise just the extracted
    // element.
    GExtractVectorElement &Extract = cast<GExtractVectorElement>(MI);
    Register Vec = Extract.getVectorReg();
    Register Idx = Extract.getIndexReg();

    auto CIdx = getIConstantVRegVal(Idx, MRI);

    LLT VecTy = MRI.getType(Vec);

    if (VecTy.isFixedVector()) {
      unsigned NumElts = VecTy.getNumElements();
      APInt DemandedVecElts = APInt::getAllOnes(NumElts);
      if (CIdx && CIdx->ult(NumElts))
        DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
      return computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses,
                                 Known, Depth + 1);
    }

    break;
  }
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    GInsertVectorElement &Insert = cast<GInsertVectorElement>(MI);
    Register Vec = Insert.getVectorReg();
    Register Elt = Insert.getElementReg();
    Register Idx = Insert.getIndexReg();

    LLT VecTy = MRI.getType(Vec);

    if (VecTy.isScalableVector())
      return;

    auto CIdx = getIConstantVRegVal(Idx, MRI);

    unsigned NumElts = DemandedElts.getBitWidth();
    APInt DemandedVecElts = DemandedElts;
    bool NeedsElt = true;
    // If we know the index we are inserting to, clear it from the Vec check.
    if (CIdx && CIdx->ult(NumElts)) {
      DemandedVecElts.clearBit(CIdx->getZExtValue());
      NeedsElt = DemandedElts[CIdx->getZExtValue()];
    }

    // Do we demand the inserted element?
    if (NeedsElt) {
      computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1);
      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    } else {
      Known.KnownFPClasses = fcNone;
    }

    // Do we need any more elements from Vec?
    if (!DemandedVecElts.isZero()) {
      KnownFPClass Known2;
      computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2,
                          Depth + 1);
      Known |= Known2;
    }

    break;
  }
  case TargetOpcode::G_SHUFFLE_VECTOR: {
    // For undef elements, we don't know anything about the common state of
    // the shuffle result.
    GShuffleVector &Shuf = cast<GShuffleVector>(MI);
    APInt DemandedLHS, DemandedRHS;
    if (DstTy.isScalableVector()) {
      assert(DemandedElts == APInt(1, 1));
      DemandedLHS = DemandedRHS = DemandedElts;
    } else {
      if (!llvm::getShuffleDemandedElts(DstTy.getNumElements(), Shuf.getMask(),
                                        DemandedElts, DemandedLHS,
                                        DemandedRHS)) {
        Known.resetAll();
        return;
      }
    }

    if (!!DemandedLHS) {
      Register LHS = Shuf.getSrc1Reg();
      computeKnownFPClass(LHS, DemandedLHS, InterestedClasses, Known,
                          Depth + 1);

      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    } else {
      Known.KnownFPClasses = fcNone;
    }

    if (!!DemandedRHS) {
      KnownFPClass Known2;
      Register RHS = Shuf.getSrc2Reg();
      computeKnownFPClass(RHS, DemandedRHS, InterestedClasses, Known2,
                          Depth + 1);
      Known |= Known2;
    }
    break;
  }
  case TargetOpcode::COPY: {
    Register Src = MI.getOperand(1).getReg();

    if (!Src.isVirtual())
      return;

    computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known,
                        Depth + 1);
    break;
  }
  }
}

KnownFPClass
GISelValueTracking::computeKnownFPClass(Register R, const APInt &DemandedElts,
                                        FPClassTest InterestedClasses,
                                        unsigned Depth) {
  KnownFPClass KnownClasses;
  computeKnownFPClass(R, DemandedElts, InterestedClasses, KnownClasses, Depth);
  return KnownClasses;
}

KnownFPClass GISelValueTracking::computeKnownFPClass(
    Register R, FPClassTest InterestedClasses, unsigned Depth) {
  KnownFPClass Known;
  computeKnownFPClass(R, Known, InterestedClasses, Depth);
  return Known;
}

KnownFPClass GISelValueTracking::computeKnownFPClass(
    Register R, const APInt &DemandedElts, uint32_t Flags,
    FPClassTest InterestedClasses, unsigned Depth) {
  if (Flags & MachineInstr::MIFlag::FmNoNans)
    InterestedClasses &= ~fcNan;
  if (Flags & MachineInstr::MIFlag::FmNoInfs)
    InterestedClasses &= ~fcInf;

  KnownFPClass Result =
      computeKnownFPClass(R, DemandedElts, InterestedClasses, Depth);

  if (Flags & MachineInstr::MIFlag::FmNoNans)
    Result.KnownFPClasses &= ~fcNan;
  if (Flags & MachineInstr::MIFlag::FmNoInfs)
    Result.KnownFPClasses &= ~fcInf;
  return Result;
}

KnownFPClass GISelValueTracking::computeKnownFPClass(
    Register R, uint32_t Flags, FPClassTest InterestedClasses, unsigned Depth) {
  LLT Ty = MRI.getType(R);
  APInt DemandedElts =
      Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  return computeKnownFPClass(R, DemandedElts, Flags, InterestedClasses, Depth);
}

/// Compute number of sign bits for the intersection of \p Src0 and \p Src1
unsigned GISelValueTracking::computeNumSignBitsMin(Register Src0, Register Src1,
                                                   const APInt &DemandedElts,
                                                   unsigned Depth) {
  // Test src1 first, since we canonicalize simpler expressions to the RHS.
  unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth);
  if (Src1SignBits == 1)
    return 1;
  return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
}

/// Compute the known number of sign bits with attached range metadata in the
/// memory operand. If this is an extending load, accounts for the behavior of
/// the high bits.
static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
                                                    unsigned TyBits) {
  const MDNode *Ranges = Ld->getRanges();
  if (!Ranges)
    return 1;

  ConstantRange CR = getConstantRangeFromMetadata(*Ranges);
  if (TyBits > CR.getBitWidth()) {
    switch (Ld->getOpcode()) {
    case TargetOpcode::G_SEXTLOAD:
      CR = CR.signExtend(TyBits);
      break;
    case TargetOpcode::G_ZEXTLOAD:
      CR = CR.zeroExtend(TyBits);
      break;
    default:
      break;
    }
  }

  return std::min(CR.getSignedMin().getNumSignBits(),
                  CR.getSignedMax().getNumSignBits());
}

unsigned GISelValueTracking::computeNumSignBits(Register R,
                                                const APInt &DemandedElts,
                                                unsigned Depth) {
  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();

  if (Opcode == TargetOpcode::G_CONSTANT)
    return MI.getOperand(1).getCImm()->getValue().getNumSignBits();

  if (Depth == getMaxDepth())
    return 1;

  if (!DemandedElts)
    return 1; // No demanded elts, better to assume we don't know anything.

  LLT DstTy = MRI.getType(R);
  const unsigned TyBits = DstTy.getScalarSizeInBits();

  // Handle the case where this is called on a register that does not have a
  // type constraint. This is unlikely to occur except by looking through
  // copies, but it is possible for the initial register being queried to be
  // in this state.
  if (!DstTy.isValid())
    return 1;

  unsigned FirstAnswer = 1;
  switch (Opcode) {
  case TargetOpcode::COPY: {
    MachineOperand &Src = MI.getOperand(1);
    if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
        MRI.getType(Src.getReg()).isValid()) {
      // Don't increment Depth for this one since we didn't do any work.
      return computeNumSignBits(Src.getReg(), DemandedElts, Depth);
    }

    return 1;
  }
  case TargetOpcode::G_SEXT: {
    Register Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(Src);
    unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
    return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
  }
  case TargetOpcode::G_ASSERT_SEXT:
  case TargetOpcode::G_SEXT_INREG: {
    // Max of the input and what this extends.
    Register Src = MI.getOperand(1).getReg();
    unsigned SrcBits = MI.getOperand(2).getImm();
    unsigned InRegBits = TyBits - SrcBits + 1;
    return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1),
                    InRegBits);
  }
  case TargetOpcode::G_LOAD: {
    GLoad *Ld = cast<GLoad>(&MI);
    if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
      break;

    return computeNumSignBitsFromRangeMetadata(Ld, TyBits);
  }
  case TargetOpcode::G_SEXTLOAD: {
    GSExtLoad *Ld = cast<GSExtLoad>(&MI);

    // FIXME: We need an in-memory type representation.
    if (DstTy.isVector())
      return 1;

    unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
    if (NumBits != 1)
      return NumBits;

    // e.g. i16->i32 = '17' bits known.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return TyBits - MMO->getSizeInBits().getValue() + 1;
  }
  case TargetOpcode::G_ZEXTLOAD: {
    GZExtLoad *Ld = cast<GZExtLoad>(&MI);

    // FIXME: We need an in-memory type representation.
    if (DstTy.isVector())
      return 1;

    unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
    if (NumBits != 1)
      return NumBits;

    // e.g. i16->i32 = '16' bits known.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return TyBits - MMO->getSizeInBits().getValue();
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    Register Src1 = MI.getOperand(1).getReg();
    unsigned Src1NumSignBits =
        computeNumSignBits(Src1, DemandedElts, Depth + 1);
    if (Src1NumSignBits != 1) {
      Register Src2 = MI.getOperand(2).getReg();
      unsigned Src2NumSignBits =
          computeNumSignBits(Src2, DemandedElts, Depth + 1);
      FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits);
    }
    break;
  }
  case TargetOpcode::G_ASHR: {
    Register Src1 = MI.getOperand(1).getReg();
    Register Src2 = MI.getOperand(2).getReg();
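    // Each arithmetic-shift-right step duplicates the sign bit, so a known
    // minimum shift amount adds that many sign bits (clamped to the type
    // width): e.g. (ashr x, 3) has at least computeNumSignBits(x) + 3.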
    FirstAnswer = computeNumSignBits(Src1, DemandedElts, Depth + 1);
    if (auto C = getValidMinimumShiftAmount(Src2, DemandedElts, Depth + 1))
      FirstAnswer = std::min<uint64_t>(FirstAnswer + *C, TyBits);
    break;
  }
  case TargetOpcode::G_SHL: {
    Register Src1 = MI.getOperand(1).getReg();
    Register Src2 = MI.getOperand(2).getReg();
    if (std::optional<ConstantRange> ShAmtRange =
            getValidShiftAmountRange(Src2, DemandedElts, Depth + 1)) {
      uint64_t MaxShAmt = ShAmtRange->getUnsignedMax().getZExtValue();
      uint64_t MinShAmt = ShAmtRange->getUnsignedMin().getZExtValue();

      MachineInstr &ExtMI = *MRI.getVRegDef(Src1);
      unsigned ExtOpc = ExtMI.getOpcode();

      // Try to look through a ZERO/SIGN/ANY_EXTEND. If all extended bits are
      // shifted out, then we can compute the number of sign bits for the
      // operand being extended. A future improvement could be to pass along
      // the "shifted left by" information in the recursive calls to
      // computeNumSignBits, allowing us to handle this more generically.
      if (ExtOpc == TargetOpcode::G_SEXT || ExtOpc == TargetOpcode::G_ZEXT ||
          ExtOpc == TargetOpcode::G_ANYEXT) {
        LLT ExtTy = MRI.getType(Src1);
        Register Extendee = ExtMI.getOperand(1).getReg();
        LLT ExtendeeTy = MRI.getType(Extendee);
        uint64_t SizeDiff =
            ExtTy.getScalarSizeInBits() - ExtendeeTy.getScalarSizeInBits();

        if (SizeDiff <= MinShAmt) {
          unsigned Tmp =
              SizeDiff + computeNumSignBits(Extendee, DemandedElts, Depth + 1);
          if (MaxShAmt < Tmp)
            return Tmp - MaxShAmt;
        }
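        // e.g. (shl (sext i8 %x to i32), 24): SizeDiff and the constant shift
        // amount are both 24, so the known sign bits of %x carry over
        // unchanged (Tmp - MaxShAmt == computeNumSignBits(%x)).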
      }
      // shl destroys sign bits, ensure it doesn't shift out all sign bits.
      unsigned Tmp = computeNumSignBits(Src1, DemandedElts, Depth + 1);
      if (MaxShAmt < Tmp)
        return Tmp - MaxShAmt;
    }
    break;
  }
  case TargetOpcode::G_TRUNC: {
    Register Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(Src);

    // Check if the sign bits of source go down as far as the truncated value.
    unsigned DstTyBits = DstTy.getScalarSizeInBits();
    unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
    unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1);
    if (NumSrcSignBits > (NumSrcBits - DstTyBits))
      return NumSrcSignBits - (NumSrcBits - DstTyBits);
    break;
  }
  case TargetOpcode::G_SELECT: {
    return computeNumSignBitsMin(MI.getOperand(2).getReg(),
                                 MI.getOperand(3).getReg(), DemandedElts,
                                 Depth + 1);
  }
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
    // TODO: Handle clamp pattern with number of sign bits for SMIN/SMAX.
    return computeNumSignBitsMin(MI.getOperand(1).getReg(),
                                 MI.getOperand(2).getReg(), DemandedElts,
                                 Depth + 1);
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // If the boolean result is 0/-1, all bits are sign bits.
    // We know that we have an integer-based boolean since these operations
    // are only available for integers.
    if (MI.getOperand(1).getReg() == R) {
      if (TL.getBooleanContents(DstTy.isVector(), false) ==
          TargetLowering::ZeroOrNegativeOneBooleanContent)
        return TyBits;
    }

    break;
  }
  case TargetOpcode::G_SUB: {
    Register Src2 = MI.getOperand(2).getReg();
    unsigned Src2NumSignBits =
        computeNumSignBits(Src2, DemandedElts, Depth + 1);
    if (Src2NumSignBits == 1)
      return 1; // Early out.

    // Handle NEG.
    Register Src1 = MI.getOperand(1).getReg();
    KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth);
    if (Known1.isZero()) {
      KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth);
      // If the input is known to be 0 or 1, the output is 0/-1, which is all
      // sign bits set.
      if ((Known2.Zero | 1).isAllOnes())
        return TyBits;

      // If the input is known to be positive (the sign bit is known clear),
      // the output of the NEG has, at worst, the same number of sign bits as
      // the input.
      if (Known2.isNonNegative()) {
        FirstAnswer = Src2NumSignBits;
        break;
      }

      // Otherwise, we treat this like a SUB.
    }

    unsigned Src1NumSignBits =
        computeNumSignBits(Src1, DemandedElts, Depth + 1);
    if (Src1NumSignBits == 1)
      return 1; // Early out.

    // Sub can have at most one carry bit. Thus we know that the output
    // is, at worst, one more bit than the inputs.
    FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1;
    break;
  }
  case TargetOpcode::G_ADD: {
    Register Src2 = MI.getOperand(2).getReg();
    unsigned Src2NumSignBits =
        computeNumSignBits(Src2, DemandedElts, Depth + 1);
    if (Src2NumSignBits <= 2)
      return 1; // Early out.

    Register Src1 = MI.getOperand(1).getReg();
    unsigned Src1NumSignBits =
        computeNumSignBits(Src1, DemandedElts, Depth + 1);
    if (Src1NumSignBits == 1)
      return 1; // Early out.

    // Special case decrementing a value (ADD X, -1):
    KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth);
    if (Known2.isAllOnes()) {
      KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth);
      // If the input is known to be 0 or 1, the output is 0/-1, which is all
      // sign bits set.
      if ((Known1.Zero | 1).isAllOnes())
        return TyBits;

      // If we are subtracting one from a positive number, there is no carry
      // out of the result.
      if (Known1.isNonNegative()) {
        FirstAnswer = Src1NumSignBits;
        break;
      }

      // Otherwise, we treat this like an ADD.
    }

    // Add can have at most one carry bit. Thus we know that the output
    // is, at worst, one more bit than the inputs.
    FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1;
    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    bool IsFP = Opcode == TargetOpcode::G_FCMP;
    if (TyBits == 1)
      break;
    auto BC = TL.getBooleanContents(DstTy.isVector(), IsFP);
    if (BC == TargetLoweringBase::ZeroOrNegativeOneBooleanContent)
      return TyBits; // All bits are sign bits.
    if (BC == TargetLowering::ZeroOrOneBooleanContent)
      return TyBits - 1; // Every always-zero bit is a sign bit.
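    // e.g. an s32 G_ICMP under ZeroOrOneBooleanContent produces 0 or 1, and
    // both values have at least 31 copies of the sign bit.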
    break;
  }
  case TargetOpcode::G_BUILD_VECTOR: {
    // Collect the known bits that are shared by every demanded vector element.
    FirstAnswer = TyBits;
    APInt SingleDemandedElt(1, 1);
    for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
      if (!DemandedElts[I])
        continue;

      unsigned Tmp2 =
          computeNumSignBits(MO.getReg(), SingleDemandedElt, Depth + 1);
      FirstAnswer = std::min(FirstAnswer, Tmp2);

      // If we don't know any bits, early out.
      if (FirstAnswer == 1)
        break;
    }
    break;
  }
  case TargetOpcode::G_CONCAT_VECTORS: {
    if (MRI.getType(MI.getOperand(0).getReg()).isScalableVector())
      break;
    FirstAnswer = TyBits;
    // Determine the minimum number of sign bits across all demanded
    // elts of the input vectors. Early out if the result is already 1.
    unsigned NumSubVectorElts =
        MRI.getType(MI.getOperand(1).getReg()).getNumElements();
    for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
      APInt DemandedSub =
          DemandedElts.extractBits(NumSubVectorElts, I * NumSubVectorElts);
      if (!DemandedSub)
        continue;
      unsigned Tmp2 = computeNumSignBits(MO.getReg(), DemandedSub, Depth + 1);

      FirstAnswer = std::min(FirstAnswer, Tmp2);

      // If we don't know any bits, early out.
      if (FirstAnswer == 1)
        break;
    }
    break;
  }
  case TargetOpcode::G_SHUFFLE_VECTOR: {
    // Collect the minimum number of sign bits that are shared by every vector
    // element referenced by the shuffle.
    APInt DemandedLHS, DemandedRHS;
    Register Src1 = MI.getOperand(1).getReg();
    unsigned NumElts = MRI.getType(Src1).getNumElements();
    if (!getShuffleDemandedElts(NumElts, MI.getOperand(3).getShuffleMask(),
                                DemandedElts, DemandedLHS, DemandedRHS))
      return 1;

    if (!!DemandedLHS)
      FirstAnswer = computeNumSignBits(Src1, DemandedLHS, Depth + 1);
    // If we don't know anything, early out and try the computeKnownBits
    // fall-back.
    if (FirstAnswer == 1)
      break;
    if (!!DemandedRHS) {
      unsigned Tmp2 = computeNumSignBits(MI.getOperand(2).getReg(),
                                         DemandedRHS, Depth + 1);
      FirstAnswer = std::min(FirstAnswer, Tmp2);
    }
    break;
  }
  case TargetOpcode::G_SPLAT_VECTOR: {
    // Check if the sign bits of source go down as far as the truncated value.
    Register Src = MI.getOperand(1).getReg();
    unsigned NumSrcSignBits = computeNumSignBits(Src, APInt(1, 1), Depth + 1);
    unsigned NumSrcBits = MRI.getType(Src).getSizeInBits();
    if (NumSrcSignBits > (NumSrcBits - TyBits))
      return NumSrcSignBits - (NumSrcBits - TyBits);
    break;
  }
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  default: {
    unsigned NumBits =
        TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth);
    if (NumBits > 1)
      FirstAnswer = std::max(FirstAnswer, NumBits);
    break;
  }
  }

  // Finally, if we can prove that the top bits of the result are 0's or 1's,
  // use this information.
  KnownBits Known = getKnownBits(R, DemandedElts, Depth);
  APInt Mask;
  if (Known.isNonNegative()) { // sign bit is 0
    Mask = Known.Zero;
  } else if (Known.isNegative()) { // sign bit is 1
    Mask = Known.One;
  } else {
    // Nothing known.
    return FirstAnswer;
  }

  // Okay, we know that the sign bit in Mask is set. Use CLO to determine
  // the number of identical bits in the top of the input value.
  Mask <<= Mask.getBitWidth() - TyBits;
  return std::max(FirstAnswer, Mask.countl_one());
}

unsigned GISelValueTracking::computeNumSignBits(Register R, unsigned Depth) {
  LLT Ty = MRI.getType(R);
  APInt DemandedElts =
      Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  return computeNumSignBits(R, DemandedElts, Depth);
}

std::optional<ConstantRange> GISelValueTracking::getValidShiftAmountRange(
    Register R, const APInt &DemandedElts, unsigned Depth) {
  // Shifting more than the bitwidth is not valid.
  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();

  LLT Ty = MRI.getType(R);
  unsigned BitWidth = Ty.getScalarSizeInBits();

  if (Opcode == TargetOpcode::G_CONSTANT) {
    const APInt &ShAmt = MI.getOperand(1).getCImm()->getValue();
    if (ShAmt.uge(BitWidth))
      return std::nullopt;
    return ConstantRange(ShAmt);
  }

  if (Opcode == TargetOpcode::G_BUILD_VECTOR) {
    const APInt *MinAmt = nullptr, *MaxAmt = nullptr;
    for (unsigned I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
      if (!DemandedElts[I])
        continue;
      MachineInstr *Op = MRI.getVRegDef(MI.getOperand(I + 1).getReg());
      if (Op->getOpcode() != TargetOpcode::G_CONSTANT) {
        MinAmt = MaxAmt = nullptr;
        break;
      }

      const APInt &ShAmt = Op->getOperand(1).getCImm()->getValue();
      if (ShAmt.uge(BitWidth))
        return std::nullopt;
      if (!MinAmt || MinAmt->ugt(ShAmt))
        MinAmt = &ShAmt;
      if (!MaxAmt || MaxAmt->ult(ShAmt))
        MaxAmt = &ShAmt;
    }
    assert(((!MinAmt && !MaxAmt) || (MinAmt && MaxAmt)) &&
           "Failed to find matching min/max shift amounts");
    if (MinAmt && MaxAmt)
      return ConstantRange(*MinAmt, *MaxAmt + 1);
  }
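  // e.g. a G_BUILD_VECTOR of G_CONSTANT shift amounts 1 and 4 (with both
  // lanes demanded) yields the half-open range [1, 5).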

  // Use computeKnownBits to find a hidden constant/knownbits (usually type
  // legalized), e.g. hidden behind multiple bitcasts/build_vector/casts.
  KnownBits KnownAmt = getKnownBits(R, DemandedElts, Depth);
  if (KnownAmt.getMaxValue().ult(BitWidth))
    return ConstantRange::fromKnownBits(KnownAmt, /*IsSigned=*/false);

  return std::nullopt;
}

std::optional<uint64_t> GISelValueTracking::getValidMinimumShiftAmount(
    Register R, const APInt &DemandedElts, unsigned Depth) {
  if (std::optional<ConstantRange> AmtRange =
          getValidShiftAmountRange(R, DemandedElts, Depth))
    return AmtRange->getUnsignedMin().getZExtValue();
  return std::nullopt;
}

void GISelValueTrackingAnalysisLegacy::getAnalysisUsage(
    AnalysisUsage &AU) const {
  AU.setPreservesAll();
  MachineFunctionPass::getAnalysisUsage(AU);
}

bool GISelValueTrackingAnalysisLegacy::runOnMachineFunction(
    MachineFunction &MF) {
  return false;
}

GISelValueTracking &GISelValueTrackingAnalysisLegacy::get(MachineFunction &MF) {
  if (!Info) {
    unsigned MaxDepth =
        MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6;
    Info = std::make_unique<GISelValueTracking>(MF, MaxDepth);
  }
  return *Info;
}

AnalysisKey GISelValueTrackingAnalysis::Key;

GISelValueTracking
GISelValueTrackingAnalysis::run(MachineFunction &MF,
                                MachineFunctionAnalysisManager &MFAM) {
  return Result(MF);
}

PreservedAnalyses
GISelValueTrackingPrinterPass::run(MachineFunction &MF,
                                   MachineFunctionAnalysisManager &MFAM) {
  auto &VTA = MFAM.getResult<GISelValueTrackingAnalysis>(MF);
  const auto &MRI = MF.getRegInfo();
  OS << "name: ";
  MF.getFunction().printAsOperand(OS, /*PrintType=*/false);
  OS << '\n';
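
  // The loop below emits one line per typed virtual register def; the output
  // resembles (illustrative only, exact formatting may differ):
  //   %1:_(s8) KnownBits:0000???? SignBits:4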

  for (MachineBasicBlock &BB : MF) {
    for (MachineInstr &MI : BB) {
      for (MachineOperand &MO : MI.defs()) {
        if (!MO.isReg() || MO.getReg().isPhysical())
          continue;
        Register Reg = MO.getReg();
        if (!MRI.getType(Reg).isValid())
          continue;
        KnownBits Known = VTA.getKnownBits(Reg);
        unsigned SignedBits = VTA.computeNumSignBits(Reg);
        OS << " " << MO << " KnownBits:" << Known << " SignBits:" << SignedBits
           << '\n';
      }
    }
  }
  return PreservedAnalyses::all();
}