1 | //===- lib/CodeGen/GlobalISel/GISelValueTracking.cpp --------------*- C++ -*-===//
3 | // |
4 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
5 | // See https://llvm.org/LICENSE.txt for license information. |
6 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
7 | // |
8 | //===----------------------------------------------------------------------===// |
9 | // |
10 | /// Provides analysis for querying information about KnownBits during GISel |
11 | /// passes. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | #include "llvm/CodeGen/GlobalISel/GISelValueTracking.h" |
15 | #include "llvm/ADT/APFloat.h" |
16 | #include "llvm/ADT/FloatingPointMode.h" |
17 | #include "llvm/ADT/ScopeExit.h" |
18 | #include "llvm/ADT/StringExtras.h" |
19 | #include "llvm/Analysis/ValueTracking.h" |
20 | #include "llvm/Analysis/VectorUtils.h" |
21 | #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" |
22 | #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" |
23 | #include "llvm/CodeGen/GlobalISel/MachineFloatingPointPredicateUtils.h" |
24 | #include "llvm/CodeGen/GlobalISel/Utils.h" |
25 | #include "llvm/CodeGen/LowLevelTypeUtils.h" |
26 | #include "llvm/CodeGen/MachineFrameInfo.h" |
27 | #include "llvm/CodeGen/MachineInstr.h" |
28 | #include "llvm/CodeGen/MachineOperand.h" |
29 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
30 | #include "llvm/CodeGen/Register.h" |
31 | #include "llvm/CodeGen/TargetLowering.h" |
32 | #include "llvm/CodeGen/TargetOpcodes.h" |
33 | #include "llvm/IR/ConstantRange.h" |
34 | #include "llvm/IR/DerivedTypes.h" |
35 | #include "llvm/IR/FMF.h" |
36 | #include "llvm/MC/TargetRegistry.h" |
37 | #include "llvm/Support/KnownBits.h" |
38 | #include "llvm/Support/KnownFPClass.h" |
39 | #include "llvm/Target/TargetMachine.h" |
40 | |
41 | #define DEBUG_TYPE "gisel-known-bits" |
42 | |
43 | using namespace llvm; |
44 | using namespace MIPatternMatch; |
45 | |
46 | char llvm::GISelValueTrackingAnalysisLegacy::ID = 0; |
47 | |
48 | INITIALIZE_PASS(GISelValueTrackingAnalysisLegacy, DEBUG_TYPE, |
49 | "Analysis for ComputingKnownBits", false, true)
50 | |
51 | GISelValueTracking::GISelValueTracking(MachineFunction &MF, unsigned MaxDepth) |
52 | : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()), |
53 | DL(MF.getFunction().getDataLayout()), MaxDepth(MaxDepth) {} |
54 | |
55 | Align GISelValueTracking::computeKnownAlignment(Register R, unsigned Depth) { |
56 | const MachineInstr *MI = MRI.getVRegDef(Reg: R); |
57 | switch (MI->getOpcode()) { |
58 | case TargetOpcode::COPY: |
59 | return computeKnownAlignment(R: MI->getOperand(i: 1).getReg(), Depth); |
60 | case TargetOpcode::G_ASSERT_ALIGN: { |
61 | // TODO: Min with source |
62 | return Align(MI->getOperand(i: 2).getImm()); |
63 | } |
64 | case TargetOpcode::G_FRAME_INDEX: { |
65 | int FrameIdx = MI->getOperand(i: 1).getIndex(); |
66 | return MF.getFrameInfo().getObjectAlign(ObjectIdx: FrameIdx); |
67 | } |
68 | case TargetOpcode::G_INTRINSIC: |
69 | case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: |
70 | case TargetOpcode::G_INTRINSIC_CONVERGENT: |
71 | case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: |
72 | default: |
73 | return TL.computeKnownAlignForTargetInstr(Analysis&: *this, R, MRI, Depth: Depth + 1); |
74 | } |
75 | } |
76 | |
77 | KnownBits GISelValueTracking::getKnownBits(MachineInstr &MI) { |
78 | assert(MI.getNumExplicitDefs() == 1 && |
79 | "expected single return generic instruction" ); |
80 | return getKnownBits(R: MI.getOperand(i: 0).getReg()); |
81 | } |
82 | |
83 | KnownBits GISelValueTracking::getKnownBits(Register R) { |
84 | const LLT Ty = MRI.getType(Reg: R); |
85 | // Since the number of lanes in a scalable vector is unknown at compile time, |
86 | // we track one bit which is implicitly broadcast to all lanes. This means |
87 | // that all lanes in a scalable vector are considered demanded. |
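// For example, a fixed <4 x s32> vector gets DemandedElts = 0b1111, while a
// scalable vector (or a plain scalar) gets the single broadcast bit.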
88 | APInt DemandedElts = |
89 | Ty.isFixedVector() ? APInt::getAllOnes(numBits: Ty.getNumElements()) : APInt(1, 1); |
90 | return getKnownBits(R, DemandedElts); |
91 | } |
92 | |
93 | KnownBits GISelValueTracking::getKnownBits(Register R, |
94 | const APInt &DemandedElts, |
95 | unsigned Depth) { |
96 | // For now, we only maintain the cache during one request. |
97 | assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared" ); |
98 | |
99 | KnownBits Known; |
100 | computeKnownBitsImpl(R, Known, DemandedElts, Depth); |
101 | ComputeKnownBitsCache.clear(); |
102 | return Known; |
103 | } |
104 | |
105 | bool GISelValueTracking::signBitIsZero(Register R) { |
106 | LLT Ty = MRI.getType(Reg: R); |
107 | unsigned BitWidth = Ty.getScalarSizeInBits(); |
108 | return maskedValueIsZero(Val: R, Mask: APInt::getSignMask(BitWidth)); |
109 | } |
110 | |
111 | APInt GISelValueTracking::getKnownZeroes(Register R) { |
112 | return getKnownBits(R).Zero; |
113 | } |
114 | |
115 | APInt GISelValueTracking::getKnownOnes(Register R) { |
116 | return getKnownBits(R).One; |
117 | } |
118 | |
119 | LLVM_ATTRIBUTE_UNUSED static void |
120 | dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) { |
121 | dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth |
122 | << "] Computed for: " << MI << "[" << Depth << "] Known: 0x" |
123 | << toString(I: Known.Zero | Known.One, Radix: 16, Signed: false) << "\n" |
124 | << "[" << Depth << "] Zero: 0x" << toString(I: Known.Zero, Radix: 16, Signed: false) |
125 | << "\n" |
126 | << "[" << Depth << "] One: 0x" << toString(I: Known.One, Radix: 16, Signed: false) |
127 | << "\n" ; |
128 | } |
129 | |
130 | /// Compute known bits for the intersection of \p Src0 and \p Src1 |
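/// The result is conservative: it holds no matter which of the two sources is
/// ultimately chosen, e.g. for the two arms of a G_SELECT.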
131 | void GISelValueTracking::computeKnownBitsMin(Register Src0, Register Src1, |
132 | KnownBits &Known, |
133 | const APInt &DemandedElts, |
134 | unsigned Depth) { |
135 | // Test src1 first, since we canonicalize simpler expressions to the RHS. |
136 | computeKnownBitsImpl(R: Src1, Known, DemandedElts, Depth); |
137 | |
138 | // If we don't know any bits, early out. |
139 | if (Known.isUnknown()) |
140 | return; |
141 | |
142 | KnownBits Known2; |
143 | computeKnownBitsImpl(R: Src0, Known&: Known2, DemandedElts, Depth); |
144 | |
145 | // Only known if known in both the LHS and RHS. |
146 | Known = Known.intersectWith(RHS: Known2); |
147 | } |
148 | |
149 | // Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is |
150 | // created using Width. Use this function when the inputs are KnownBits |
151 | // objects. TODO: Move this to KnownBits.h if this is usable in more cases.
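// For example, with BitWidth = 32 and a Width known to lie in [8, 16], the
// mask has its low 8 bits known one and bits 16 and up known zero, so the
// shifted source is confined to at most the low 16 bits.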
152 | static KnownBits extractBits(unsigned BitWidth, const KnownBits &SrcOpKnown,
153 | const KnownBits &OffsetKnown,
154 | const KnownBits &WidthKnown) {
155 | KnownBits Mask(BitWidth); |
156 | Mask.Zero = APInt::getBitsSetFrom( |
157 | numBits: BitWidth, loBit: WidthKnown.getMaxValue().getLimitedValue(Limit: BitWidth)); |
158 | Mask.One = APInt::getLowBitsSet( |
159 | numBits: BitWidth, loBitsSet: WidthKnown.getMinValue().getLimitedValue(Limit: BitWidth)); |
160 | return KnownBits::lshr(LHS: SrcOpKnown, RHS: OffsetKnown) & Mask; |
161 | } |
162 | |
163 | void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known, |
164 | const APInt &DemandedElts, |
165 | unsigned Depth) { |
166 | MachineInstr &MI = *MRI.getVRegDef(Reg: R); |
167 | unsigned Opcode = MI.getOpcode(); |
168 | LLT DstTy = MRI.getType(Reg: R); |
169 | |
170 | // Handle the case where this is called on a register that does not have a |
171 | // type constraint. For example, it may be post-ISel or this target might not |
172 | // preserve the type when early-selecting instructions. |
173 | if (!DstTy.isValid()) { |
174 | Known = KnownBits(); |
175 | return; |
176 | } |
177 | |
178 | #ifndef NDEBUG |
179 | if (DstTy.isFixedVector()) { |
180 | assert( |
181 | DstTy.getNumElements() == DemandedElts.getBitWidth() && |
182 | "DemandedElt width should equal the fixed vector number of elements" ); |
183 | } else { |
184 | assert(DemandedElts.getBitWidth() == 1 && DemandedElts == APInt(1, 1) && |
185 | "DemandedElt width should be 1 for scalars or scalable vectors" ); |
186 | } |
187 | #endif |
188 | |
189 | unsigned BitWidth = DstTy.getScalarSizeInBits(); |
190 | auto CacheEntry = ComputeKnownBitsCache.find(Val: R); |
191 | if (CacheEntry != ComputeKnownBitsCache.end()) { |
192 | Known = CacheEntry->second; |
193 | LLVM_DEBUG(dbgs() << "Cache hit at " ); |
194 | LLVM_DEBUG(dumpResult(MI, Known, Depth)); |
195 | assert(Known.getBitWidth() == BitWidth && "Cache entry size doesn't match" ); |
196 | return; |
197 | } |
198 | Known = KnownBits(BitWidth); // Don't know anything |
199 | |
200 | // Depth may get bigger than max depth if it gets passed to a different |
201 | // GISelValueTracking object. |
202 | // This may happen when say a generic part uses a GISelValueTracking object |
203 | // with some max depth, but then we hit TL.computeKnownBitsForTargetInstr |
204 | // which creates a new GISelValueTracking object with a different and smaller |
205 | // depth. If we just check for equality, we would never exit if the depth |
206 | // that is passed down to the target specific GISelValueTracking object is |
207 | // already bigger than its max depth. |
208 | if (Depth >= getMaxDepth()) |
209 | return; |
210 | |
211 | if (!DemandedElts) |
212 | return; // No demanded elts, better to assume we don't know anything. |
213 | |
214 | KnownBits Known2; |
215 | |
216 | switch (Opcode) { |
217 | default: |
218 | TL.computeKnownBitsForTargetInstr(Analysis&: *this, R, Known, DemandedElts, MRI, |
219 | Depth); |
220 | break; |
221 | case TargetOpcode::G_BUILD_VECTOR: { |
222 | // Collect the known bits that are shared by every demanded vector element. |
223 | Known.Zero.setAllBits(); |
224 | Known.One.setAllBits(); |
225 | for (const auto &[I, MO] : enumerate(First: drop_begin(RangeOrContainer: MI.operands()))) { |
226 | if (!DemandedElts[I]) |
227 | continue; |
228 | |
229 | computeKnownBitsImpl(R: MO.getReg(), Known&: Known2, DemandedElts: APInt(1, 1), Depth: Depth + 1); |
230 | |
231 | // Known bits are the values that are shared by every demanded element. |
232 | Known = Known.intersectWith(RHS: Known2); |
233 | |
234 | // If we don't know any bits, early out. |
235 | if (Known.isUnknown()) |
236 | break; |
237 | } |
238 | break; |
239 | } |
240 | case TargetOpcode::G_SPLAT_VECTOR: { |
241 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts: APInt(1, 1), |
242 | Depth: Depth + 1); |
243 | // Implicitly truncate the bits to match the official semantics of |
244 | // G_SPLAT_VECTOR. |
245 | Known = Known.trunc(BitWidth); |
246 | break; |
247 | } |
248 | case TargetOpcode::COPY: |
249 | case TargetOpcode::G_PHI: |
250 | case TargetOpcode::PHI: { |
251 | Known.One = APInt::getAllOnes(numBits: BitWidth); |
252 | Known.Zero = APInt::getAllOnes(numBits: BitWidth); |
253 | // Destination registers should not have subregisters at this |
254 | // point of the pipeline, otherwise the main live-range will be |
255 | // defined more than once, which is against SSA. |
256 | assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?" ); |
257 | // Record in the cache that we know nothing for MI. |
258 | // This will get updated later and in the meantime, if we reach that |
259 | // phi again, because of a loop, we will cut the search thanks to this |
260 | // cache entry. |
261 | // We could actually build up more information on the phi by not cutting |
262 | // the search, but that additional information is more a side effect |
263 | // than an intended choice. |
264 | // Therefore, for now, save on compile time until we derive a proper way |
265 | // to derive known bits for PHIs within loops. |
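// For example, with %x = G_PHI [%init, %entry], [%x.next, %loop], walking
// %x.next may lead back to %x; the pre-seeded "unknown" entry below cuts
// that recursion short.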
266 | ComputeKnownBitsCache[R] = KnownBits(BitWidth); |
267 | // A PHI's operands are a mix of registers and basic blocks, interleaved.
268 | // We only care about the register ones. |
269 | for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) { |
270 | const MachineOperand &Src = MI.getOperand(i: Idx); |
271 | Register SrcReg = Src.getReg(); |
272 | // Look through trivial copies and phis, but don't look through trivial
273 | // copies or phis of the form `%1:(s32) = OP %0:gpr32`; known-bits
274 | // analysis is currently unable to determine the bit width of a
275 | // register class.
276 | // |
277 | // We can't use NoSubRegister by name as it's defined by each target but |
278 | // it's always defined to be 0 by tablegen. |
279 | if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ && |
280 | MRI.getType(Reg: SrcReg).isValid()) { |
281 | // For COPYs we don't do anything, don't increase the depth. |
282 | computeKnownBitsImpl(R: SrcReg, Known&: Known2, DemandedElts, |
283 | Depth: Depth + (Opcode != TargetOpcode::COPY)); |
284 | Known2 = Known2.anyextOrTrunc(BitWidth); |
285 | Known = Known.intersectWith(RHS: Known2); |
286 | // If we reach a point where we don't know anything |
287 | // just stop looking through the operands. |
288 | if (Known.isUnknown()) |
289 | break; |
290 | } else { |
291 | // We know nothing. |
292 | Known = KnownBits(BitWidth); |
293 | break; |
294 | } |
295 | } |
296 | break; |
297 | } |
298 | case TargetOpcode::G_CONSTANT: { |
299 | Known = KnownBits::makeConstant(C: MI.getOperand(i: 1).getCImm()->getValue()); |
300 | break; |
301 | } |
302 | case TargetOpcode::G_FRAME_INDEX: { |
303 | int FrameIdx = MI.getOperand(i: 1).getIndex(); |
304 | TL.computeKnownBitsForFrameIndex(FIOp: FrameIdx, Known, MF); |
305 | break; |
306 | } |
307 | case TargetOpcode::G_SUB: { |
308 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts, |
309 | Depth: Depth + 1); |
310 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: Known2, DemandedElts, |
311 | Depth: Depth + 1); |
312 | Known = KnownBits::sub(LHS: Known, RHS: Known2); |
313 | break; |
314 | } |
315 | case TargetOpcode::G_XOR: { |
316 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts, |
317 | Depth: Depth + 1); |
318 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts, |
319 | Depth: Depth + 1); |
320 | |
321 | Known ^= Known2; |
322 | break; |
323 | } |
324 | case TargetOpcode::G_PTR_ADD: { |
325 | if (DstTy.isVector()) |
326 | break; |
327 | // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets? |
328 | LLT Ty = MRI.getType(Reg: MI.getOperand(i: 1).getReg()); |
329 | if (DL.isNonIntegralAddressSpace(AddrSpace: Ty.getAddressSpace())) |
330 | break; |
331 | [[fallthrough]]; |
332 | } |
333 | case TargetOpcode::G_ADD: { |
334 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts, |
335 | Depth: Depth + 1); |
336 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: Known2, DemandedElts, |
337 | Depth: Depth + 1); |
338 | Known = KnownBits::add(LHS: Known, RHS: Known2); |
339 | break; |
340 | } |
341 | case TargetOpcode::G_AND: { |
342 | // If either the LHS or the RHS are Zero, the result is zero. |
343 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts, |
344 | Depth: Depth + 1); |
345 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts, |
346 | Depth: Depth + 1); |
347 | |
348 | Known &= Known2; |
349 | break; |
350 | } |
351 | case TargetOpcode::G_OR: { |
352 | // If either the LHS or the RHS is One, the result is one.
353 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts, |
354 | Depth: Depth + 1); |
355 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts, |
356 | Depth: Depth + 1); |
357 | |
358 | Known |= Known2; |
359 | break; |
360 | } |
361 | case TargetOpcode::G_MUL: { |
362 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts, |
363 | Depth: Depth + 1); |
364 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts, |
365 | Depth: Depth + 1); |
366 | Known = KnownBits::mul(LHS: Known, RHS: Known2); |
367 | break; |
368 | } |
369 | case TargetOpcode::G_SELECT: { |
370 | computeKnownBitsMin(Src0: MI.getOperand(i: 2).getReg(), Src1: MI.getOperand(i: 3).getReg(), |
371 | Known, DemandedElts, Depth: Depth + 1); |
372 | break; |
373 | } |
374 | case TargetOpcode::G_SMIN: { |
375 | // TODO: Handle clamp pattern with number of sign bits |
376 | KnownBits KnownRHS; |
377 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts, |
378 | Depth: Depth + 1); |
379 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: KnownRHS, DemandedElts, |
380 | Depth: Depth + 1); |
381 | Known = KnownBits::smin(LHS: Known, RHS: KnownRHS); |
382 | break; |
383 | } |
384 | case TargetOpcode::G_SMAX: { |
385 | // TODO: Handle clamp pattern with number of sign bits |
386 | KnownBits KnownRHS; |
387 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts, |
388 | Depth: Depth + 1); |
389 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: KnownRHS, DemandedElts, |
390 | Depth: Depth + 1); |
391 | Known = KnownBits::smax(LHS: Known, RHS: KnownRHS); |
392 | break; |
393 | } |
394 | case TargetOpcode::G_UMIN: { |
395 | KnownBits KnownRHS; |
396 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts, |
397 | Depth: Depth + 1); |
398 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: KnownRHS, DemandedElts, |
399 | Depth: Depth + 1); |
400 | Known = KnownBits::umin(LHS: Known, RHS: KnownRHS); |
401 | break; |
402 | } |
403 | case TargetOpcode::G_UMAX: { |
404 | KnownBits KnownRHS; |
405 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts, |
406 | Depth: Depth + 1); |
407 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: KnownRHS, DemandedElts, |
408 | Depth: Depth + 1); |
409 | Known = KnownBits::umax(LHS: Known, RHS: KnownRHS); |
410 | break; |
411 | } |
412 | case TargetOpcode::G_FCMP: |
413 | case TargetOpcode::G_ICMP: { |
414 | if (DstTy.isVector()) |
415 | break; |
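// With zero-or-one boolean contents the comparison result is exactly 0 or 1,
// so every bit above bit 0 is known zero.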
416 | if (TL.getBooleanContents(isVec: DstTy.isVector(), |
417 | isFloat: Opcode == TargetOpcode::G_FCMP) == |
418 | TargetLowering::ZeroOrOneBooleanContent && |
419 | BitWidth > 1) |
420 | Known.Zero.setBitsFrom(1); |
421 | break; |
422 | } |
423 | case TargetOpcode::G_SEXT: { |
424 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts, |
425 | Depth: Depth + 1); |
426 | // If the sign bit is known to be zero or one, then sext will extend |
427 | // it to the top bits, else it will just zext. |
428 | Known = Known.sext(BitWidth); |
429 | break; |
430 | } |
431 | case TargetOpcode::G_ASSERT_SEXT: |
432 | case TargetOpcode::G_SEXT_INREG: { |
433 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts, |
434 | Depth: Depth + 1); |
435 | Known = Known.sextInReg(SrcBitWidth: MI.getOperand(i: 2).getImm()); |
436 | break; |
437 | } |
438 | case TargetOpcode::G_ANYEXT: { |
439 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts, |
440 | Depth: Depth + 1); |
441 | Known = Known.anyext(BitWidth); |
442 | break; |
443 | } |
444 | case TargetOpcode::G_LOAD: { |
445 | const MachineMemOperand *MMO = *MI.memoperands_begin(); |
446 | KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits()); |
447 | if (const MDNode *Ranges = MMO->getRanges()) |
448 | computeKnownBitsFromRangeMetadata(Ranges: *Ranges, Known&: KnownRange); |
449 | Known = KnownRange.anyext(BitWidth: Known.getBitWidth()); |
450 | break; |
451 | } |
452 | case TargetOpcode::G_SEXTLOAD: |
453 | case TargetOpcode::G_ZEXTLOAD: { |
454 | if (DstTy.isVector()) |
455 | break; |
456 | const MachineMemOperand *MMO = *MI.memoperands_begin(); |
457 | KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits()); |
458 | if (const MDNode *Ranges = MMO->getRanges()) |
459 | computeKnownBitsFromRangeMetadata(Ranges: *Ranges, Known&: KnownRange); |
460 | Known = Opcode == TargetOpcode::G_SEXTLOAD |
461 | ? KnownRange.sext(BitWidth: Known.getBitWidth()) |
462 | : KnownRange.zext(BitWidth: Known.getBitWidth()); |
463 | break; |
464 | } |
465 | case TargetOpcode::G_ASHR: { |
466 | KnownBits LHSKnown, RHSKnown; |
467 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: LHSKnown, DemandedElts, |
468 | Depth: Depth + 1); |
469 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: RHSKnown, DemandedElts, |
470 | Depth: Depth + 1); |
471 | Known = KnownBits::ashr(LHS: LHSKnown, RHS: RHSKnown); |
472 | break; |
473 | } |
474 | case TargetOpcode::G_LSHR: { |
475 | KnownBits LHSKnown, RHSKnown; |
476 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: LHSKnown, DemandedElts, |
477 | Depth: Depth + 1); |
478 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: RHSKnown, DemandedElts, |
479 | Depth: Depth + 1); |
480 | Known = KnownBits::lshr(LHS: LHSKnown, RHS: RHSKnown); |
481 | break; |
482 | } |
483 | case TargetOpcode::G_SHL: { |
484 | KnownBits LHSKnown, RHSKnown; |
485 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: LHSKnown, DemandedElts, |
486 | Depth: Depth + 1); |
487 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: RHSKnown, DemandedElts, |
488 | Depth: Depth + 1); |
489 | Known = KnownBits::shl(LHS: LHSKnown, RHS: RHSKnown); |
490 | break; |
491 | } |
492 | case TargetOpcode::G_INTTOPTR: |
493 | case TargetOpcode::G_PTRTOINT: |
494 | if (DstTy.isVector()) |
495 | break; |
496 | // Fall through and handle them the same as zext/trunc. |
497 | [[fallthrough]]; |
498 | case TargetOpcode::G_ZEXT: |
499 | case TargetOpcode::G_TRUNC: { |
500 | Register SrcReg = MI.getOperand(i: 1).getReg(); |
501 | computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1); |
502 | Known = Known.zextOrTrunc(BitWidth); |
503 | break; |
504 | } |
505 | case TargetOpcode::G_ASSERT_ZEXT: { |
506 | Register SrcReg = MI.getOperand(i: 1).getReg(); |
507 | computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1); |
508 | |
509 | unsigned SrcBitWidth = MI.getOperand(i: 2).getImm(); |
510 | assert(SrcBitWidth && "SrcBitWidth can't be zero" ); |
511 | APInt InMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: SrcBitWidth); |
512 | Known.Zero |= (~InMask); |
513 | Known.One &= (~Known.Zero); |
514 | break; |
515 | } |
516 | case TargetOpcode::G_ASSERT_ALIGN: { |
517 | int64_t LogOfAlign = Log2_64(Value: MI.getOperand(i: 2).getImm()); |
518 | |
519 | // TODO: Should use maximum with source |
520 | // If a node is guaranteed to be aligned, set low zero bits accordingly as |
521 | // well as clearing one bits. |
522 | Known.Zero.setLowBits(LogOfAlign); |
523 | Known.One.clearLowBits(loBits: LogOfAlign); |
524 | break; |
525 | } |
526 | case TargetOpcode::G_MERGE_VALUES: { |
527 | unsigned NumOps = MI.getNumOperands(); |
528 | unsigned OpSize = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits(); |
529 | |
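// Each source operand supplies OpSize bits of the result; source operand
// I + 1 occupies result bits [I * OpSize, (I + 1) * OpSize).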
530 | for (unsigned I = 0; I != NumOps - 1; ++I) { |
531 | KnownBits SrcOpKnown; |
532 | computeKnownBitsImpl(R: MI.getOperand(i: I + 1).getReg(), Known&: SrcOpKnown, |
533 | DemandedElts, Depth: Depth + 1); |
534 | Known.insertBits(SubBits: SrcOpKnown, BitPosition: I * OpSize); |
535 | } |
536 | break; |
537 | } |
538 | case TargetOpcode::G_UNMERGE_VALUES: { |
539 | unsigned NumOps = MI.getNumOperands(); |
540 | Register SrcReg = MI.getOperand(i: NumOps - 1).getReg(); |
541 | LLT SrcTy = MRI.getType(Reg: SrcReg); |
542 | |
543 | if (SrcTy.isVector() && SrcTy.getScalarType() != DstTy.getScalarType()) |
544 | return; // TODO: Handle vector->subelement unmerges |
545 | |
546 | // Figure out the result operand index |
547 | unsigned DstIdx = 0; |
548 | for (; DstIdx != NumOps - 1 && MI.getOperand(i: DstIdx).getReg() != R; |
549 | ++DstIdx) |
550 | ; |
551 | |
552 | APInt SubDemandedElts = DemandedElts; |
553 | if (SrcTy.isVector()) { |
554 | unsigned DstLanes = DstTy.isVector() ? DstTy.getNumElements() : 1; |
555 | SubDemandedElts = |
556 | DemandedElts.zext(width: SrcTy.getNumElements()).shl(shiftAmt: DstIdx * DstLanes); |
557 | } |
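// For example, unmerging <4 x s32> into two <2 x s32> pieces maps demanded
// elements 0b11 of the second piece (DstIdx = 1) to source elements 0b1100.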
558 | |
559 | KnownBits SrcOpKnown; |
560 | computeKnownBitsImpl(R: SrcReg, Known&: SrcOpKnown, DemandedElts: SubDemandedElts, Depth: Depth + 1); |
561 | |
562 | if (SrcTy.isVector()) |
563 | Known = std::move(SrcOpKnown); |
564 | else |
565 | Known = SrcOpKnown.extractBits(NumBits: BitWidth, BitPosition: BitWidth * DstIdx); |
566 | break; |
567 | } |
568 | case TargetOpcode::G_BSWAP: { |
569 | Register SrcReg = MI.getOperand(i: 1).getReg(); |
570 | computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1); |
571 | Known = Known.byteSwap(); |
572 | break; |
573 | } |
574 | case TargetOpcode::G_BITREVERSE: { |
575 | Register SrcReg = MI.getOperand(i: 1).getReg(); |
576 | computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1); |
577 | Known = Known.reverseBits(); |
578 | break; |
579 | } |
580 | case TargetOpcode::G_CTPOP: { |
581 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts, |
582 | Depth: Depth + 1); |
583 | // We can bound the space the count needs. Also, bits known to be zero |
584 | // can't contribute to the population. |
585 | unsigned BitsPossiblySet = Known2.countMaxPopulation(); |
586 | unsigned LowBits = llvm::bit_width(Value: BitsPossiblySet); |
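// e.g. if at most 5 source bits can be set, the count is at most 5 and fits
// in bit_width(5) = 3 bits, so all result bits above bit 2 are known zero.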
587 | Known.Zero.setBitsFrom(LowBits); |
588 | // TODO: we could bound Known.One using the lower bound on the number of |
589 | // bits which might be set provided by popcnt KnownOne2. |
590 | break; |
591 | } |
592 | case TargetOpcode::G_UBFX: { |
593 | KnownBits SrcOpKnown, OffsetKnown, WidthKnown; |
594 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: SrcOpKnown, DemandedElts, |
595 | Depth: Depth + 1); |
596 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: OffsetKnown, DemandedElts, |
597 | Depth: Depth + 1); |
598 | computeKnownBitsImpl(R: MI.getOperand(i: 3).getReg(), Known&: WidthKnown, DemandedElts, |
599 | Depth: Depth + 1); |
600 | Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown); |
601 | break; |
602 | } |
603 | case TargetOpcode::G_SBFX: { |
604 | KnownBits SrcOpKnown, OffsetKnown, WidthKnown; |
605 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: SrcOpKnown, DemandedElts, |
606 | Depth: Depth + 1); |
607 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: OffsetKnown, DemandedElts, |
608 | Depth: Depth + 1); |
609 | computeKnownBitsImpl(R: MI.getOperand(i: 3).getReg(), Known&: WidthKnown, DemandedElts, |
610 | Depth: Depth + 1); |
611 | Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown); |
612 | // Sign extend the extracted value using shift left and arithmetic shift |
613 | // right. |
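// With the shift amount ShAmt = BitWidth - Width, the sequence
// (Known << ShAmt) a>> ShAmt replicates the field's top bit into the high
// bits.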
614 | KnownBits ExtKnown = KnownBits::makeConstant(C: APInt(BitWidth, BitWidth)); |
615 | KnownBits ShiftKnown = KnownBits::sub(LHS: ExtKnown, RHS: WidthKnown); |
616 | Known = KnownBits::ashr(LHS: KnownBits::shl(LHS: Known, RHS: ShiftKnown), RHS: ShiftKnown); |
617 | break; |
618 | } |
619 | case TargetOpcode::G_UADDO: |
620 | case TargetOpcode::G_UADDE: |
621 | case TargetOpcode::G_SADDO: |
622 | case TargetOpcode::G_SADDE: |
623 | case TargetOpcode::G_USUBO: |
624 | case TargetOpcode::G_USUBE: |
625 | case TargetOpcode::G_SSUBO: |
626 | case TargetOpcode::G_SSUBE: |
627 | case TargetOpcode::G_UMULO: |
628 | case TargetOpcode::G_SMULO: { |
629 | if (MI.getOperand(i: 1).getReg() == R) { |
630 | // The second result is the boolean carry/overflow flag. If the target
631 | // uses zero-or-one boolean contents, all bits above bit 0 are zero.
632 | if (TL.getBooleanContents(isVec: DstTy.isVector(), isFloat: false) == |
633 | TargetLowering::ZeroOrOneBooleanContent && |
634 | BitWidth > 1) |
635 | Known.Zero.setBitsFrom(1); |
636 | } |
637 | break; |
638 | } |
639 | case TargetOpcode::G_CTLZ: |
640 | case TargetOpcode::G_CTLZ_ZERO_UNDEF: { |
641 | KnownBits SrcOpKnown; |
642 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: SrcOpKnown, DemandedElts, |
643 | Depth: Depth + 1); |
644 | // If we have a known 1, its position is our upper bound. |
645 | unsigned PossibleLZ = SrcOpKnown.countMaxLeadingZeros(); |
646 | unsigned LowBits = llvm::bit_width(Value: PossibleLZ); |
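// e.g. if bit 24 of a 32-bit source is known one, there are at most 7
// leading zeros, so the result fits in bit_width(7) = 3 bits.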
647 | Known.Zero.setBitsFrom(LowBits); |
648 | break; |
649 | } |
650 | case TargetOpcode::G_SHUFFLE_VECTOR: { |
651 | APInt DemandedLHS, DemandedRHS; |
652 | // Collect the known bits that are shared by every vector element referenced |
653 | // by the shuffle. |
654 | unsigned NumElts = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getNumElements(); |
655 | if (!getShuffleDemandedElts(SrcWidth: NumElts, Mask: MI.getOperand(i: 3).getShuffleMask(), |
656 | DemandedElts, DemandedLHS, DemandedRHS)) |
657 | break; |
658 | |
659 | // Known bits are the values that are shared by every demanded element. |
660 | Known.Zero.setAllBits(); |
661 | Known.One.setAllBits(); |
662 | if (!!DemandedLHS) { |
663 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts: DemandedLHS, |
664 | Depth: Depth + 1); |
665 | Known = Known.intersectWith(RHS: Known2); |
666 | } |
667 | // If we don't know any bits, early out. |
668 | if (Known.isUnknown()) |
669 | break; |
670 | if (!!DemandedRHS) { |
671 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: Known2, DemandedElts: DemandedRHS, |
672 | Depth: Depth + 1); |
673 | Known = Known.intersectWith(RHS: Known2); |
674 | } |
675 | break; |
676 | } |
677 | case TargetOpcode::G_CONCAT_VECTORS: { |
678 | if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isScalableVector()) |
679 | break; |
680 | // Split DemandedElts and test each of the demanded subvectors. |
681 | Known.Zero.setAllBits(); |
682 | Known.One.setAllBits(); |
683 | unsigned NumSubVectorElts = |
684 | MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getNumElements(); |
685 | |
686 | for (const auto &[I, MO] : enumerate(First: drop_begin(RangeOrContainer: MI.operands()))) { |
687 | APInt DemandedSub = |
688 | DemandedElts.extractBits(numBits: NumSubVectorElts, bitPosition: I * NumSubVectorElts); |
689 | if (!!DemandedSub) { |
690 | computeKnownBitsImpl(R: MO.getReg(), Known&: Known2, DemandedElts: DemandedSub, Depth: Depth + 1); |
691 | |
692 | Known = Known.intersectWith(RHS: Known2); |
693 | } |
694 | // If we don't know any bits, early out. |
695 | if (Known.isUnknown()) |
696 | break; |
697 | } |
698 | break; |
699 | } |
700 | } |
701 | |
702 | LLVM_DEBUG(dumpResult(MI, Known, Depth)); |
703 | |
704 | // Update the cache. |
705 | ComputeKnownBitsCache[R] = Known; |
706 | } |
707 | |
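/// Returns true if the output denormal mode for \p Ty cannot turn a negative
/// denormal result into -0.0 (denormals are either kept as-is or flushed to
/// +0.0).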
708 | static bool outputDenormalIsIEEEOrPosZero(const MachineFunction &MF, LLT Ty) { |
709 | Ty = Ty.getScalarType(); |
710 | DenormalMode Mode = MF.getDenormalMode(FPType: getFltSemanticForLLT(Ty)); |
711 | return Mode.Output == DenormalMode::IEEE || |
712 | Mode.Output == DenormalMode::PositiveZero; |
713 | } |
714 | |
715 | void GISelValueTracking::computeKnownFPClass(Register R, KnownFPClass &Known, |
716 | FPClassTest InterestedClasses, |
717 | unsigned Depth) { |
718 | LLT Ty = MRI.getType(Reg: R); |
719 | APInt DemandedElts = |
720 | Ty.isFixedVector() ? APInt::getAllOnes(numBits: Ty.getNumElements()) : APInt(1, 1); |
721 | computeKnownFPClass(R, DemandedElts, InterestedClasses, Known, Depth); |
722 | } |
723 | |
724 | void GISelValueTracking::computeKnownFPClassForFPTrunc( |
725 | const MachineInstr &MI, const APInt &DemandedElts, |
726 | FPClassTest InterestedClasses, KnownFPClass &Known, unsigned Depth) { |
727 | if ((InterestedClasses & (KnownFPClass::OrderedLessThanZeroMask | fcNan)) == |
728 | fcNone) |
729 | return; |
730 | |
731 | Register Val = MI.getOperand(i: 1).getReg(); |
732 | KnownFPClass KnownSrc; |
733 | computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc, |
734 | Depth: Depth + 1); |
735 | |
736 | // Sign should be preserved |
737 | // TODO: Handle cannot be ordered greater than zero |
738 | if (KnownSrc.cannotBeOrderedLessThanZero()) |
739 | Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask); |
740 | |
741 | Known.propagateNaN(Src: KnownSrc, PreserveSign: true); |
742 | |
743 | // Infinity needs a range check. |
744 | } |
745 | |
746 | void GISelValueTracking::computeKnownFPClass(Register R, |
747 | const APInt &DemandedElts, |
748 | FPClassTest InterestedClasses, |
749 | KnownFPClass &Known, |
750 | unsigned Depth) { |
751 | assert(Known.isUnknown() && "should not be called with known information" ); |
752 | |
753 | if (!DemandedElts) { |
754 | // No demanded elts, better to assume we don't know anything. |
755 | Known.resetAll(); |
756 | return; |
757 | } |
758 | |
759 | assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth" ); |
760 | |
761 | MachineInstr &MI = *MRI.getVRegDef(Reg: R); |
762 | unsigned Opcode = MI.getOpcode(); |
763 | LLT DstTy = MRI.getType(Reg: R); |
764 | |
765 | if (!DstTy.isValid()) { |
766 | Known.resetAll(); |
767 | return; |
768 | } |
769 | |
770 | if (auto Cst = GFConstant::getConstant(Const: R, MRI)) { |
771 | switch (Cst->getKind()) { |
772 | case GFConstant::GFConstantKind::Scalar: { |
773 | auto APF = Cst->getScalarValue(); |
774 | Known.KnownFPClasses = APF.classify(); |
775 | Known.SignBit = APF.isNegative(); |
776 | break; |
777 | } |
778 | case GFConstant::GFConstantKind::FixedVector: { |
779 | Known.KnownFPClasses = fcNone; |
780 | bool SignBitAllZero = true; |
781 | bool SignBitAllOne = true; |
782 | |
783 | for (auto C : *Cst) { |
784 | Known.KnownFPClasses |= C.classify(); |
785 | if (C.isNegative()) |
786 | SignBitAllZero = false; |
787 | else |
788 | SignBitAllOne = false; |
789 | } |
790 | |
791 | if (SignBitAllOne != SignBitAllZero) |
792 | Known.SignBit = SignBitAllOne; |
793 | |
794 | break; |
795 | } |
796 | case GFConstant::GFConstantKind::ScalableVector: { |
797 | Known.resetAll(); |
798 | break; |
799 | } |
800 | } |
801 | |
802 | return; |
803 | } |
804 | |
805 | FPClassTest KnownNotFromFlags = fcNone; |
806 | if (MI.getFlag(Flag: MachineInstr::MIFlag::FmNoNans)) |
807 | KnownNotFromFlags |= fcNan; |
808 | if (MI.getFlag(Flag: MachineInstr::MIFlag::FmNoInfs)) |
809 | KnownNotFromFlags |= fcInf; |
810 | |
811 | // We no longer need to find out about these bits from inputs if we can |
812 | // assume this from flags/attributes. |
813 | InterestedClasses &= ~KnownNotFromFlags; |
814 | |
815 | auto ClearClassesFromFlags = |
816 | make_scope_exit(F: [=, &Known] { Known.knownNot(RuleOut: KnownNotFromFlags); }); |
817 | |
818 | // All recursive calls that increase depth must come after this. |
819 | if (Depth == MaxAnalysisRecursionDepth) |
820 | return; |
821 | |
822 | const MachineFunction *MF = MI.getMF(); |
823 | |
824 | switch (Opcode) { |
825 | default: |
826 | TL.computeKnownFPClassForTargetInstr(Analysis&: *this, R, Known, DemandedElts, MRI, |
827 | Depth); |
828 | break; |
829 | case TargetOpcode::G_FNEG: { |
830 | Register Val = MI.getOperand(i: 1).getReg(); |
831 | computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known, Depth: Depth + 1); |
832 | Known.fneg(); |
833 | break; |
834 | } |
835 | case TargetOpcode::G_SELECT: { |
836 | GSelect &SelMI = cast<GSelect>(Val&: MI); |
837 | Register Cond = SelMI.getCondReg(); |
838 | Register LHS = SelMI.getTrueReg(); |
839 | Register RHS = SelMI.getFalseReg(); |
840 | |
841 | FPClassTest FilterLHS = fcAllFlags; |
842 | FPClassTest FilterRHS = fcAllFlags; |
843 | |
844 | Register TestedValue; |
845 | FPClassTest MaskIfTrue = fcAllFlags; |
846 | FPClassTest MaskIfFalse = fcAllFlags; |
847 | FPClassTest ClassVal = fcNone; |
848 | |
849 | CmpInst::Predicate Pred; |
850 | Register CmpLHS, CmpRHS; |
851 | if (mi_match(R: Cond, MRI, |
852 | P: m_GFCmp(P: m_Pred(P&: Pred), L: m_Reg(R&: CmpLHS), R: m_Reg(R&: CmpRHS)))) { |
853 | // If the select filters out a value based on the class, it no longer |
854 | // participates in the class of the result |
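// For example, in (fcmp ord x, 0.0) ? x : y the true operand cannot be a
// NaN, so fcNan is masked out of its contribution below.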
855 | |
856 | // TODO: In some degenerate cases we can infer something if we try again |
857 | // without looking through sign operations. |
858 | bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS; |
859 | std::tie(args&: TestedValue, args&: MaskIfTrue, args&: MaskIfFalse) = |
860 | fcmpImpliesClass(Pred, MF: *MF, LHS: CmpLHS, RHS: CmpRHS, LookThroughSrc: LookThroughFAbsFNeg); |
861 | } else if (mi_match( |
862 | R: Cond, MRI, |
863 | P: m_GIsFPClass(L: m_Reg(R&: TestedValue), T: m_FPClassTest(T&: ClassVal)))) { |
864 | FPClassTest TestedMask = ClassVal; |
865 | MaskIfTrue = TestedMask; |
866 | MaskIfFalse = ~TestedMask; |
867 | } |
868 | |
869 | if (TestedValue == LHS) { |
870 | // match !isnan(x) ? x : y |
871 | FilterLHS = MaskIfTrue; |
872 | } else if (TestedValue == RHS) { // && IsExactClass |
873 | // match !isnan(x) ? y : x |
874 | FilterRHS = MaskIfFalse; |
875 | } |
876 | |
877 | KnownFPClass Known2; |
878 | computeKnownFPClass(R: LHS, DemandedElts, InterestedClasses: InterestedClasses & FilterLHS, Known, |
879 | Depth: Depth + 1); |
880 | Known.KnownFPClasses &= FilterLHS; |
881 | |
882 | computeKnownFPClass(R: RHS, DemandedElts, InterestedClasses: InterestedClasses & FilterRHS, |
883 | Known&: Known2, Depth: Depth + 1); |
884 | Known2.KnownFPClasses &= FilterRHS; |
885 | |
886 | Known |= Known2; |
887 | break; |
888 | } |
889 | case TargetOpcode::G_FCOPYSIGN: { |
890 | Register Magnitude = MI.getOperand(i: 1).getReg(); |
891 | Register Sign = MI.getOperand(i: 2).getReg(); |
892 | |
893 | KnownFPClass KnownSign; |
894 | |
895 | computeKnownFPClass(R: Magnitude, DemandedElts, InterestedClasses, Known, |
896 | Depth: Depth + 1); |
897 | computeKnownFPClass(R: Sign, DemandedElts, InterestedClasses, Known&: KnownSign, |
898 | Depth: Depth + 1); |
899 | Known.copysign(Sign: KnownSign); |
900 | break; |
901 | } |
902 | case TargetOpcode::G_FMA: |
903 | case TargetOpcode::G_STRICT_FMA: |
904 | case TargetOpcode::G_FMAD: { |
905 | if ((InterestedClasses & fcNegative) == fcNone) |
906 | break; |
907 | |
908 | Register A = MI.getOperand(i: 1).getReg(); |
909 | Register B = MI.getOperand(i: 2).getReg(); |
910 | Register C = MI.getOperand(i: 3).getReg(); |
911 | |
912 | if (A != B) |
913 | break; |
914 | |
915 | // The multiply cannot be -0 and therefore the add can't be -0 |
916 | Known.knownNot(RuleOut: fcNegZero); |
917 | |
918 | // x * x + y is non-negative if y is non-negative. |
919 | KnownFPClass KnownAddend; |
920 | computeKnownFPClass(R: C, DemandedElts, InterestedClasses, Known&: KnownAddend, |
921 | Depth: Depth + 1); |
922 | |
923 | if (KnownAddend.cannotBeOrderedLessThanZero()) |
924 | Known.knownNot(RuleOut: fcNegative); |
925 | break; |
926 | } |
927 | case TargetOpcode::G_FSQRT: |
928 | case TargetOpcode::G_STRICT_FSQRT: { |
929 | KnownFPClass KnownSrc; |
930 | FPClassTest InterestedSrcs = InterestedClasses; |
931 | if (InterestedClasses & fcNan) |
932 | InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask; |
933 | |
934 | Register Val = MI.getOperand(i: 1).getReg(); |
935 | |
936 | computeKnownFPClass(R: Val, DemandedElts, InterestedClasses: InterestedSrcs, Known&: KnownSrc, Depth: Depth + 1); |
937 | |
938 | if (KnownSrc.isKnownNeverPosInfinity()) |
939 | Known.knownNot(RuleOut: fcPosInf); |
940 | if (KnownSrc.isKnownNever(Mask: fcSNan)) |
941 | Known.knownNot(RuleOut: fcSNan); |
942 | |
943 | // Any negative value besides -0 returns a nan. |
944 | if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero()) |
945 | Known.knownNot(RuleOut: fcNan); |
946 | |
947 | // The only negative value that can be returned is -0 for -0 inputs. |
948 | Known.knownNot(RuleOut: fcNegInf | fcNegSubnormal | fcNegNormal); |
949 | break; |
950 | } |
951 | case TargetOpcode::G_FABS: { |
952 | if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) { |
953 | Register Val = MI.getOperand(i: 1).getReg(); |
954 | // If we only care about the sign bit we don't need to inspect the |
955 | // operand. |
956 | computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known, |
957 | Depth: Depth + 1); |
958 | } |
959 | Known.fabs(); |
960 | break; |
961 | } |
962 | case TargetOpcode::G_FSIN: |
963 | case TargetOpcode::G_FCOS: |
964 | case TargetOpcode::G_FSINCOS: { |
965 | // Return NaN on infinite inputs. |
966 | Register Val = MI.getOperand(i: 1).getReg(); |
967 | KnownFPClass KnownSrc; |
968 | |
969 | computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc, |
970 | Depth: Depth + 1); |
971 | Known.knownNot(RuleOut: fcInf); |
972 | |
973 | if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity()) |
974 | Known.knownNot(RuleOut: fcNan); |
975 | break; |
976 | } |
977 | case TargetOpcode::G_FMAXNUM: |
978 | case TargetOpcode::G_FMINNUM: |
979 | case TargetOpcode::G_FMINNUM_IEEE: |
980 | case TargetOpcode::G_FMAXIMUM: |
981 | case TargetOpcode::G_FMINIMUM: |
982 | case TargetOpcode::G_FMAXNUM_IEEE: |
983 | case TargetOpcode::G_FMAXIMUMNUM: |
984 | case TargetOpcode::G_FMINIMUMNUM: { |
985 | Register LHS = MI.getOperand(i: 1).getReg(); |
986 | Register RHS = MI.getOperand(i: 2).getReg(); |
987 | KnownFPClass KnownLHS, KnownRHS; |
988 | |
989 | computeKnownFPClass(R: LHS, DemandedElts, InterestedClasses, Known&: KnownLHS, |
990 | Depth: Depth + 1); |
991 | computeKnownFPClass(R: RHS, DemandedElts, InterestedClasses, Known&: KnownRHS, |
992 | Depth: Depth + 1); |
993 | |
994 | bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN(); |
995 | Known = KnownLHS | KnownRHS; |
996 | |
997 | // If either operand is not NaN, the result is not NaN. |
998 | if (NeverNaN && (Opcode == TargetOpcode::G_FMINNUM || |
999 | Opcode == TargetOpcode::G_FMAXNUM || |
1000 | Opcode == TargetOpcode::G_FMINIMUMNUM || |
1001 | Opcode == TargetOpcode::G_FMAXIMUMNUM)) |
1002 | Known.knownNot(RuleOut: fcNan); |
1003 | |
1004 | if (Opcode == TargetOpcode::G_FMAXNUM || |
1005 | Opcode == TargetOpcode::G_FMAXIMUMNUM || |
1006 | Opcode == TargetOpcode::G_FMAXNUM_IEEE) { |
1007 | // If at least one operand is known to be positive, the result must be |
1008 | // positive. |
1009 | if ((KnownLHS.cannotBeOrderedLessThanZero() && |
1010 | KnownLHS.isKnownNeverNaN()) || |
1011 | (KnownRHS.cannotBeOrderedLessThanZero() && |
1012 | KnownRHS.isKnownNeverNaN())) |
1013 | Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask); |
1014 | } else if (Opcode == TargetOpcode::G_FMAXIMUM) { |
1015 | // If at least one operand is known to be positive, the result must be |
1016 | // positive. |
1017 | if (KnownLHS.cannotBeOrderedLessThanZero() || |
1018 | KnownRHS.cannotBeOrderedLessThanZero()) |
1019 | Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask); |
1020 | } else if (Opcode == TargetOpcode::G_FMINNUM || |
1021 | Opcode == TargetOpcode::G_FMINIMUMNUM || |
1022 | Opcode == TargetOpcode::G_FMINNUM_IEEE) { |
1023 | // If at least one operand is known to be negative, the result must be |
1024 | // negative. |
1025 | if ((KnownLHS.cannotBeOrderedGreaterThanZero() && |
1026 | KnownLHS.isKnownNeverNaN()) || |
1027 | (KnownRHS.cannotBeOrderedGreaterThanZero() && |
1028 | KnownRHS.isKnownNeverNaN())) |
1029 | Known.knownNot(RuleOut: KnownFPClass::OrderedGreaterThanZeroMask); |
1030 | } else if (Opcode == TargetOpcode::G_FMINIMUM) { |
1031 | // If at least one operand is known to be negative, the result must be |
1032 | // negative. |
1033 | if (KnownLHS.cannotBeOrderedGreaterThanZero() || |
1034 | KnownRHS.cannotBeOrderedGreaterThanZero()) |
1035 | Known.knownNot(RuleOut: KnownFPClass::OrderedGreaterThanZeroMask); |
1036 | } else { |
1037 | llvm_unreachable("unhandled intrinsic" ); |
1038 | } |
1039 | |
1040 | // Fixup zero handling if denormals could be returned as a zero. |
1041 | // |
1042 | // As there's no spec for denormal flushing, be conservative with the |
1043 | // treatment of denormals that could be flushed to zero. For older |
1044 | // subtargets on AMDGPU the min/max instructions would not flush the |
1045 | // output and return the original value. |
1046 | // |
1047 | if ((Known.KnownFPClasses & fcZero) != fcNone && |
1048 | !Known.isKnownNeverSubnormal()) { |
1049 | DenormalMode Mode = |
1050 | MF->getDenormalMode(FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType())); |
1051 | if (Mode != DenormalMode::getIEEE()) |
1052 | Known.KnownFPClasses |= fcZero; |
1053 | } |
1054 | |
1055 | if (Known.isKnownNeverNaN()) { |
1056 | if (KnownLHS.SignBit && KnownRHS.SignBit && |
1057 | *KnownLHS.SignBit == *KnownRHS.SignBit) { |
1058 | if (*KnownLHS.SignBit) |
1059 | Known.signBitMustBeOne(); |
1060 | else |
1061 | Known.signBitMustBeZero(); |
1062 | } else if ((Opcode == TargetOpcode::G_FMAXIMUM || |
1063 | Opcode == TargetOpcode::G_FMINIMUM) || |
1064 | Opcode == TargetOpcode::G_FMAXIMUMNUM || |
1065 | Opcode == TargetOpcode::G_FMINIMUMNUM || |
1066 | Opcode == TargetOpcode::G_FMAXNUM_IEEE || |
1067 | Opcode == TargetOpcode::G_FMINNUM_IEEE || |
1068 | // FIXME: Should be using logical zero versions |
1069 | ((KnownLHS.isKnownNeverNegZero() || |
1070 | KnownRHS.isKnownNeverPosZero()) && |
1071 | (KnownLHS.isKnownNeverPosZero() || |
1072 | KnownRHS.isKnownNeverNegZero()))) { |
1073 | if ((Opcode == TargetOpcode::G_FMAXIMUM || |
1074 | Opcode == TargetOpcode::G_FMAXNUM || |
1075 | Opcode == TargetOpcode::G_FMAXIMUMNUM || |
1076 | Opcode == TargetOpcode::G_FMAXNUM_IEEE) && |
1077 | (KnownLHS.SignBit == false || KnownRHS.SignBit == false)) |
1078 | Known.signBitMustBeZero(); |
1079 | else if ((Opcode == TargetOpcode::G_FMINIMUM || |
1080 | Opcode == TargetOpcode::G_FMINNUM || |
1081 | Opcode == TargetOpcode::G_FMINIMUMNUM || |
1082 | Opcode == TargetOpcode::G_FMINNUM_IEEE) && |
1083 | (KnownLHS.SignBit == true || KnownRHS.SignBit == true)) |
1084 | Known.signBitMustBeOne(); |
1085 | } |
1086 | } |
1087 | break; |
1088 | } |
1089 | case TargetOpcode::G_FCANONICALIZE: { |
1090 | Register Val = MI.getOperand(i: 1).getReg(); |
1091 | KnownFPClass KnownSrc; |
1092 | computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc, |
1093 | Depth: Depth + 1); |
1094 | |
1095 | // This is essentially a stronger form of |
1096 | // propagateCanonicalizingSrc. Other "canonicalizing" operations don't |
1097 | // actually have an IR canonicalization guarantee. |
1098 | |
1099 | // Canonicalize may flush denormals to zero, so we have to consider the |
1100 | // denormal mode to preserve known-not-0 knowledge. |
1101 | Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan; |
1102 | |
1103 | // Stronger version of propagateNaN |
1104 | // Canonicalize is guaranteed to quiet signaling nans. |
1105 | if (KnownSrc.isKnownNeverNaN()) |
1106 | Known.knownNot(RuleOut: fcNan); |
1107 | else |
1108 | Known.knownNot(RuleOut: fcSNan); |
1109 | |
1110 | // If the parent function flushes denormals, the canonical output cannot |
1111 | // be a denormal. |
1112 | LLT Ty = MRI.getType(Reg: Val).getScalarType(); |
1113 | const fltSemantics &FPType = getFltSemanticForLLT(Ty); |
1114 | DenormalMode DenormMode = MF->getDenormalMode(FPType); |
1115 | if (DenormMode == DenormalMode::getIEEE()) { |
1116 | if (KnownSrc.isKnownNever(Mask: fcPosZero)) |
1117 | Known.knownNot(RuleOut: fcPosZero); |
1118 | if (KnownSrc.isKnownNever(Mask: fcNegZero)) |
1119 | Known.knownNot(RuleOut: fcNegZero); |
1120 | break; |
1121 | } |
1122 | |
1123 | if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero()) |
1124 | Known.knownNot(RuleOut: fcSubnormal); |
1125 | |
1126 | if (DenormMode.Input == DenormalMode::PositiveZero || |
1127 | (DenormMode.Output == DenormalMode::PositiveZero && |
1128 | DenormMode.Input == DenormalMode::IEEE)) |
1129 | Known.knownNot(RuleOut: fcNegZero); |
1130 | |
1131 | break; |
1132 | } |
1133 | case TargetOpcode::G_VECREDUCE_FMAX: |
1134 | case TargetOpcode::G_VECREDUCE_FMIN: |
1135 | case TargetOpcode::G_VECREDUCE_FMAXIMUM: |
1136 | case TargetOpcode::G_VECREDUCE_FMINIMUM: { |
1137 | Register Val = MI.getOperand(i: 1).getReg(); |
1138 | // reduce min/max will choose an element from one of the vector elements,
1139 | // so we can infer any class information that is common to all elements.
1140 | |
1141 | Known = |
1142 | computeKnownFPClass(R: Val, Flags: MI.getFlags(), InterestedClasses, Depth: Depth + 1); |
1143 | // Can only propagate sign if output is never NaN. |
1144 | if (!Known.isKnownNeverNaN()) |
1145 | Known.SignBit.reset(); |
1146 | break; |
1147 | } |
1148 | case TargetOpcode::G_TRUNC: |
1149 | case TargetOpcode::G_FFLOOR: |
1150 | case TargetOpcode::G_FCEIL: |
1151 | case TargetOpcode::G_FRINT: |
1152 | case TargetOpcode::G_FNEARBYINT: |
1153 | case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND: |
1154 | case TargetOpcode::G_INTRINSIC_ROUND: { |
1155 | Register Val = MI.getOperand(i: 1).getReg(); |
1156 | KnownFPClass KnownSrc; |
1157 | FPClassTest InterestedSrcs = InterestedClasses; |
1158 | if (InterestedSrcs & fcPosFinite) |
1159 | InterestedSrcs |= fcPosFinite; |
1160 | if (InterestedSrcs & fcNegFinite) |
1161 | InterestedSrcs |= fcNegFinite; |
1162 | computeKnownFPClass(R: Val, DemandedElts, InterestedClasses: InterestedSrcs, Known&: KnownSrc, Depth: Depth + 1); |
1163 | |
1164 | // Integer results cannot be subnormal. |
1165 | Known.knownNot(RuleOut: fcSubnormal); |
1166 | |
1167 | Known.propagateNaN(Src: KnownSrc, PreserveSign: true); |
1168 | |
1169 | // TODO: handle multi unit FPTypes once LLT FPInfo lands |
1170 | |
1171 | // Negative values that round up to 0 produce -0.
1172 | if (KnownSrc.isKnownNever(Mask: fcPosFinite)) |
1173 | Known.knownNot(RuleOut: fcPosFinite); |
1174 | if (KnownSrc.isKnownNever(Mask: fcNegFinite)) |
1175 | Known.knownNot(RuleOut: fcNegFinite); |
1176 | |
1177 | break; |
1178 | } |
1179 | case TargetOpcode::G_FEXP: |
1180 | case TargetOpcode::G_FEXP2: |
1181 | case TargetOpcode::G_FEXP10: { |
1182 | Known.knownNot(RuleOut: fcNegative); |
1183 | if ((InterestedClasses & fcNan) == fcNone) |
1184 | break; |
1185 | |
1186 | Register Val = MI.getOperand(i: 1).getReg(); |
1187 | KnownFPClass KnownSrc; |
1188 | computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc, |
1189 | Depth: Depth + 1); |
1190 | if (KnownSrc.isKnownNeverNaN()) { |
1191 | Known.knownNot(RuleOut: fcNan); |
1192 | Known.signBitMustBeZero(); |
1193 | } |
1194 | |
1195 | break; |
1196 | } |
1197 | case TargetOpcode::G_FLOG: |
1198 | case TargetOpcode::G_FLOG2: |
1199 | case TargetOpcode::G_FLOG10: { |
1200 | // log(+inf) -> +inf |
1201 | // log([+-]0.0) -> -inf |
1202 | // log(-inf) -> nan |
1203 | // log(-x) -> nan |
1204 | if ((InterestedClasses & (fcNan | fcInf)) == fcNone) |
1205 | break; |
1206 | |
1207 | FPClassTest InterestedSrcs = InterestedClasses; |
1208 | if ((InterestedClasses & fcNegInf) != fcNone) |
1209 | InterestedSrcs |= fcZero | fcSubnormal; |
1210 | if ((InterestedClasses & fcNan) != fcNone) |
1211 | InterestedSrcs |= fcNan | (fcNegative & ~fcNan); |
1212 | |
1213 | Register Val = MI.getOperand(i: 1).getReg(); |
1214 | KnownFPClass KnownSrc; |
1215 | computeKnownFPClass(R: Val, DemandedElts, InterestedClasses: InterestedSrcs, Known&: KnownSrc, Depth: Depth + 1); |
1216 | |
1217 | if (KnownSrc.isKnownNeverPosInfinity()) |
1218 | Known.knownNot(RuleOut: fcPosInf); |
1219 | |
1220 | if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero()) |
1221 | Known.knownNot(RuleOut: fcNan); |
1222 | |
1223 | LLT Ty = MRI.getType(Reg: Val).getScalarType(); |
1224 | const fltSemantics &FltSem = getFltSemanticForLLT(Ty); |
1225 | DenormalMode Mode = MF->getDenormalMode(FPType: FltSem); |
1226 | |
1227 | if (KnownSrc.isKnownNeverLogicalZero(Mode)) |
1228 | Known.knownNot(RuleOut: fcNegInf); |
1229 | |
1230 | break; |
1231 | } |
1232 | case TargetOpcode::G_FPOWI: { |
1233 | if ((InterestedClasses & fcNegative) == fcNone) |
1234 | break; |
1235 | |
1236 | Register Exp = MI.getOperand(i: 2).getReg(); |
1237 | LLT ExpTy = MRI.getType(Reg: Exp); |
1238 | KnownBits ExponentKnownBits = getKnownBits( |
1239 | R: Exp, DemandedElts: ExpTy.isVector() ? DemandedElts : APInt(1, 1), Depth: Depth + 1); |
1240 | |
1241 | if (ExponentKnownBits.Zero[0]) { // Is even |
1242 | Known.knownNot(RuleOut: fcNegative); |
1243 | break; |
1244 | } |
1245 | |
1246 | // Given that exp is an integer, here are the |
1247 | // ways that pow can return a negative value: |
1248 | // |
1249 | // pow(-x, exp) --> negative if exp is odd and x is negative. |
1250 | // pow(-0, exp) --> -inf if exp is negative odd. |
1251 | // pow(-0, exp) --> -0 if exp is positive odd. |
1252 | // pow(-inf, exp) --> -0 if exp is negative odd. |
1253 | // pow(-inf, exp) --> -inf if exp is positive odd. |
1254 | Register Val = MI.getOperand(i: 1).getReg(); |
1255 | KnownFPClass KnownSrc; |
1256 | computeKnownFPClass(R: Val, DemandedElts, InterestedClasses: fcNegative, Known&: KnownSrc, Depth: Depth + 1); |
1257 | if (KnownSrc.isKnownNever(Mask: fcNegative)) |
1258 | Known.knownNot(RuleOut: fcNegative); |
1259 | break; |
1260 | } |
1261 | case TargetOpcode::G_FLDEXP: |
1262 | case TargetOpcode::G_STRICT_FLDEXP: { |
1263 | Register Val = MI.getOperand(i: 1).getReg(); |
1264 | KnownFPClass KnownSrc; |
1265 | computeKnownFPClass(R: Val, DemandedElts, InterestedClasses, Known&: KnownSrc, |
1266 | Depth: Depth + 1); |
1267 | Known.propagateNaN(Src: KnownSrc, PreserveSign: true);
1268 | |
1269 | // Sign is preserved, but underflows may produce zeroes. |
1270 | if (KnownSrc.isKnownNever(Mask: fcNegative)) |
1271 | Known.knownNot(RuleOut: fcNegative); |
1272 | else if (KnownSrc.cannotBeOrderedLessThanZero()) |
1273 | Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask); |
1274 | |
1275 | if (KnownSrc.isKnownNever(Mask: fcPositive)) |
1276 | Known.knownNot(RuleOut: fcPositive); |
1277 | else if (KnownSrc.cannotBeOrderedGreaterThanZero()) |
1278 | Known.knownNot(RuleOut: KnownFPClass::OrderedGreaterThanZeroMask); |
1279 | |
1280 | // Can refine inf/zero handling based on the exponent operand. |
1281 | const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf; |
1282 | if ((InterestedClasses & ExpInfoMask) == fcNone) |
1283 | break; |
1284 | if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone) |
1285 | break; |
1286 | |
1287 | // TODO: Handle constant range of Exp |
1288 | |
1289 | break; |
1290 | } |
1291 | case TargetOpcode::G_INTRINSIC_ROUNDEVEN: { |
1292 | computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known, |
1293 | Depth); |
1294 | break; |
1295 | } |
1296 | case TargetOpcode::G_FADD: |
1297 | case TargetOpcode::G_STRICT_FADD: |
1298 | case TargetOpcode::G_FSUB: |
1299 | case TargetOpcode::G_STRICT_FSUB: { |
1300 | Register LHS = MI.getOperand(i: 1).getReg(); |
1301 | Register RHS = MI.getOperand(i: 2).getReg(); |
1302 | KnownFPClass KnownLHS, KnownRHS; |
1303 | bool WantNegative = |
1304 | (Opcode == TargetOpcode::G_FADD || |
1305 | Opcode == TargetOpcode::G_STRICT_FADD) && |
1306 | (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone; |
1307 | bool WantNaN = (InterestedClasses & fcNan) != fcNone; |
1308 | bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone; |
1309 | |
1310 | if (!WantNaN && !WantNegative && !WantNegZero) |
1311 | break; |
1312 | |
1313 | FPClassTest InterestedSrcs = InterestedClasses; |
1314 | if (WantNegative) |
1315 | InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask; |
1316 | if (InterestedClasses & fcNan) |
1317 | InterestedSrcs |= fcInf; |
1318 | computeKnownFPClass(R: RHS, DemandedElts, InterestedClasses: InterestedSrcs, Known&: KnownRHS, Depth: Depth + 1); |
1319 | |
1320 | if ((WantNaN && KnownRHS.isKnownNeverNaN()) || |
1321 | (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) || |
1322 | WantNegZero || |
1323 | (Opcode == TargetOpcode::G_FSUB || |
1324 | Opcode == TargetOpcode::G_STRICT_FSUB)) { |
1325 | |
1326 | // RHS is canonically cheaper to compute. Skip inspecting the LHS if |
1327 | // there's no point. |
1328 | computeKnownFPClass(R: LHS, DemandedElts, InterestedClasses: InterestedSrcs, Known&: KnownLHS, |
1329 | Depth: Depth + 1); |
1330 | // Adding positive and negative infinity produces NaN. |
1331 | // TODO: Check sign of infinities. |
1332 | if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() && |
1333 | (KnownLHS.isKnownNeverInfinity() || KnownRHS.isKnownNeverInfinity())) |
1334 | Known.knownNot(RuleOut: fcNan); |
1335 | |
1336 | if (Opcode == TargetOpcode::G_FADD || Opcode == TargetOpcode::G_STRICT_FADD) {
1337 | if (KnownLHS.cannotBeOrderedLessThanZero() && |
1338 | KnownRHS.cannotBeOrderedLessThanZero()) |
1339 | Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask); |
1340 | |
1341 | // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0. |
1342 | if ((KnownLHS.isKnownNeverLogicalNegZero(Mode: MF->getDenormalMode( |
1343 | FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType()))) || |
1344 | KnownRHS.isKnownNeverLogicalNegZero(Mode: MF->getDenormalMode( |
1345 | FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType())))) && |
1346 | // Make sure output negative denormal can't flush to -0 |
1347 | outputDenormalIsIEEEOrPosZero(MF: *MF, Ty: DstTy)) |
1348 | Known.knownNot(RuleOut: fcNegZero); |
1349 | } else { |
1350 | // Only fsub -0, +0 can return -0 |
1351 | if ((KnownLHS.isKnownNeverLogicalNegZero(Mode: MF->getDenormalMode( |
1352 | FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType()))) || |
1353 | KnownRHS.isKnownNeverLogicalPosZero(Mode: MF->getDenormalMode( |
1354 | FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType())))) && |
1355 | // Make sure output negative denormal can't flush to -0 |
1356 | outputDenormalIsIEEEOrPosZero(MF: *MF, Ty: DstTy)) |
1357 | Known.knownNot(RuleOut: fcNegZero); |
1358 | } |
1359 | } |
1360 | |
1361 | break; |
1362 | } |
1363 | case TargetOpcode::G_FMUL: |
1364 | case TargetOpcode::G_STRICT_FMUL: { |
1365 | Register LHS = MI.getOperand(i: 1).getReg(); |
1366 | Register RHS = MI.getOperand(i: 2).getReg(); |
1367 | // X * X is always non-negative or a NaN. |
1368 | if (LHS == RHS) |
1369 | Known.knownNot(RuleOut: fcNegative); |
1370 | |
1371 | if ((InterestedClasses & fcNan) != fcNan) |
1372 | break; |
1373 | |
1374 | // fcSubnormal is only needed in case of DAZ. |
1375 | const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal; |
1376 | |
1377 | KnownFPClass KnownLHS, KnownRHS; |
1378 | computeKnownFPClass(R: RHS, DemandedElts, InterestedClasses: NeedForNan, Known&: KnownRHS, Depth: Depth + 1); |
1379 | if (!KnownRHS.isKnownNeverNaN()) |
1380 | break; |
1381 | |
1382 | computeKnownFPClass(R: LHS, DemandedElts, InterestedClasses: NeedForNan, Known&: KnownLHS, Depth: Depth + 1); |
1383 | if (!KnownLHS.isKnownNeverNaN()) |
1384 | break; |
1385 | |
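    // The sign bit of a non-NaN product is the XOR of the operand sign bits,
    // so if both operand signs are known, the sign of the result is known.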
1386 | if (KnownLHS.SignBit && KnownRHS.SignBit) { |
1387 | if (*KnownLHS.SignBit == *KnownRHS.SignBit) |
1388 | Known.signBitMustBeZero(); |
1389 | else |
1390 | Known.signBitMustBeOne(); |
1391 | } |
1392 | |
    // The only way a product of two non-NaN values can be NaN is 0 * +/-inf.
    // If neither operand can be infinity, that cannot happen.
1394 | if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) { |
1395 | Known.knownNot(RuleOut: fcNan); |
1396 | break; |
1397 | } |
1398 | |
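    // Otherwise NaN can only come from multiplying an infinity by a zero
    // (including a denormal that may be flushed to zero), so whenever one
    // operand could be infinity the other must be known not to be a logical
    // zero.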
1399 | if ((KnownRHS.isKnownNeverInfinity() || |
1400 | KnownLHS.isKnownNeverLogicalZero(Mode: MF->getDenormalMode( |
1401 | FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType())))) && |
1402 | (KnownLHS.isKnownNeverInfinity() || |
1403 | KnownRHS.isKnownNeverLogicalZero( |
1404 | Mode: MF->getDenormalMode(FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType()))))) |
1405 | Known.knownNot(RuleOut: fcNan); |
1406 | |
1407 | break; |
1408 | } |
1409 | case TargetOpcode::G_FDIV: |
1410 | case TargetOpcode::G_FREM: { |
1411 | Register LHS = MI.getOperand(i: 1).getReg(); |
1412 | Register RHS = MI.getOperand(i: 2).getReg(); |
1413 | |
1414 | if (LHS == RHS) { |
1415 | // TODO: Could filter out snan if we inspect the operand |
1416 | if (Opcode == TargetOpcode::G_FDIV) { |
1417 | // X / X is always exactly 1.0 or a NaN. |
1418 | Known.KnownFPClasses = fcNan | fcPosNormal; |
1419 | } else { |
1420 | // X % X is always exactly [+-]0.0 or a NaN. |
1421 | Known.KnownFPClasses = fcNan | fcZero; |
1422 | } |
1423 | |
1424 | break; |
1425 | } |
1426 | |
1427 | const bool WantNan = (InterestedClasses & fcNan) != fcNone; |
1428 | const bool WantNegative = (InterestedClasses & fcNegative) != fcNone; |
1429 | const bool WantPositive = Opcode == TargetOpcode::G_FREM && |
1430 | (InterestedClasses & fcPositive) != fcNone; |
1431 | if (!WantNan && !WantNegative && !WantPositive) |
1432 | break; |
1433 | |
1434 | KnownFPClass KnownLHS, KnownRHS; |
1435 | |
1436 | computeKnownFPClass(R: RHS, DemandedElts, InterestedClasses: fcNan | fcInf | fcZero | fcNegative, |
1437 | Known&: KnownRHS, Depth: Depth + 1); |
1438 | |
1439 | bool KnowSomethingUseful = |
1440 | KnownRHS.isKnownNeverNaN() || KnownRHS.isKnownNever(Mask: fcNegative); |
1441 | |
1442 | if (KnowSomethingUseful || WantPositive) { |
1443 | const FPClassTest InterestedLHS = |
1444 | WantPositive ? fcAllFlags |
1445 | : fcNan | fcInf | fcZero | fcSubnormal | fcNegative; |
1446 | |
1447 | computeKnownFPClass(R: LHS, DemandedElts, InterestedClasses: InterestedClasses & InterestedLHS, |
1448 | Known&: KnownLHS, Depth: Depth + 1); |
1449 | } |
1450 | |
    if (Opcode == TargetOpcode::G_FDIV) {
1452 | // Only 0/0, Inf/Inf produce NaN. |
1453 | if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() && |
1454 | (KnownLHS.isKnownNeverInfinity() || |
1455 | KnownRHS.isKnownNeverInfinity()) && |
1456 | ((KnownLHS.isKnownNeverLogicalZero(Mode: MF->getDenormalMode( |
1457 | FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType())))) || |
1458 | (KnownRHS.isKnownNeverLogicalZero(Mode: MF->getDenormalMode( |
1459 | FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType())))))) { |
1460 | Known.knownNot(RuleOut: fcNan); |
1461 | } |
1462 | |
      // X / -0.0 is -Inf (or NaN), so the divisor must be known non-negative
      // as well. A non-negative value divided by a non-negative value cannot
      // be negative.
1465 | if (KnownLHS.isKnownNever(Mask: fcNegative) && |
1466 | KnownRHS.isKnownNever(Mask: fcNegative)) |
1467 | Known.knownNot(RuleOut: fcNegative); |
1468 | } else { |
1469 | // Inf REM x and x REM 0 produce NaN. |
1470 | if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() && |
1471 | KnownLHS.isKnownNeverInfinity() && |
1472 | KnownRHS.isKnownNeverLogicalZero(Mode: MF->getDenormalMode( |
1473 | FPType: getFltSemanticForLLT(Ty: DstTy.getScalarType())))) { |
1474 | Known.knownNot(RuleOut: fcNan); |
1475 | } |
1476 | |
1477 | // The sign for frem is the same as the first operand. |
1478 | if (KnownLHS.cannotBeOrderedLessThanZero()) |
1479 | Known.knownNot(RuleOut: KnownFPClass::OrderedLessThanZeroMask); |
1480 | if (KnownLHS.cannotBeOrderedGreaterThanZero()) |
1481 | Known.knownNot(RuleOut: KnownFPClass::OrderedGreaterThanZeroMask); |
1482 | |
1483 | // See if we can be more aggressive about the sign of 0. |
1484 | if (KnownLHS.isKnownNever(Mask: fcNegative)) |
1485 | Known.knownNot(RuleOut: fcNegative); |
1486 | if (KnownLHS.isKnownNever(Mask: fcPositive)) |
1487 | Known.knownNot(RuleOut: fcPositive); |
1488 | } |
1489 | |
1490 | break; |
1491 | } |
1492 | case TargetOpcode::G_FPEXT: { |
1493 | Register Dst = MI.getOperand(i: 0).getReg(); |
1494 | Register Src = MI.getOperand(i: 1).getReg(); |
1495 | // Infinity, nan and zero propagate from source. |
    computeKnownFPClass(R: Src, DemandedElts, InterestedClasses, Known, Depth: Depth + 1);
1497 | |
1498 | LLT DstTy = MRI.getType(Reg: Dst).getScalarType(); |
1499 | const fltSemantics &DstSem = getFltSemanticForLLT(Ty: DstTy); |
1500 | LLT SrcTy = MRI.getType(Reg: Src).getScalarType(); |
1501 | const fltSemantics &SrcSem = getFltSemanticForLLT(Ty: SrcTy); |
1502 | |
1503 | // All subnormal inputs should be in the normal range in the result type. |
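    // e.g. the smallest positive f16 subnormal (2^-24) is a normal f32 value,
    // so an f16 -> f32 G_FPEXT can never produce a subnormal result.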
1504 | if (APFloat::isRepresentableAsNormalIn(Src: SrcSem, Dst: DstSem)) { |
1505 | if (Known.KnownFPClasses & fcPosSubnormal) |
1506 | Known.KnownFPClasses |= fcPosNormal; |
1507 | if (Known.KnownFPClasses & fcNegSubnormal) |
1508 | Known.KnownFPClasses |= fcNegNormal; |
1509 | Known.knownNot(RuleOut: fcSubnormal); |
1510 | } |
1511 | |
1512 | // Sign bit of a nan isn't guaranteed. |
1513 | if (!Known.isKnownNeverNaN()) |
1514 | Known.SignBit = std::nullopt; |
1515 | break; |
1516 | } |
1517 | case TargetOpcode::G_FPTRUNC: { |
1518 | computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known, |
1519 | Depth); |
1520 | break; |
1521 | } |
1522 | case TargetOpcode::G_SITOFP: |
1523 | case TargetOpcode::G_UITOFP: { |
1524 | // Cannot produce nan |
1525 | Known.knownNot(RuleOut: fcNan); |
1526 | |
1527 | // Integers cannot be subnormal |
1528 | Known.knownNot(RuleOut: fcSubnormal); |
1529 | |
1530 | // sitofp and uitofp turn into +0.0 for zero. |
1531 | Known.knownNot(RuleOut: fcNegZero); |
1532 | if (Opcode == TargetOpcode::G_UITOFP) |
1533 | Known.signBitMustBeZero(); |
1534 | |
1535 | Register Val = MI.getOperand(i: 1).getReg(); |
1536 | LLT Ty = MRI.getType(Reg: Val); |
1537 | |
1538 | if (InterestedClasses & fcInf) { |
1539 | // Get width of largest magnitude integer (remove a bit if signed). |
1540 | // This still works for a signed minimum value because the largest FP |
    // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx).
1542 | int IntSize = Ty.getScalarSizeInBits(); |
1543 | if (Opcode == TargetOpcode::G_SITOFP) |
1544 | --IntSize; |
1545 | |
1546 | // If the exponent of the largest finite FP value can hold the largest |
1547 | // integer, the result of the cast must be finite. |
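      // e.g. G_SITOFP from s32 to f32 uses at most 31 magnitude bits and
      // ilogb of the largest f32 is 127 >= 31, so the result is never
      // infinity. G_UITOFP from s32 to f16 can overflow: ilogb of the
      // largest f16 (65504) is only 15.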
1548 | LLT FPTy = DstTy.getScalarType(); |
1549 | const fltSemantics &FltSem = getFltSemanticForLLT(Ty: FPTy); |
1550 | if (ilogb(Arg: APFloat::getLargest(Sem: FltSem)) >= IntSize) |
1551 | Known.knownNot(RuleOut: fcInf); |
1552 | } |
1553 | |
1554 | break; |
1555 | } |
1556 | // case TargetOpcode::G_MERGE_VALUES: |
1557 | case TargetOpcode::G_BUILD_VECTOR: |
1558 | case TargetOpcode::G_CONCAT_VECTORS: { |
1559 | GMergeLikeInstr &Merge = cast<GMergeLikeInstr>(Val&: MI); |
1560 | |
1561 | if (!DstTy.isFixedVector()) |
1562 | break; |
1563 | |
1564 | bool First = true; |
1565 | for (unsigned Idx = 0; Idx < Merge.getNumSources(); ++Idx) { |
      // Only look at sources whose element is demanded.
      bool NeedsElt = DemandedElts[Idx];

      // Do we demand this source element?
1570 | if (NeedsElt) { |
1571 | Register Src = Merge.getSourceReg(I: Idx); |
1572 | if (First) { |
1573 | computeKnownFPClass(R: Src, Known, InterestedClasses, Depth: Depth + 1); |
1574 | First = false; |
1575 | } else { |
1576 | KnownFPClass Known2; |
1577 | computeKnownFPClass(R: Src, Known&: Known2, InterestedClasses, Depth: Depth + 1); |
1578 | Known |= Known2; |
1579 | } |
1580 | |
1581 | // If we don't know any bits, early out. |
1582 | if (Known.isUnknown()) |
1583 | break; |
1584 | } |
1585 | } |
1586 | |
1587 | break; |
1588 | } |
1589 | case TargetOpcode::G_EXTRACT_VECTOR_ELT: { |
1590 | // Look through extract element. If the index is non-constant or |
1591 | // out-of-range demand all elements, otherwise just the extracted |
1592 | // element. |
    GExtractVectorElement &Extract = cast<GExtractVectorElement>(Val&: MI);
1594 | Register Vec = Extract.getVectorReg(); |
1595 | Register Idx = Extract.getIndexReg(); |
1596 | |
1597 | auto CIdx = getIConstantVRegVal(VReg: Idx, MRI); |
1598 | |
1599 | LLT VecTy = MRI.getType(Reg: Vec); |
1600 | |
1601 | if (VecTy.isFixedVector()) { |
1602 | unsigned NumElts = VecTy.getNumElements(); |
1603 | APInt DemandedVecElts = APInt::getAllOnes(numBits: NumElts); |
1604 | if (CIdx && CIdx->ult(RHS: NumElts)) |
1605 | DemandedVecElts = APInt::getOneBitSet(numBits: NumElts, BitNo: CIdx->getZExtValue()); |
1606 | return computeKnownFPClass(R: Vec, DemandedElts: DemandedVecElts, InterestedClasses, Known, |
1607 | Depth: Depth + 1); |
1608 | } |
1609 | |
1610 | break; |
1611 | } |
1612 | case TargetOpcode::G_INSERT_VECTOR_ELT: { |
1613 | GInsertVectorElement &Insert = cast<GInsertVectorElement>(Val&: MI); |
1614 | Register Vec = Insert.getVectorReg(); |
1615 | Register Elt = Insert.getElementReg(); |
1616 | Register Idx = Insert.getIndexReg(); |
1617 | |
1618 | LLT VecTy = MRI.getType(Reg: Vec); |
1619 | |
1620 | if (VecTy.isScalableVector()) |
1621 | return; |
1622 | |
1623 | auto CIdx = getIConstantVRegVal(VReg: Idx, MRI); |
1624 | |
1625 | unsigned NumElts = DemandedElts.getBitWidth(); |
1626 | APInt DemandedVecElts = DemandedElts; |
1627 | bool NeedsElt = true; |
1628 | // If we know the index we are inserting to, clear it from Vec check. |
1629 | if (CIdx && CIdx->ult(RHS: NumElts)) { |
1630 | DemandedVecElts.clearBit(BitPosition: CIdx->getZExtValue()); |
1631 | NeedsElt = DemandedElts[CIdx->getZExtValue()]; |
1632 | } |
1633 | |
1634 | // Do we demand the inserted element? |
1635 | if (NeedsElt) { |
1636 | computeKnownFPClass(R: Elt, Known, InterestedClasses, Depth: Depth + 1); |
1637 | // If we don't know any bits, early out. |
1638 | if (Known.isUnknown()) |
1639 | break; |
1640 | } else { |
1641 | Known.KnownFPClasses = fcNone; |
1642 | } |
1643 | |
1644 | // Do we need anymore elements from Vec? |
1645 | if (!DemandedVecElts.isZero()) { |
1646 | KnownFPClass Known2; |
1647 | computeKnownFPClass(R: Vec, DemandedElts: DemandedVecElts, InterestedClasses, Known&: Known2, |
1648 | Depth: Depth + 1); |
1649 | Known |= Known2; |
1650 | } |
1651 | |
1652 | break; |
1653 | } |
1654 | case TargetOpcode::G_SHUFFLE_VECTOR: { |
1655 | // For undef elements, we don't know anything about the common state of |
1656 | // the shuffle result. |
1657 | GShuffleVector &Shuf = cast<GShuffleVector>(Val&: MI); |
1658 | APInt DemandedLHS, DemandedRHS; |
1659 | if (DstTy.isScalableVector()) { |
1660 | assert(DemandedElts == APInt(1, 1)); |
1661 | DemandedLHS = DemandedRHS = DemandedElts; |
1662 | } else { |
1663 | if (!llvm::getShuffleDemandedElts(SrcWidth: DstTy.getNumElements(), Mask: Shuf.getMask(), |
1664 | DemandedElts, DemandedLHS, |
1665 | DemandedRHS)) { |
1666 | Known.resetAll(); |
1667 | return; |
1668 | } |
1669 | } |
1670 | |
1671 | if (!!DemandedLHS) { |
1672 | Register LHS = Shuf.getSrc1Reg(); |
1673 | computeKnownFPClass(R: LHS, DemandedElts: DemandedLHS, InterestedClasses, Known, |
1674 | Depth: Depth + 1); |
1675 | |
1676 | // If we don't know any bits, early out. |
1677 | if (Known.isUnknown()) |
1678 | break; |
1679 | } else { |
1680 | Known.KnownFPClasses = fcNone; |
1681 | } |
1682 | |
1683 | if (!!DemandedRHS) { |
1684 | KnownFPClass Known2; |
1685 | Register RHS = Shuf.getSrc2Reg(); |
1686 | computeKnownFPClass(R: RHS, DemandedElts: DemandedRHS, InterestedClasses, Known&: Known2, |
1687 | Depth: Depth + 1); |
1688 | Known |= Known2; |
1689 | } |
1690 | break; |
1691 | } |
1692 | case TargetOpcode::COPY: { |
1693 | Register Src = MI.getOperand(i: 1).getReg(); |
1694 | |
1695 | if (!Src.isVirtual()) |
1696 | return; |
1697 | |
1698 | computeKnownFPClass(R: Src, DemandedElts, InterestedClasses, Known, Depth: Depth + 1); |
1699 | break; |
1700 | } |
1701 | } |
1702 | } |
1703 | |
1704 | KnownFPClass |
1705 | GISelValueTracking::computeKnownFPClass(Register R, const APInt &DemandedElts, |
1706 | FPClassTest InterestedClasses, |
1707 | unsigned Depth) { |
1708 | KnownFPClass KnownClasses; |
1709 | computeKnownFPClass(R, DemandedElts, InterestedClasses, Known&: KnownClasses, Depth); |
1710 | return KnownClasses; |
1711 | } |
1712 | |
1713 | KnownFPClass GISelValueTracking::computeKnownFPClass( |
1714 | Register R, FPClassTest InterestedClasses, unsigned Depth) { |
1715 | KnownFPClass Known; |
1716 | computeKnownFPClass(R, Known, InterestedClasses, Depth); |
1717 | return Known; |
1718 | } |
1719 | |
1720 | KnownFPClass GISelValueTracking::computeKnownFPClass( |
1721 | Register R, const APInt &DemandedElts, uint32_t Flags, |
1722 | FPClassTest InterestedClasses, unsigned Depth) { |
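  // If the use has the nnan/ninf fast-math flags, a NaN/Inf operand would
  // make it poison, so those classes can be ignored in the query and cleared
  // from the computed result.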
1723 | if (Flags & MachineInstr::MIFlag::FmNoNans) |
1724 | InterestedClasses &= ~fcNan; |
1725 | if (Flags & MachineInstr::MIFlag::FmNoInfs) |
1726 | InterestedClasses &= ~fcInf; |
1727 | |
1728 | KnownFPClass Result = |
1729 | computeKnownFPClass(R, DemandedElts, InterestedClasses, Depth); |
1730 | |
1731 | if (Flags & MachineInstr::MIFlag::FmNoNans) |
1732 | Result.KnownFPClasses &= ~fcNan; |
1733 | if (Flags & MachineInstr::MIFlag::FmNoInfs) |
1734 | Result.KnownFPClasses &= ~fcInf; |
1735 | return Result; |
1736 | } |
1737 | |
1738 | KnownFPClass GISelValueTracking::computeKnownFPClass( |
1739 | Register R, uint32_t Flags, FPClassTest InterestedClasses, unsigned Depth) { |
1740 | LLT Ty = MRI.getType(Reg: R); |
1741 | APInt DemandedElts = |
1742 | Ty.isFixedVector() ? APInt::getAllOnes(numBits: Ty.getNumElements()) : APInt(1, 1); |
1743 | return computeKnownFPClass(R, DemandedElts, Flags, InterestedClasses, Depth); |
1744 | } |
1745 | |
1746 | /// Compute number of sign bits for the intersection of \p Src0 and \p Src1 |
1747 | unsigned GISelValueTracking::computeNumSignBitsMin(Register Src0, Register Src1, |
1748 | const APInt &DemandedElts, |
1749 | unsigned Depth) { |
1750 | // Test src1 first, since we canonicalize simpler expressions to the RHS. |
1751 | unsigned Src1SignBits = computeNumSignBits(R: Src1, DemandedElts, Depth); |
1752 | if (Src1SignBits == 1) |
1753 | return 1; |
1754 | return std::min(a: computeNumSignBits(R: Src0, DemandedElts, Depth), b: Src1SignBits); |
1755 | } |
1756 | |
1757 | /// Compute the known number of sign bits with attached range metadata in the |
1758 | /// memory operand. If this is an extending load, accounts for the behavior of |
1759 | /// the high bits. |
1760 | static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld, |
1761 | unsigned TyBits) { |
1762 | const MDNode *Ranges = Ld->getRanges(); |
1763 | if (!Ranges) |
1764 | return 1; |
1765 | |
1766 | ConstantRange CR = getConstantRangeFromMetadata(RangeMD: *Ranges); |
1767 | if (TyBits > CR.getBitWidth()) { |
1768 | switch (Ld->getOpcode()) { |
1769 | case TargetOpcode::G_SEXTLOAD: |
1770 | CR = CR.signExtend(BitWidth: TyBits); |
1771 | break; |
1772 | case TargetOpcode::G_ZEXTLOAD: |
1773 | CR = CR.zeroExtend(BitWidth: TyBits); |
1774 | break; |
1775 | default: |
1776 | break; |
1777 | } |
1778 | } |
1779 | |
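  // e.g. a G_SEXTLOAD from s8 memory into s32 with !range [-4, 4) sign
  // extends the range to [-4, 3]; both bounds have 30 sign bits in 32 bits,
  // so 30 sign bits are known.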
1780 | return std::min(a: CR.getSignedMin().getNumSignBits(), |
1781 | b: CR.getSignedMax().getNumSignBits()); |
1782 | } |
1783 | |
1784 | unsigned GISelValueTracking::computeNumSignBits(Register R, |
1785 | const APInt &DemandedElts, |
1786 | unsigned Depth) { |
1787 | MachineInstr &MI = *MRI.getVRegDef(Reg: R); |
1788 | unsigned Opcode = MI.getOpcode(); |
1789 | |
1790 | if (Opcode == TargetOpcode::G_CONSTANT) |
1791 | return MI.getOperand(i: 1).getCImm()->getValue().getNumSignBits(); |
1792 | |
1793 | if (Depth == getMaxDepth()) |
1794 | return 1; |
1795 | |
1796 | if (!DemandedElts) |
1797 | return 1; // No demanded elts, better to assume we don't know anything. |
1798 | |
1799 | LLT DstTy = MRI.getType(Reg: R); |
1800 | const unsigned TyBits = DstTy.getScalarSizeInBits(); |
1801 | |
1802 | // Handle the case where this is called on a register that does not have a |
1803 | // type constraint. This is unlikely to occur except by looking through copies |
1804 | // but it is possible for the initial register being queried to be in this |
1805 | // state. |
1806 | if (!DstTy.isValid()) |
1807 | return 1; |
1808 | |
1809 | unsigned FirstAnswer = 1; |
1810 | switch (Opcode) { |
1811 | case TargetOpcode::COPY: { |
1812 | MachineOperand &Src = MI.getOperand(i: 1); |
1813 | if (Src.getReg().isVirtual() && Src.getSubReg() == 0 && |
1814 | MRI.getType(Reg: Src.getReg()).isValid()) { |
1815 | // Don't increment Depth for this one since we didn't do any work. |
1816 | return computeNumSignBits(R: Src.getReg(), DemandedElts, Depth); |
1817 | } |
1818 | |
1819 | return 1; |
1820 | } |
1821 | case TargetOpcode::G_SEXT: { |
1822 | Register Src = MI.getOperand(i: 1).getReg(); |
1823 | LLT SrcTy = MRI.getType(Reg: Src); |
1824 | unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits(); |
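    // e.g. a G_SEXT from s8 to s32 whose source has 3 sign bits produces
    // 3 + (32 - 8) = 27 sign bits.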
1825 | return computeNumSignBits(R: Src, DemandedElts, Depth: Depth + 1) + Tmp; |
1826 | } |
1827 | case TargetOpcode::G_ASSERT_SEXT: |
1828 | case TargetOpcode::G_SEXT_INREG: { |
1829 | // Max of the input and what this extends. |
1830 | Register Src = MI.getOperand(i: 1).getReg(); |
1831 | unsigned SrcBits = MI.getOperand(i: 2).getImm(); |
1832 | unsigned InRegBits = TyBits - SrcBits + 1; |
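    // e.g. G_SEXT_INREG of an s32 value from 8 bits guarantees at least
    // 32 - 8 + 1 = 25 sign bits, regardless of the input.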
1833 | return std::max(a: computeNumSignBits(R: Src, DemandedElts, Depth: Depth + 1), |
1834 | b: InRegBits); |
1835 | } |
1836 | case TargetOpcode::G_LOAD: { |
1837 | GLoad *Ld = cast<GLoad>(Val: &MI); |
1838 | if (DemandedElts != 1 || !getDataLayout().isLittleEndian()) |
1839 | break; |
1840 | |
1841 | return computeNumSignBitsFromRangeMetadata(Ld, TyBits); |
1842 | } |
1843 | case TargetOpcode::G_SEXTLOAD: { |
1844 | GSExtLoad *Ld = cast<GSExtLoad>(Val: &MI); |
1845 | |
1846 | // FIXME: We need an in-memory type representation. |
1847 | if (DstTy.isVector()) |
1848 | return 1; |
1849 | |
1850 | unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits); |
1851 | if (NumBits != 1) |
1852 | return NumBits; |
1853 | |
1854 | // e.g. i16->i32 = '17' bits known. |
1855 | const MachineMemOperand *MMO = *MI.memoperands_begin(); |
1856 | return TyBits - MMO->getSizeInBits().getValue() + 1; |
1857 | } |
1858 | case TargetOpcode::G_ZEXTLOAD: { |
1859 | GZExtLoad *Ld = cast<GZExtLoad>(Val: &MI); |
1860 | |
1861 | // FIXME: We need an in-memory type representation. |
1862 | if (DstTy.isVector()) |
1863 | return 1; |
1864 | |
1865 | unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits); |
1866 | if (NumBits != 1) |
1867 | return NumBits; |
1868 | |
1869 | // e.g. i16->i32 = '16' bits known. |
1870 | const MachineMemOperand *MMO = *MI.memoperands_begin(); |
1871 | return TyBits - MMO->getSizeInBits().getValue(); |
1872 | } |
1873 | case TargetOpcode::G_AND: |
1874 | case TargetOpcode::G_OR: |
1875 | case TargetOpcode::G_XOR: { |
1876 | Register Src1 = MI.getOperand(i: 1).getReg(); |
1877 | unsigned Src1NumSignBits = |
1878 | computeNumSignBits(R: Src1, DemandedElts, Depth: Depth + 1); |
1879 | if (Src1NumSignBits != 1) { |
1880 | Register Src2 = MI.getOperand(i: 2).getReg(); |
1881 | unsigned Src2NumSignBits = |
1882 | computeNumSignBits(R: Src2, DemandedElts, Depth: Depth + 1); |
1883 | FirstAnswer = std::min(a: Src1NumSignBits, b: Src2NumSignBits); |
1884 | } |
1885 | break; |
1886 | } |
1887 | case TargetOpcode::G_TRUNC: { |
1888 | Register Src = MI.getOperand(i: 1).getReg(); |
1889 | LLT SrcTy = MRI.getType(Reg: Src); |
1890 | |
1891 | // Check if the sign bits of source go down as far as the truncated value. |
1892 | unsigned DstTyBits = DstTy.getScalarSizeInBits(); |
1893 | unsigned NumSrcBits = SrcTy.getScalarSizeInBits(); |
1894 | unsigned NumSrcSignBits = computeNumSignBits(R: Src, DemandedElts, Depth: Depth + 1); |
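    // e.g. truncating s64 to s32 when the source has 40 sign bits leaves
    // 40 - (64 - 32) = 8 sign bits in the result.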
1895 | if (NumSrcSignBits > (NumSrcBits - DstTyBits)) |
1896 | return NumSrcSignBits - (NumSrcBits - DstTyBits); |
1897 | break; |
1898 | } |
1899 | case TargetOpcode::G_SELECT: { |
1900 | return computeNumSignBitsMin(Src0: MI.getOperand(i: 2).getReg(), |
1901 | Src1: MI.getOperand(i: 3).getReg(), DemandedElts, |
1902 | Depth: Depth + 1); |
1903 | } |
1904 | case TargetOpcode::G_SMIN: |
1905 | case TargetOpcode::G_SMAX: |
1906 | case TargetOpcode::G_UMIN: |
1907 | case TargetOpcode::G_UMAX: |
1908 | // TODO: Handle clamp pattern with number of sign bits for SMIN/SMAX. |
1909 | return computeNumSignBitsMin(Src0: MI.getOperand(i: 1).getReg(), |
1910 | Src1: MI.getOperand(i: 2).getReg(), DemandedElts, |
1911 | Depth: Depth + 1); |
1912 | case TargetOpcode::G_SADDO: |
1913 | case TargetOpcode::G_SADDE: |
1914 | case TargetOpcode::G_UADDO: |
1915 | case TargetOpcode::G_UADDE: |
1916 | case TargetOpcode::G_SSUBO: |
1917 | case TargetOpcode::G_SSUBE: |
1918 | case TargetOpcode::G_USUBO: |
1919 | case TargetOpcode::G_USUBE: |
1920 | case TargetOpcode::G_SMULO: |
1921 | case TargetOpcode::G_UMULO: { |
    // If booleans are represented as 0/-1, all bits of the overflow result
    // are sign bits.
1923 | // We know that we have an integer-based boolean since these operations |
1924 | // are only available for integer. |
1925 | if (MI.getOperand(i: 1).getReg() == R) { |
1926 | if (TL.getBooleanContents(isVec: DstTy.isVector(), isFloat: false) == |
1927 | TargetLowering::ZeroOrNegativeOneBooleanContent) |
1928 | return TyBits; |
1929 | } |
1930 | |
1931 | break; |
1932 | } |
1933 | case TargetOpcode::G_FCMP: |
1934 | case TargetOpcode::G_ICMP: { |
1935 | bool IsFP = Opcode == TargetOpcode::G_FCMP; |
1936 | if (TyBits == 1) |
1937 | break; |
1938 | auto BC = TL.getBooleanContents(isVec: DstTy.isVector(), isFloat: IsFP); |
1939 | if (BC == TargetLoweringBase::ZeroOrNegativeOneBooleanContent) |
1940 | return TyBits; // All bits are sign bits. |
1941 | if (BC == TargetLowering::ZeroOrOneBooleanContent) |
1942 | return TyBits - 1; // Every always-zero bit is a sign bit. |
1943 | break; |
1944 | } |
1945 | case TargetOpcode::G_BUILD_VECTOR: { |
    // Collect the minimum number of sign bits shared by every demanded
    // vector element.
1947 | FirstAnswer = TyBits; |
1948 | APInt SingleDemandedElt(1, 1); |
1949 | for (const auto &[I, MO] : enumerate(First: drop_begin(RangeOrContainer: MI.operands()))) { |
1950 | if (!DemandedElts[I]) |
1951 | continue; |
1952 | |
1953 | unsigned Tmp2 = |
1954 | computeNumSignBits(R: MO.getReg(), DemandedElts: SingleDemandedElt, Depth: Depth + 1); |
1955 | FirstAnswer = std::min(a: FirstAnswer, b: Tmp2); |
1956 | |
1957 | // If we don't know any bits, early out. |
1958 | if (FirstAnswer == 1) |
1959 | break; |
1960 | } |
1961 | break; |
1962 | } |
1963 | case TargetOpcode::G_CONCAT_VECTORS: { |
1964 | if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).isScalableVector()) |
1965 | break; |
1966 | FirstAnswer = TyBits; |
1967 | // Determine the minimum number of sign bits across all demanded |
1968 | // elts of the input vectors. Early out if the result is already 1. |
1969 | unsigned NumSubVectorElts = |
1970 | MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getNumElements(); |
1971 | for (const auto &[I, MO] : enumerate(First: drop_begin(RangeOrContainer: MI.operands()))) { |
1972 | APInt DemandedSub = |
1973 | DemandedElts.extractBits(numBits: NumSubVectorElts, bitPosition: I * NumSubVectorElts); |
1974 | if (!DemandedSub) |
1975 | continue; |
1976 | unsigned Tmp2 = computeNumSignBits(R: MO.getReg(), DemandedElts: DemandedSub, Depth: Depth + 1); |
1977 | |
1978 | FirstAnswer = std::min(a: FirstAnswer, b: Tmp2); |
1979 | |
1980 | // If we don't know any bits, early out. |
1981 | if (FirstAnswer == 1) |
1982 | break; |
1983 | } |
1984 | break; |
1985 | } |
1986 | case TargetOpcode::G_SHUFFLE_VECTOR: { |
1987 | // Collect the minimum number of sign bits that are shared by every vector |
1988 | // element referenced by the shuffle. |
1989 | APInt DemandedLHS, DemandedRHS; |
1990 | Register Src1 = MI.getOperand(i: 1).getReg(); |
1991 | unsigned NumElts = MRI.getType(Reg: Src1).getNumElements(); |
1992 | if (!getShuffleDemandedElts(SrcWidth: NumElts, Mask: MI.getOperand(i: 3).getShuffleMask(), |
1993 | DemandedElts, DemandedLHS, DemandedRHS)) |
1994 | return 1; |
1995 | |
1996 | if (!!DemandedLHS) |
1997 | FirstAnswer = computeNumSignBits(R: Src1, DemandedElts: DemandedLHS, Depth: Depth + 1); |
1998 | // If we don't know anything, early out and try computeKnownBits fall-back. |
1999 | if (FirstAnswer == 1) |
2000 | break; |
2001 | if (!!DemandedRHS) { |
2002 | unsigned Tmp2 = |
2003 | computeNumSignBits(R: MI.getOperand(i: 2).getReg(), DemandedElts: DemandedRHS, Depth: Depth + 1); |
2004 | FirstAnswer = std::min(a: FirstAnswer, b: Tmp2); |
2005 | } |
2006 | break; |
2007 | } |
2008 | case TargetOpcode::G_SPLAT_VECTOR: { |
    // The scalar source may be wider than the vector element type; check if
    // its sign bits extend down into the implicitly truncated value.
2010 | Register Src = MI.getOperand(i: 1).getReg(); |
2011 | unsigned NumSrcSignBits = computeNumSignBits(R: Src, DemandedElts: APInt(1, 1), Depth: Depth + 1); |
2012 | unsigned NumSrcBits = MRI.getType(Reg: Src).getSizeInBits(); |
2013 | if (NumSrcSignBits > (NumSrcBits - TyBits)) |
2014 | return NumSrcSignBits - (NumSrcBits - TyBits); |
2015 | break; |
2016 | } |
2017 | case TargetOpcode::G_INTRINSIC: |
2018 | case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: |
2019 | case TargetOpcode::G_INTRINSIC_CONVERGENT: |
2020 | case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: |
2021 | default: { |
2022 | unsigned NumBits = |
2023 | TL.computeNumSignBitsForTargetInstr(Analysis&: *this, R, DemandedElts, MRI, Depth); |
2024 | if (NumBits > 1) |
2025 | FirstAnswer = std::max(a: FirstAnswer, b: NumBits); |
2026 | break; |
2027 | } |
2028 | } |
2029 | |
2030 | // Finally, if we can prove that the top bits of the result are 0's or 1's, |
2031 | // use this information. |
2032 | KnownBits Known = getKnownBits(R, DemandedElts, Depth); |
2033 | APInt Mask; |
2034 | if (Known.isNonNegative()) { // sign bit is 0 |
2035 | Mask = Known.Zero; |
  } else if (Known.isNegative()) { // sign bit is 1
2037 | Mask = Known.One; |
2038 | } else { |
2039 | // Nothing known. |
2040 | return FirstAnswer; |
2041 | } |
2042 | |
2043 | // Okay, we know that the sign bit in Mask is set. Use CLO to determine |
2044 | // the number of identical bits in the top of the input value. |
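  // e.g. if the top 10 bits of the value are known zero, the sign bit is also
  // zero and there are at least 10 sign bits.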
2045 | Mask <<= Mask.getBitWidth() - TyBits; |
2046 | return std::max(a: FirstAnswer, b: Mask.countl_one()); |
2047 | } |
2048 | |
2049 | unsigned GISelValueTracking::computeNumSignBits(Register R, unsigned Depth) { |
2050 | LLT Ty = MRI.getType(Reg: R); |
2051 | APInt DemandedElts = |
2052 | Ty.isFixedVector() ? APInt::getAllOnes(numBits: Ty.getNumElements()) : APInt(1, 1); |
2053 | return computeNumSignBits(R, DemandedElts, Depth); |
2054 | } |
2055 | |
2056 | void GISelValueTrackingAnalysisLegacy::getAnalysisUsage( |
2057 | AnalysisUsage &AU) const { |
2058 | AU.setPreservesAll(); |
2059 | MachineFunctionPass::getAnalysisUsage(AU); |
2060 | } |
2061 | |
2062 | bool GISelValueTrackingAnalysisLegacy::runOnMachineFunction( |
2063 | MachineFunction &MF) { |
2064 | return false; |
2065 | } |
2066 | |
2067 | GISelValueTracking &GISelValueTrackingAnalysisLegacy::get(MachineFunction &MF) { |
2068 | if (!Info) { |
2069 | unsigned MaxDepth = |
2070 | MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6; |
2071 | Info = std::make_unique<GISelValueTracking>(args&: MF, args&: MaxDepth); |
2072 | } |
2073 | return *Info; |
2074 | } |
2075 | |
2076 | AnalysisKey GISelValueTrackingAnalysis::Key; |
2077 | |
2078 | GISelValueTracking |
2079 | GISelValueTrackingAnalysis::run(MachineFunction &MF, |
2080 | MachineFunctionAnalysisManager &MFAM) { |
2081 | return Result(MF); |
2082 | } |
2083 | |
2084 | PreservedAnalyses |
2085 | GISelValueTrackingPrinterPass::run(MachineFunction &MF, |
2086 | MachineFunctionAnalysisManager &MFAM) { |
2087 | auto &VTA = MFAM.getResult<GISelValueTrackingAnalysis>(IR&: MF); |
2088 | const auto &MRI = MF.getRegInfo(); |
  OS << "name: ";
2090 | MF.getFunction().printAsOperand(O&: OS, /*PrintType=*/false); |
2091 | OS << '\n'; |
2092 | |
2093 | for (MachineBasicBlock &BB : MF) { |
2094 | for (MachineInstr &MI : BB) { |
2095 | for (MachineOperand &MO : MI.defs()) { |
2096 | if (!MO.isReg() || MO.getReg().isPhysical()) |
2097 | continue; |
2098 | Register Reg = MO.getReg(); |
2099 | if (!MRI.getType(Reg).isValid()) |
2100 | continue; |
2101 | KnownBits Known = VTA.getKnownBits(R: Reg); |
2102 | unsigned SignedBits = VTA.computeNumSignBits(R: Reg); |
2103 | OS << " " << MO << " KnownBits:" << Known << " SignBits:" << SignedBits |
2104 | << '\n'; |
2105 | }; |
2106 | } |
2107 | } |
2108 | return PreservedAnalyses::all(); |
2109 | } |
2110 | |