//===- lib/CodeGen/GlobalISel/GISelKnownBits.cpp ----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Provides analysis for querying information about KnownBits during GISel
/// passes.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "gisel-known-bits"

using namespace llvm;

char llvm::GISelKnownBitsAnalysis::ID = 0;

INITIALIZE_PASS(GISelKnownBitsAnalysis, DEBUG_TYPE,
                "Analysis for ComputingKnownBits", false, true)

GISelKnownBits::GISelKnownBits(MachineFunction &MF, unsigned MaxDepth)
    : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
      DL(MF.getFunction().getDataLayout()), MaxDepth(MaxDepth) {}

Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
  const MachineInstr *MI = MRI.getVRegDef(R);
  switch (MI->getOpcode()) {
  case TargetOpcode::COPY:
    return computeKnownAlignment(MI->getOperand(1).getReg(), Depth);
  case TargetOpcode::G_ASSERT_ALIGN: {
    // TODO: Min with source
    return Align(MI->getOperand(2).getImm());
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI->getOperand(1).getIndex();
    return MF.getFrameInfo().getObjectAlign(FrameIdx);
  }
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  default:
    return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
  }
}

KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) {
  assert(MI.getNumExplicitDefs() == 1 &&
         "expected single return generic instruction");
  return getKnownBits(MI.getOperand(0).getReg());
}

KnownBits GISelKnownBits::getKnownBits(Register R) {
  const LLT Ty = MRI.getType(R);
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts =
      Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  return getKnownBits(R, DemandedElts);
}

KnownBits GISelKnownBits::getKnownBits(Register R, const APInt &DemandedElts,
                                       unsigned Depth) {
  // For now, we only maintain the cache during one request.
  assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared");

  KnownBits Known;
  computeKnownBitsImpl(R, Known, DemandedElts, Depth);
  ComputeKnownBitsCache.clear();
  return Known;
}

bool GISelKnownBits::signBitIsZero(Register R) {
  LLT Ty = MRI.getType(R);
  unsigned BitWidth = Ty.getScalarSizeInBits();
  return maskedValueIsZero(R, APInt::getSignMask(BitWidth));
}

APInt GISelKnownBits::getKnownZeroes(Register R) {
  return getKnownBits(R).Zero;
}

APInt GISelKnownBits::getKnownOnes(Register R) { return getKnownBits(R).One; }

LLVM_ATTRIBUTE_UNUSED static void
dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
  dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
         << "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
         << toString(Known.Zero | Known.One, 16, false) << "\n"
         << "[" << Depth << "] Zero: 0x" << toString(Known.Zero, 16, false)
         << "\n"
         << "[" << Depth << "] One: 0x" << toString(Known.One, 16, false)
         << "\n";
}

/// Compute known bits for the intersection of \p Src0 and \p Src1
void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1,
                                         KnownBits &Known,
                                         const APInt &DemandedElts,
                                         unsigned Depth) {
  // Test src1 first, since we canonicalize simpler expressions to the RHS.
  computeKnownBitsImpl(Src1, Known, DemandedElts, Depth);

  // If we don't know any bits, early out.
  if (Known.isUnknown())
    return;

  KnownBits Known2;
  computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);

  // Only known if known in both the LHS and RHS.
  Known = Known.intersectWith(Known2);
}

// Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is
// created using Width. Use this function when the inputs are KnownBits
// objects. TODO: Move this to KnownBits.h if this is usable in more cases.
static KnownBits extractBits(unsigned BitWidth, const KnownBits &SrcOpKnown,
                             const KnownBits &OffsetKnown,
                             const KnownBits &WidthKnown) {
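  // E.g. with BitWidth = 8 and Width known to lie in [3, 5], bits 5 and up
  // can never be part of the extracted field (Mask.Zero = 0b11100000) and
  // bits 0-2 always are (Mask.One = 0b00000111).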
  KnownBits Mask(BitWidth);
  Mask.Zero = APInt::getBitsSetFrom(
      BitWidth, WidthKnown.getMaxValue().getLimitedValue(BitWidth));
  Mask.One = APInt::getLowBitsSet(
      BitWidth, WidthKnown.getMinValue().getLimitedValue(BitWidth));
  return KnownBits::lshr(SrcOpKnown, OffsetKnown) & Mask;
}

void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
                                          const APInt &DemandedElts,
                                          unsigned Depth) {
  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();
  LLT DstTy = MRI.getType(R);

  // Handle the case where this is called on a register that does not have a
  // type constraint (i.e. it has a register class constraint instead). This is
  // unlikely to occur except by looking through copies, but it is possible for
  // the initial register being queried to be in this state.
  if (!DstTy.isValid()) {
    Known = KnownBits();
    return;
  }

  unsigned BitWidth = DstTy.getScalarSizeInBits();
  auto CacheEntry = ComputeKnownBitsCache.find(R);
  if (CacheEntry != ComputeKnownBitsCache.end()) {
    Known = CacheEntry->second;
    LLVM_DEBUG(dbgs() << "Cache hit at ");
    LLVM_DEBUG(dumpResult(MI, Known, Depth));
    assert(Known.getBitWidth() == BitWidth && "Cache entry size doesn't match");
    return;
  }
  Known = KnownBits(BitWidth); // Don't know anything

  // Depth may get bigger than max depth if it gets passed to a different
  // GISelKnownBits object.
  // This may happen when, say, a generic part uses a GISelKnownBits object
  // with some max depth, but then we hit TL.computeKnownBitsForTargetInstr,
  // which creates a new GISelKnownBits object with a different and smaller
  // depth. If we just checked for equality, we would never exit if the depth
  // that is passed down to the target-specific GISelKnownBits object is
  // already bigger than its max depth.
  if (Depth >= getMaxDepth())
    return;

  if (!DemandedElts)
    return; // No demanded elts, better to assume we don't know anything.

  KnownBits Known2;

  switch (Opcode) {
  default:
    TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI,
                                      Depth);
    break;
  case TargetOpcode::G_BUILD_VECTOR: {
    // Collect the known bits that are shared by every demanded vector element.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    for (unsigned i = 0, e = MI.getNumOperands() - 1; i < e; ++i) {
      if (!DemandedElts[i])
        continue;

      computeKnownBitsImpl(MI.getOperand(i + 1).getReg(), Known2, DemandedElts,
                           Depth + 1);

      // Known bits are the values that are shared by every demanded element.
      Known = Known.intersectWith(Known2);

      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }
    break;
  }
  case TargetOpcode::COPY:
  case TargetOpcode::G_PHI:
  case TargetOpcode::PHI: {
    Known.One = APInt::getAllOnes(BitWidth);
    Known.Zero = APInt::getAllOnes(BitWidth);
    // Destination registers should not have subregisters at this
    // point of the pipeline, otherwise the main live-range will be
    // defined more than once, which is against SSA.
    assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?");
    // Record in the cache that we know nothing for MI.
    // This will get updated later, and in the meantime, if we reach that
    // phi again, because of a loop, we will cut the search thanks to this
    // cache entry.
    // We could actually build up more information on the phi by not cutting
    // the search, but that additional information is more a side effect
    // than an intended choice.
    // Therefore, for now, we save compile time until we have a proper way
    // to derive known bits for PHIs within loops.
    ComputeKnownBitsCache[R] = KnownBits(BitWidth);
    // A PHI's operands are registers and basic blocks, interleaved; we only
    // care about the register operands.
    for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
      const MachineOperand &Src = MI.getOperand(Idx);
      Register SrcReg = Src.getReg();
      // Look through trivial copies and phis, but not through copies or phis
      // of the form `%1:(s32) = OP %0:gpr32`: known-bits analysis is
      // currently unable to determine the bit width of a register class.
      //
      // We can't use NoSubRegister by name as it's defined by each target but
      // it's always defined to be 0 by tablegen.
      if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
          MRI.getType(SrcReg).isValid()) {
        // For COPYs we don't do anything, don't increase the depth.
        computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
                             Depth + (Opcode != TargetOpcode::COPY));
        Known = Known.intersectWith(Known2);
        // If we reach a point where we don't know anything
        // just stop looking through the operands.
        if (Known.isUnknown())
          break;
      } else {
        // We know nothing.
        Known = KnownBits(BitWidth);
        break;
      }
    }
    break;
  }
  case TargetOpcode::G_CONSTANT: {
    Known = KnownBits::makeConstant(MI.getOperand(1).getCImm()->getValue());
    break;
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI.getOperand(1).getIndex();
    TL.computeKnownBitsForFrameIndex(FrameIdx, Known, MF);
    break;
  }
  case TargetOpcode::G_SUB: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::computeForAddSub(/*Add=*/false, /*NSW=*/false,
                                        /*NUW=*/false, Known, Known2);
    break;
  }
  case TargetOpcode::G_XOR: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known ^= Known2;
    break;
  }
  case TargetOpcode::G_PTR_ADD: {
    if (DstTy.isVector())
      break;
    // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets?
    LLT Ty = MRI.getType(MI.getOperand(1).getReg());
    if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
      break;
    [[fallthrough]];
  }
  case TargetOpcode::G_ADD: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false,
                                        /*NUW=*/false, Known, Known2);
    break;
  }
  case TargetOpcode::G_AND: {
    // If either the LHS or the RHS is Zero, the result is Zero.
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known &= Known2;
    break;
  }
  case TargetOpcode::G_OR: {
    // If either the LHS or the RHS is One, the result is One.
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known |= Known2;
    break;
  }
  case TargetOpcode::G_MUL: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::mul(Known, Known2);
    break;
  }
  case TargetOpcode::G_SELECT: {
    computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
                        Known, DemandedElts, Depth + 1);
    break;
  }
  case TargetOpcode::G_SMIN: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_SMAX: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMIN: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::umin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMAX: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::umax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    if (DstTy.isVector())
      break;
    if (TL.getBooleanContents(DstTy.isVector(),
                              Opcode == TargetOpcode::G_FCMP) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case TargetOpcode::G_SEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    // If the sign bit is known to be zero or one, then sext will extend
    // it to the top bits, else it will just zext.
    Known = Known.sext(BitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_SEXT:
  case TargetOpcode::G_SEXT_INREG: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.sextInReg(MI.getOperand(2).getImm());
    break;
  }
  case TargetOpcode::G_ANYEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.anyext(BitWidth);
    break;
  }
  case TargetOpcode::G_LOAD: {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = KnownRange.anyext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    if (DstTy.isVector())
      break;
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = Opcode == TargetOpcode::G_SEXTLOAD
                ? KnownRange.sext(Known.getBitWidth())
                : KnownRange.zext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_ASHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::ashr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_LSHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::lshr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_SHL: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::shl(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_PTRTOINT:
    if (DstTy.isVector())
      break;
    // Fall through and handle them the same as zext/trunc.
    [[fallthrough]];
  case TargetOpcode::G_ASSERT_ZEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_TRUNC: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    unsigned SrcBitWidth;

    // G_ASSERT_ZEXT stores the original bitwidth in the immediate operand.
    if (Opcode == TargetOpcode::G_ASSERT_ZEXT)
      SrcBitWidth = MI.getOperand(2).getImm();
    else {
      SrcBitWidth = SrcTy.isPointer()
                        ? DL.getIndexSizeInBits(SrcTy.getAddressSpace())
                        : SrcTy.getSizeInBits();
    }
    assert(SrcBitWidth && "SrcBitWidth can't be zero");
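    // Resize the accumulator to the source width, compute the source's known
    // bits, then resize back to the destination width. For the zero-extending
    // opcodes, the bits above SrcBitWidth are additionally known zero.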
    Known = Known.zextOrTrunc(SrcBitWidth);
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.zextOrTrunc(BitWidth);
    if (BitWidth > SrcBitWidth)
      Known.Zero.setBitsFrom(SrcBitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_ALIGN: {
    int64_t LogOfAlign = Log2_64(MI.getOperand(2).getImm());

    // TODO: Should use maximum with source
    // If a node is guaranteed to be aligned, set low zero bits accordingly as
    // well as clearing one bits.
    Known.Zero.setLowBits(LogOfAlign);
    Known.One.clearLowBits(LogOfAlign);
    break;
  }
  case TargetOpcode::G_MERGE_VALUES: {
    unsigned NumOps = MI.getNumOperands();
    unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

    for (unsigned I = 0; I != NumOps - 1; ++I) {
      KnownBits SrcOpKnown;
      computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown,
                           DemandedElts, Depth + 1);
      Known.insertBits(SrcOpKnown, I * OpSize);
    }
    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    if (DstTy.isVector())
      break;
    unsigned NumOps = MI.getNumOperands();
    Register SrcReg = MI.getOperand(NumOps - 1).getReg();
    if (MRI.getType(SrcReg).isVector())
      return; // TODO: Handle vectors.

    KnownBits SrcOpKnown;
    computeKnownBitsImpl(SrcReg, SrcOpKnown, DemandedElts, Depth + 1);

    // Figure out the result operand index
    unsigned DstIdx = 0;
    for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R;
         ++DstIdx)
      ;

    Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
    break;
  }
  case TargetOpcode::G_BSWAP: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.byteSwap();
    break;
  }
  case TargetOpcode::G_BITREVERSE: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.reverseBits();
    break;
  }
  case TargetOpcode::G_CTPOP: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    // We can bound the space the count needs. Also, bits known to be zero
    // can't contribute to the population.
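    // For example, if at most 5 source bits can be set, the count is at most
    // 5, which fits in bit_width(5) = 3 bits, so every result bit from bit 3
    // up is known zero.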
    unsigned BitsPossiblySet = Known2.countMaxPopulation();
    unsigned LowBits = llvm::bit_width(BitsPossiblySet);
    Known.Zero.setBitsFrom(LowBits);
    // TODO: we could bound Known.One using the lower bound on the number of
    // bits which might be set provided by popcnt KnownOne2.
    break;
  }
  case TargetOpcode::G_UBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    break;
  }
  case TargetOpcode::G_SBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    // Sign extend the extracted value using shift left and arithmetic shift
    // right.
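    // Since the field width is only known as a range, the shift amount
    // (BitWidth - Width) is itself only known as a KnownBits value;
    // computeForAddSub propagates that uncertainty through the subtraction.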
    KnownBits ExtKnown = KnownBits::makeConstant(APInt(BitWidth, BitWidth));
    KnownBits ShiftKnown = KnownBits::computeForAddSub(
        /*Add=*/false, /*NSW=*/false, /*NUW=*/false, ExtKnown, WidthKnown);
    Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown);
    break;
  }
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO: {
    if (MI.getOperand(1).getReg() == R) {
      // If we know the result of a compare has the top bits zero, use this
      // info.
      if (TL.getBooleanContents(DstTy.isVector(), false) ==
              TargetLowering::ZeroOrOneBooleanContent &&
          BitWidth > 1)
        Known.Zero.setBitsFrom(1);
    }
    break;
  }
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    KnownBits SrcOpKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    // If we have a known 1, its position is our upper bound.
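    // The count is then at most PossibleLZ so, as in the G_CTPOP case, the
    // result fits in bit_width(PossibleLZ) bits and everything above that is
    // known zero.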
    unsigned PossibleLZ = SrcOpKnown.countMaxLeadingZeros();
    unsigned LowBits = llvm::bit_width(PossibleLZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  }

  LLVM_DEBUG(dumpResult(MI, Known, Depth));

  // Update the cache.
  ComputeKnownBitsCache[R] = Known;
}

/// Compute number of sign bits for the intersection of \p Src0 and \p Src1
unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1,
                                               const APInt &DemandedElts,
                                               unsigned Depth) {
  // Test src1 first, since we canonicalize simpler expressions to the RHS.
  unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth);
  if (Src1SignBits == 1)
    return 1;
  return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
}

/// Compute the known number of sign bits with attached range metadata in the
/// memory operand. If this is an extending load, accounts for the behavior of
/// the high bits.
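/// Every value in [SignedMin, SignedMax] has at least as many sign bits as
/// whichever endpoint has fewer, so the minimum over both endpoints is a
/// sound bound.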
static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
                                                    unsigned TyBits) {
  const MDNode *Ranges = Ld->getRanges();
  if (!Ranges)
    return 1;

  ConstantRange CR = getConstantRangeFromMetadata(*Ranges);
  if (TyBits > CR.getBitWidth()) {
    switch (Ld->getOpcode()) {
    case TargetOpcode::G_SEXTLOAD:
      CR = CR.signExtend(TyBits);
      break;
    case TargetOpcode::G_ZEXTLOAD:
      CR = CR.zeroExtend(TyBits);
      break;
    default:
      break;
    }
  }

  return std::min(CR.getSignedMin().getNumSignBits(),
                  CR.getSignedMax().getNumSignBits());
}

unsigned GISelKnownBits::computeNumSignBits(Register R,
                                            const APInt &DemandedElts,
                                            unsigned Depth) {
  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();

  if (Opcode == TargetOpcode::G_CONSTANT)
    return MI.getOperand(1).getCImm()->getValue().getNumSignBits();

  if (Depth == getMaxDepth())
    return 1;

  if (!DemandedElts)
    return 1; // No demanded elts, better to assume we don't know anything.

  LLT DstTy = MRI.getType(R);
  const unsigned TyBits = DstTy.getScalarSizeInBits();

  // Handle the case where this is called on a register that does not have a
  // type constraint. This is unlikely to occur except by looking through
  // copies but it is possible for the initial register being queried to be in
  // this state.
  if (!DstTy.isValid())
    return 1;

  unsigned FirstAnswer = 1;
  switch (Opcode) {
  case TargetOpcode::COPY: {
    MachineOperand &Src = MI.getOperand(1);
    if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
        MRI.getType(Src.getReg()).isValid()) {
      // Don't increment Depth for this one since we didn't do any work.
      return computeNumSignBits(Src.getReg(), DemandedElts, Depth);
    }

    return 1;
  }
  case TargetOpcode::G_SEXT: {
    Register Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(Src);
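    // Sign extension adds DstBits - SrcBits copies of the sign bit, e.g.
    // s8 -> s32 adds 24 to the source's sign-bit count.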
    unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
    return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
  }
  case TargetOpcode::G_ASSERT_SEXT:
  case TargetOpcode::G_SEXT_INREG: {
    // Max of the input and what this extends.
    Register Src = MI.getOperand(1).getReg();
    unsigned SrcBits = MI.getOperand(2).getImm();
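    // A value sign-extended in-register from SrcBits has its top
    // TyBits - SrcBits + 1 bits equal to the sign bit, e.g. an s32 value
    // extended from 8 bits has at least 32 - 8 + 1 = 25 sign bits.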
    unsigned InRegBits = TyBits - SrcBits + 1;
    return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1),
                    InRegBits);
  }
  case TargetOpcode::G_LOAD: {
    GLoad *Ld = cast<GLoad>(&MI);
    if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
      break;

    return computeNumSignBitsFromRangeMetadata(Ld, TyBits);
  }
  case TargetOpcode::G_SEXTLOAD: {
    GSExtLoad *Ld = cast<GSExtLoad>(&MI);

    // FIXME: We need an in-memory type representation.
    if (DstTy.isVector())
      return 1;

    unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
    if (NumBits != 1)
      return NumBits;

    // e.g. i16->i32 = '17' bits known.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return TyBits - MMO->getSizeInBits().getValue() + 1;
  }
  case TargetOpcode::G_ZEXTLOAD: {
    GZExtLoad *Ld = cast<GZExtLoad>(&MI);

    // FIXME: We need an in-memory type representation.
    if (DstTy.isVector())
      return 1;

    unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
    if (NumBits != 1)
      return NumBits;

    // e.g. i16->i32 = '16' bits known.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return TyBits - MMO->getSizeInBits().getValue();
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    Register Src1 = MI.getOperand(1).getReg();
    unsigned Src1NumSignBits =
        computeNumSignBits(Src1, DemandedElts, Depth + 1);
    if (Src1NumSignBits != 1) {
      Register Src2 = MI.getOperand(2).getReg();
      unsigned Src2NumSignBits =
          computeNumSignBits(Src2, DemandedElts, Depth + 1);
      FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits);
    }
    break;
  }
  case TargetOpcode::G_TRUNC: {
    Register Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(Src);

    // Check if the sign bits of source go down as far as the truncated value.
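    // e.g. truncating an s64 with 40 known sign bits to s32 leaves
    // 40 - (64 - 32) = 8 known sign bits.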
    unsigned DstTyBits = DstTy.getScalarSizeInBits();
    unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
    unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1);
    if (NumSrcSignBits > (NumSrcBits - DstTyBits))
      return NumSrcSignBits - (NumSrcBits - DstTyBits);
    break;
  }
  case TargetOpcode::G_SELECT: {
    return computeNumSignBitsMin(MI.getOperand(2).getReg(),
                                 MI.getOperand(3).getReg(), DemandedElts,
                                 Depth + 1);
  }
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // If the compare result is 0/-1, all bits are sign bits.
    // We know that we have an integer-based boolean since these operations
    // are only available for integers.
    if (MI.getOperand(1).getReg() == R) {
      if (TL.getBooleanContents(DstTy.isVector(), false) ==
          TargetLowering::ZeroOrNegativeOneBooleanContent)
        return TyBits;
    }

    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    bool IsFP = Opcode == TargetOpcode::G_FCMP;
    if (TyBits == 1)
      break;
    auto BC = TL.getBooleanContents(DstTy.isVector(), IsFP);
    if (BC == TargetLoweringBase::ZeroOrNegativeOneBooleanContent)
      return TyBits; // All bits are sign bits.
    if (BC == TargetLowering::ZeroOrOneBooleanContent)
      return TyBits - 1; // Every always-zero bit is a sign bit.
    break;
  }
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  default: {
    unsigned NumBits =
        TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth);
    if (NumBits > 1)
      FirstAnswer = std::max(FirstAnswer, NumBits);
    break;
  }
  }

  // Finally, if we can prove that the top bits of the result are 0's or 1's,
  // use this information.
  KnownBits Known = getKnownBits(R, DemandedElts, Depth);
  APInt Mask;
  if (Known.isNonNegative()) { // sign bit is 0
    Mask = Known.Zero;
  } else if (Known.isNegative()) { // sign bit is 1
    Mask = Known.One;
  } else {
    // Nothing known.
    return FirstAnswer;
  }

  // Okay, we know that the sign bit in Mask is set. Use CLO to determine
  // the number of identical bits in the top of the input value.
  Mask <<= Mask.getBitWidth() - TyBits;
  return std::max(FirstAnswer, Mask.countl_one());
}

unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) {
  LLT Ty = MRI.getType(R);
  APInt DemandedElts =
      Ty.isVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  return computeNumSignBits(R, DemandedElts, Depth);
}

void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  MachineFunctionPass::getAnalysisUsage(AU);
}

bool GISelKnownBitsAnalysis::runOnMachineFunction(MachineFunction &MF) {
  return false;
}

GISelKnownBits &GISelKnownBitsAnalysis::get(MachineFunction &MF) {
  if (!Info) {
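    // Use a shallower recursion limit at -O0, presumably to keep compile
    // time down when not optimizing.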
    unsigned MaxDepth =
        MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6;
    Info = std::make_unique<GISelKnownBits>(MF, MaxDepth);
  }
  return *Info;
}