//=== lib/CodeGen/GlobalISel/AMDGPUCombinerHelper.cpp ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUCombinerHelper.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
using namespace MIPatternMatch;

AMDGPUCombinerHelper::AMDGPUCombinerHelper(
    GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize,
    GISelValueTracking *VT, MachineDominatorTree *MDT, const LegalizerInfo *LI,
    const GCNSubtarget &STI)
    : CombinerHelper(Observer, B, IsPreLegalize, VT, MDT, LI), STI(STI),
      TII(*STI.getInstrInfo()) {}

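// Return true if an fneg of MI's result can be folded into MI itself, either
// by negating its floating-point sources or by switching to the opposite
// min/max opcode.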
LLVM_READNONE
static bool fnegFoldsIntoMI(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return true;
  case AMDGPU::G_INTRINSIC: {
    Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
    case Intrinsic::amdgcn_fma_legacy:
      return true;
    default:
      return false;
    }
  }
  default:
    return false;
  }
}

/// \returns true if the operation will definitely need to use a 64-bit
/// encoding, and thus will use a VOP3 encoding regardless of the source
/// modifiers.
LLVM_READONLY
static bool opMustUseVOP3Encoding(const MachineInstr &MI,
                                  const MachineRegisterInfo &MRI) {
  return MI.getNumOperands() > (isa<GIntrinsic>(MI) ? 4u : 3u) ||
         MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
}

// Most FP instructions support source modifiers.
LLVM_READONLY
static bool hasSourceMods(const MachineInstr &MI) {
  if (!MI.memoperands().empty())
    return false;

  switch (MI.getOpcode()) {
  case AMDGPU::COPY:
  case AMDGPU::G_SELECT:
  case AMDGPU::G_FDIV:
  case AMDGPU::G_FREM:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
  case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  case AMDGPU::G_BITCAST:
  case AMDGPU::G_ANYEXT:
  case AMDGPU::G_BUILD_VECTOR:
  case AMDGPU::G_BUILD_VECTOR_TRUNC:
  case AMDGPU::G_PHI:
    return false;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_interp_p1:
    case Intrinsic::amdgcn_interp_p2:
    case Intrinsic::amdgcn_interp_mov:
    case Intrinsic::amdgcn_interp_p1_f16:
    case Intrinsic::amdgcn_interp_p2_f16:
    case Intrinsic::amdgcn_div_scale:
      return false;
    default:
      return true;
    }
  }
  default:
    return true;
  }
}

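// Return true if every non-debug user of MI's result supports source
// modifiers, and at most CostThreshold of those users would have to grow to
// the 64-bit VOP3 encoding in order to use one.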
static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  unsigned CostThreshold = 4) {
  // Some users (such as 3-operand FMA/MAD) must already use a VOP3 encoding,
  // and for those it is truly free to use a source modifier. If there are
  // multiple users and each of them would have to switch to VOP3, there will
  // be a code size increase. Try to avoid increasing code size unless we know
  // it will save on the instruction count.
  unsigned NumMayIncreaseSize = 0;
  Register Dst = MI.getOperand(0).getReg();
  for (const MachineInstr &Use : MRI.use_nodbg_instructions(Dst)) {
    if (!hasSourceMods(Use))
      return false;

    if (!opMustUseVOP3Encoding(Use, MRI)) {
      if (++NumMayIncreaseSize > CostThreshold)
        return false;
    }
  }
  return true;
}

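// With the nsz flag the sign of a zero result does not matter, so folds such
// as fneg (fadd x, y) -> fadd (fneg x), (fneg y) are allowed even though they
// can flip the sign of a zero result.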
static bool mayIgnoreSignedZero(MachineInstr &MI) {
  return MI.getFlag(MachineInstr::MIFlag::FmNsz);
}

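// Return true if APF is bitwise equal to 1.0 / (2.0 * pi) in half, single, or
// double precision.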
static bool isInv2Pi(const APFloat &APF) {
  static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
  static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
  static const APFloat KF64(APFloat::IEEEdouble(),
                            APInt(64, 0x3fc45f306dc9c882));

  return APF.bitwiseIsEqual(KF16) || APF.bitwiseIsEqual(KF32) ||
         APF.bitwiseIsEqual(KF64);
}

// 0 and 1.0 / (2.0 * pi) have inline immediates, but their negated forms do
// not, so there is an additional cost to negate them.
static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg,
                                       MachineRegisterInfo &MRI) {
  std::optional<FPValueAndVReg> FPValReg;
  if (mi_match(Reg, MRI, m_GFCstOrSplat(FPValReg))) {
    if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
      return true;

    const GCNSubtarget &ST = MI.getMF()->getSubtarget<GCNSubtarget>();
    if (ST.hasInv2PiInlineImm() && isInv2Pi(FPValReg->Value))
      return true;
  }
  return false;
}

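// Return the opposite min/max opcode. Used when pushing an fneg through a
// min/max, since -min(x, y) == max(-x, -y) and vice versa.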
static unsigned inverseMinMax(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::G_FMAXNUM:
    return AMDGPU::G_FMINNUM;
  case AMDGPU::G_FMINNUM:
    return AMDGPU::G_FMAXNUM;
  case AMDGPU::G_FMAXNUM_IEEE:
    return AMDGPU::G_FMINNUM_IEEE;
  case AMDGPU::G_FMINNUM_IEEE:
    return AMDGPU::G_FMAXNUM_IEEE;
  case AMDGPU::G_FMAXIMUM:
    return AMDGPU::G_FMINIMUM;
  case AMDGPU::G_FMINIMUM:
    return AMDGPU::G_FMAXIMUM;
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return AMDGPU::G_AMDGPU_FMIN_LEGACY;
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
    return AMDGPU::G_AMDGPU_FMAX_LEGACY;
  default:
    llvm_unreachable("invalid min/max opcode");
  }
}

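// Match an fneg whose source instruction (MatchInfo) can absorb the negation,
// either by negating its own sources or by switching to the opposite min/max
// opcode, provided doing so is profitable for the other users of that source.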
bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) const {
  Register Src = MI.getOperand(1).getReg();
  MatchInfo = MRI.getVRegDef(Src);

  // If the input has multiple uses and we can either fold the negate down, or
  // the other uses cannot, give up. This both prevents unprofitable
  // transformations and infinite loops: we won't repeatedly try to fold around
  // a negate that has no 'good' form.
  if (MRI.hasOneNonDBGUse(Src)) {
    if (allUsesHaveSourceMods(MI, MRI, 0))
      return false;
  } else {
    if (fnegFoldsIntoMI(*MatchInfo) &&
        (allUsesHaveSourceMods(MI, MRI) ||
         !allUsesHaveSourceMods(*MatchInfo, MRI)))
      return false;
  }

  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    // 0 doesn't have a negated inline immediate.
    return !isConstantCostlierToNegate(*MatchInfo,
                                       MatchInfo->getOperand(2).getReg(), MRI);
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
    return mayIgnoreSignedZero(*MatchInfo);
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
    return true;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
      return true;
    case Intrinsic::amdgcn_fma_legacy:
      return mayIgnoreSignedZero(*MatchInfo);
    default:
      return false;
    }
  }
  default:
    return false;
  }
}

void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) const {
  // Transform:
  // %A = inst %Op1, ...
  // %B = fneg %A
  //
  // into:
  //
  // (if %A has one use, specifically fneg above)
  // %B = inst (maybe fneg %Op1), ...
  //
  // (if %A has multiple uses)
  // %B = inst (maybe fneg %Op1), ...
  // %A = fneg %B

  // Replace register in operand with a register holding negated value.
  auto NegateOperand = [&](MachineOperand &Op) {
    Register Reg = Op.getReg();
    if (!mi_match(Reg, MRI, m_GFNeg(m_Reg(Reg))))
      Reg = Builder.buildFNeg(MRI.getType(Reg), Reg).getReg(0);
    replaceRegOpWith(MRI, Op, Reg);
  };

  // Replace either register in operands with a register holding negated value.
  auto NegateEitherOperand = [&](MachineOperand &X, MachineOperand &Y) {
    Register XReg = X.getReg();
    Register YReg = Y.getReg();
    if (mi_match(XReg, MRI, m_GFNeg(m_Reg(XReg))))
      replaceRegOpWith(MRI, X, XReg);
    else if (mi_match(YReg, MRI, m_GFNeg(m_Reg(YReg))))
      replaceRegOpWith(MRI, Y, YReg);
    else {
      YReg = Builder.buildFNeg(MRI.getType(YReg), YReg).getReg(0);
      replaceRegOpWith(MRI, Y, YReg);
    }
  };

  Builder.setInstrAndDebugLoc(*MatchInfo);

  // Negate appropriate operands so that resulting value of MatchInfo is
  // negated.
  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
    NegateOperand(MatchInfo->getOperand(1));
    NegateOperand(MatchInfo->getOperand(2));
    break;
  case AMDGPU::G_FMUL:
    NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
    break;
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
    NegateOperand(MatchInfo->getOperand(1));
    NegateOperand(MatchInfo->getOperand(2));
    unsigned Opposite = inverseMinMax(MatchInfo->getOpcode());
    replaceOpcodeWith(*MatchInfo, Opposite);
    break;
  }
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
    NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
    NegateOperand(MatchInfo->getOperand(3));
    break;
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_FPTRUNC:
    NegateOperand(MatchInfo->getOperand(1));
    break;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
      NegateOperand(MatchInfo->getOperand(2));
      break;
    case Intrinsic::amdgcn_fmul_legacy:
      NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
      break;
    case Intrinsic::amdgcn_fmed3:
      NegateOperand(MatchInfo->getOperand(2));
      NegateOperand(MatchInfo->getOperand(3));
      NegateOperand(MatchInfo->getOperand(4));
      break;
    case Intrinsic::amdgcn_fma_legacy:
      NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
      NegateOperand(MatchInfo->getOperand(4));
      break;
    default:
      llvm_unreachable("folding fneg not supported for this intrinsic");
    }
    break;
  }
  default:
    llvm_unreachable("folding fneg not supported for this instruction");
  }

  Register Dst = MI.getOperand(0).getReg();
  Register MatchInfoDst = MatchInfo->getOperand(0).getReg();

  if (MRI.hasOneNonDBGUse(MatchInfoDst)) {
    // MatchInfo now has the negated value, so use that instead of the old Dst.
    replaceRegWith(MRI, Dst, MatchInfoDst);
  } else {
    // We want to swap all uses of Dst with uses of MatchInfoDst and vice versa
    // but replaceRegWith will replace defs as well. It is easier to replace one
    // def with a new register.
    LLT Type = MRI.getType(Dst);
    Register NegatedMatchInfo = MRI.createGenericVirtualRegister(Type);
    replaceRegOpWith(MRI, MatchInfo->getOperand(0), NegatedMatchInfo);

    // MatchInfo now has the negated value, so use that instead of the old Dst.
    replaceRegWith(MRI, Dst, NegatedMatchInfo);

    // Recreate the non-negated value for the other uses of the old
    // MatchInfoDst.
    auto NextInst = ++MatchInfo->getIterator();
    Builder.setInstrAndDebugLoc(*NextInst);
    Builder.buildFNeg(MatchInfoDst, NegatedMatchInfo, MI.getFlags());
  }

  MI.eraseFromParent();
}

// TODO: Should return converted value / extension source and avoid introducing
// intermediate fptruncs in the apply function.
static bool isFPExtFromF16OrConst(const MachineRegisterInfo &MRI,
                                  Register Reg) {
  const MachineInstr *Def = MRI.getVRegDef(Reg);
  if (Def->getOpcode() == TargetOpcode::G_FPEXT) {
    Register SrcReg = Def->getOperand(1).getReg();
    return MRI.getType(SrcReg) == LLT::scalar(16);
  }

  if (Def->getOpcode() == TargetOpcode::G_FCONSTANT) {
    APFloat Val = Def->getOperand(1).getFPImm()->getValueAPF();
    bool LosesInfo = true;
    Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
    return !LosesInfo;
  }

  return false;
}

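// Match a G_FPTRUNC to f16 of an f32 fmed3 whose sources are all either
// extensions from f16 or constants exactly representable in f16, so the
// median can instead be computed directly in f16.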
bool AMDGPUCombinerHelper::matchExpandPromotedF16FMed3(MachineInstr &MI,
                                                       Register Src0,
                                                       Register Src1,
                                                       Register Src2) const {
  assert(MI.getOpcode() == TargetOpcode::G_FPTRUNC);
  Register SrcReg = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(SrcReg) || MRI.getType(SrcReg) != LLT::scalar(32))
    return false;

  return isFPExtFromF16OrConst(MRI, Src0) && isFPExtFromF16OrConst(MRI, Src1) &&
         isFPExtFromF16OrConst(MRI, Src2);
}

void AMDGPUCombinerHelper::applyExpandPromotedF16FMed3(MachineInstr &MI,
                                                       Register Src0,
                                                       Register Src1,
                                                       Register Src2) const {
  // We expect fptrunc (fpext x) to fold out, and to constant fold any constant
  // sources.
  Src0 = Builder.buildFPTrunc(LLT::scalar(16), Src0).getReg(0);
  Src1 = Builder.buildFPTrunc(LLT::scalar(16), Src1).getReg(0);
  Src2 = Builder.buildFPTrunc(LLT::scalar(16), Src2).getReg(0);

  LLT Ty = MRI.getType(Src0);
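  // Expand the median as min/max: med3(x, y, z) == min(max(x, y),
  // max(min(x, y), z)).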
  auto A1 = Builder.buildFMinNumIEEE(Ty, Src0, Src1);
  auto B1 = Builder.buildFMaxNumIEEE(Ty, Src0, Src1);
  auto C1 = Builder.buildFMaxNumIEEE(Ty, A1, Src2);
  Builder.buildFMinNumIEEE(MI.getOperand(0), B1, C1);
  MI.eraseFromParent();
}

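// Match fmul x, (select cond, K0, K1) where K0 and K1 are scalar or splat
// constants with the same sign whose magnitudes are exact powers of two; such
// a multiply can be rewritten as ldexp of x (negated if the constants are
// negative) by a select of the two exponents.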
bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp(
    MachineInstr &MI, MachineInstr &Sel,
    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
  assert(MI.getOpcode() == TargetOpcode::G_FMUL);
  assert(Sel.getOpcode() == TargetOpcode::G_SELECT);
  assert(MI.getOperand(2).getReg() == Sel.getOperand(0).getReg());

  Register Dst = MI.getOperand(0).getReg();
  LLT DestTy = MRI.getType(Dst);
  LLT ScalarDestTy = DestTy.getScalarType();

  if ((ScalarDestTy != LLT::float64() && ScalarDestTy != LLT::float32() &&
       ScalarDestTy != LLT::float16()) ||
      !MRI.hasOneNonDBGUse(Sel.getOperand(0).getReg()))
    return false;

  Register SelectCondReg = Sel.getOperand(1).getReg();
  MachineInstr *SelectTrue = MRI.getVRegDef(Sel.getOperand(2).getReg());
  MachineInstr *SelectFalse = MRI.getVRegDef(Sel.getOperand(3).getReg());

  const auto SelectTrueVal =
      isConstantOrConstantSplatVectorFP(*SelectTrue, MRI);
  if (!SelectTrueVal)
    return false;
  const auto SelectFalseVal =
      isConstantOrConstantSplatVectorFP(*SelectFalse, MRI);
  if (!SelectFalseVal)
    return false;

  if (SelectTrueVal->isNegative() != SelectFalseVal->isNegative())
    return false;

  // For f32, only non-inline constants should be transformed.
  if (ScalarDestTy == LLT::float32() && TII.isInlineConstant(*SelectTrueVal) &&
      TII.isInlineConstant(*SelectFalseVal))
    return false;

  int SelectTrueLog2Val = SelectTrueVal->getExactLog2Abs();
  if (SelectTrueLog2Val == INT_MIN)
    return false;
  int SelectFalseLog2Val = SelectFalseVal->getExactLog2Abs();
  if (SelectFalseLog2Val == INT_MIN)
    return false;

  MatchInfo = [=, &MI](MachineIRBuilder &Builder) {
    LLT IntDestTy = DestTy.changeElementType(LLT::scalar(32));
    auto NewSel = Builder.buildSelect(
        IntDestTy, SelectCondReg,
        Builder.buildConstant(IntDestTy, SelectTrueLog2Val),
        Builder.buildConstant(IntDestTy, SelectFalseLog2Val));

    Register XReg = MI.getOperand(1).getReg();
    if (SelectTrueVal->isNegative()) {
      auto NegX =
          Builder.buildFNeg(DestTy, XReg, MRI.getVRegDef(XReg)->getFlags());
      Builder.buildFLdexp(Dst, NegX, NewSel, MI.getFlags());
    } else {
      Builder.buildFLdexp(Dst, XReg, NewSel, MI.getFlags());
    }
  };

  return true;
}

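// Return true if Reg is a constant whose set bits form a contiguous mask that
// covers all of the low 32 bits or all of the high 32 bits.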
bool AMDGPUCombinerHelper::matchConstantIs32BitMask(Register Reg) const {
  auto Res = getIConstantVRegValWithLookThrough(Reg, MRI);
  if (!Res)
    return false;

  const uint64_t Val = Res->Value.getZExtValue();
  unsigned MaskIdx = 0;
  unsigned MaskLen = 0;
  if (!isShiftedMask_64(Val, MaskIdx, MaskLen))
    return false;

  // Check if low 32 bits or high 32 bits are all ones.
  return MaskLen >= 32 && ((MaskIdx == 0) || (MaskIdx == 64 - MaskLen));
}