//===-- AMDGPURegBankLegalizeRules.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Definitions of RegBankLegalize Rules for all opcodes.
/// Implementation of container for all the Rules and search.
/// Fast search for most common case when Rule.Predicate checks LLT and
/// uniformity of register in operand 0.
//
//===----------------------------------------------------------------------===//

#include "AMDGPURegBankLegalizeRules.h"
#include "AMDGPUInstrInfo.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/AMDGPUAddrSpace.h"

#define DEBUG_TYPE "amdgpu-regbanklegalize"

using namespace llvm;
using namespace AMDGPU;

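// True for any pointer type whose total size is Width bits, regardless of
// address space.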
bool AMDGPU::isAnyPtr(LLT Ty, unsigned Width) {
  return Ty.isPointer() && Ty.getSizeInBits() == Width;
}

RegBankLLTMapping::RegBankLLTMapping(
    std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
    std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
    LoweringMethodID LoweringMethod)
    : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
      LoweringMethod(LoweringMethod) {}

PredicateMapping::PredicateMapping(
    std::initializer_list<UniformityLLTOpPredicateID> OpList,
    std::function<bool(const MachineInstr &)> TestFunc)
    : OpUniformityAndTypes(OpList), TestFunc(TestFunc) {}

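// Returns true if Reg has the LLT and, for Uni*/Div* IDs, the uniformity or
// divergence encoded by UniID. '_' matches anything.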
bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
                           const MachineUniformityInfo &MUI,
                           const MachineRegisterInfo &MRI) {
  switch (UniID) {
  case S1:
    return MRI.getType(Reg) == LLT::scalar(1);
  case S16:
    return MRI.getType(Reg) == LLT::scalar(16);
  case S32:
    return MRI.getType(Reg) == LLT::scalar(32);
  case S64:
    return MRI.getType(Reg) == LLT::scalar(64);
  case S128:
    return MRI.getType(Reg) == LLT::scalar(128);
  case P0:
    return MRI.getType(Reg) == LLT::pointer(0, 64);
  case P1:
    return MRI.getType(Reg) == LLT::pointer(1, 64);
  case P3:
    return MRI.getType(Reg) == LLT::pointer(3, 32);
  case P4:
    return MRI.getType(Reg) == LLT::pointer(4, 64);
  case P5:
    return MRI.getType(Reg) == LLT::pointer(5, 32);
  case Ptr32:
    return isAnyPtr(MRI.getType(Reg), 32);
  case Ptr64:
    return isAnyPtr(MRI.getType(Reg), 64);
  case Ptr128:
    return isAnyPtr(MRI.getType(Reg), 128);
  case V2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
  case V4S32:
    return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
  case B32:
    return MRI.getType(Reg).getSizeInBits() == 32;
  case B64:
    return MRI.getType(Reg).getSizeInBits() == 64;
  case B96:
    return MRI.getType(Reg).getSizeInBits() == 96;
  case B128:
    return MRI.getType(Reg).getSizeInBits() == 128;
  case B256:
    return MRI.getType(Reg).getSizeInBits() == 256;
  case B512:
    return MRI.getType(Reg).getSizeInBits() == 512;
  case UniS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isUniform(Reg);
  case UniS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isUniform(Reg);
  case UniS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniform(Reg);
  case UniS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniform(Reg);
  case UniS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isUniform(Reg);
  case UniP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isUniform(Reg);
  case UniP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniform(Reg);
  case UniP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isUniform(Reg);
  case UniP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
  case UniP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
  case UniPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniform(Reg);
  case UniPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isUniform(Reg);
  case UniPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniform(Reg);
  case UniV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg);
  case UniB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
  case UniB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniform(Reg);
  case UniB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniform(Reg);
  case UniB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniform(Reg);
  case UniB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniform(Reg);
  case UniB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg);
  case DivS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg);
  case DivS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isDivergent(Reg);
  case DivS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg);
  case DivS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergent(Reg);
  case DivS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isDivergent(Reg);
  case DivP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isDivergent(Reg);
  case DivP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergent(Reg);
  case DivP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isDivergent(Reg);
  case DivP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergent(Reg);
  case DivP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergent(Reg);
  case DivPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isDivergent(Reg);
  case DivPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isDivergent(Reg);
  case DivPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergent(Reg);
  case DivV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isDivergent(Reg);
  case DivB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
  case DivB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergent(Reg);
  case DivB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergent(Reg);
  case DivB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergent(Reg);
  case DivB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergent(Reg);
  case DivB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(Reg);
  case _:
    return true;
  default:
    llvm_unreachable("missing matchUniformityAndLLT");
  }
}

bool PredicateMapping::match(const MachineInstr &MI,
                             const MachineUniformityInfo &MUI,
                             const MachineRegisterInfo &MRI) const {
  // Check LLT signature.
  for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
    if (OpUniformityAndTypes[i] == _) {
      if (MI.getOperand(i).isReg())
        return false;
      continue;
    }

    // Remaining IDs check registers.
    if (!MI.getOperand(i).isReg())
      return false;

    if (!matchUniformityAndLLT(MI.getOperand(i).getReg(),
                               OpUniformityAndTypes[i], MUI, MRI))
      return false;
  }

  // More complex check.
  if (TestFunc)
    return TestFunc(MI);

  return true;
}

SetOfRulesForOpcode::SetOfRulesForOpcode() {}

SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
    : FastTypes(FastTypes) {}

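// Map an LLT to the exact-type predicate ID used to look up fast-rule slots.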
UniformityLLTOpPredicateID LLTToId(LLT Ty) {
  if (Ty == LLT::scalar(16))
    return S16;
  if (Ty == LLT::scalar(32))
    return S32;
  if (Ty == LLT::scalar(64))
    return S64;
  if (Ty == LLT::fixed_vector(2, 16))
    return V2S16;
  if (Ty == LLT::fixed_vector(2, 32))
    return V2S32;
  if (Ty == LLT::fixed_vector(3, 32))
    return V3S32;
  if (Ty == LLT::fixed_vector(4, 32))
    return V4S32;
  return _;
}

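// Map an LLT to a size-only (B32/B64/B96/B128) predicate ID; types with the
// same total size share a slot in the StandardB fast-rule table.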
UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
  if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
      isAnyPtr(Ty, 32))
    return B32;
  if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
      Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64))
    return B64;
  if (Ty == LLT::fixed_vector(3, 32))
    return B96;
  if (Ty == LLT::fixed_vector(4, 32) || isAnyPtr(Ty, 128))
    return B128;
  return _;
}

const RegBankLLTMapping &
SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI,
                                      const MachineRegisterInfo &MRI,
                                      const MachineUniformityInfo &MUI) const {
  // Search in "Fast Rules".
  // Note: if fast rules are enabled, a RegBankLLTMapping must be added to
  // every slot that could match the fast Predicate. Otherwise InvalidMapping
  // is returned, which results in failure; the "Slow Rules" are not searched.
  if (FastTypes != NoFastRules) {
    Register Reg = MI.getOperand(0).getReg();
    int Slot;
    if (FastTypes == StandardB)
      Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg)));
    else
      Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));

    if (Slot != -1)
      return MUI.isUniform(Reg) ? Uni[Slot] : Div[Slot];
  }

  // Slow search for more complex rules.
  for (const RegBankLegalizeRule &Rule : Rules) {
    if (Rule.Predicate.match(MI, MUI, MRI))
      return Rule.OperandMapping;
  }

  LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
  llvm_unreachable("None of the rules defined for MI's opcode matched MI");
}

void SetOfRulesForOpcode::addRule(RegBankLegalizeRule Rule) {
  Rules.push_back(Rule);
}

void SetOfRulesForOpcode::addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
                                               RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Div[Slot] = RuleApplyIDs;
}

void SetOfRulesForOpcode::addFastRuleUniform(UniformityLLTOpPredicateID Ty,
                                             RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Uni[Slot] = RuleApplyIDs;
}

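// Translate a predicate ID to its fixed slot in the Uni/Div fast-rule arrays,
// or -1 if the ID has no fast slot for this FastRulesTypes.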
int SetOfRulesForOpcode::getFastPredicateSlot(
    UniformityLLTOpPredicateID Ty) const {
  switch (FastTypes) {
  case Standard: {
    switch (Ty) {
    case S32:
      return 0;
    case S16:
      return 1;
    case S64:
      return 2;
    case V2S16:
      return 3;
    default:
      return -1;
    }
  }
  case StandardB: {
    switch (Ty) {
    case B32:
      return 0;
    case B64:
      return 1;
    case B96:
      return 2;
    case B128:
      return 3;
    default:
      return -1;
    }
  }
  case Vector: {
    switch (Ty) {
    case S32:
      return 0;
    case V2S32:
      return 1;
    case V3S32:
      return 2;
    case V4S32:
      return 3;
    default:
      return -1;
    }
  }
  default:
    return -1;
  }
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
}

const SetOfRulesForOpcode &
RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
      Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
      Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
    unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
    auto IRAIt = IRulesAlias.find(IntrID);
    if (IRAIt == IRulesAlias.end()) {
      LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
      llvm_unreachable("No rules defined for intrinsic opcode");
    }
    return IRules.at(IRAIt->second);
  }

  auto GRAIt = GRulesAlias.find(Opc);
  if (GRAIt == GRulesAlias.end()) {
    LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
    llvm_unreachable("No rules defined for generic opcode");
  }
  return GRules.at(GRAIt->second);
}

// Syntactic sugar wrapper for predicate lambda that enables '&&', '||' and '!'.
class Predicate {
private:
  struct Elt {
    // Save formula composed of Pred, '&&', '||' and '!' as a jump table.
    // Sink ! to Pred. For example !((A && !B) || C) -> (!A || B) && !C
    // Sequences of && and || will be represented by jumps, for example:
    // (A && B && ... X) or (A && B && ... X) || Y
    //   A == true jump to B
    //   A == false jump to end or Y, result is A(false) or Y
    // (A || B || ... X) or (A || B || ... X) && Y
    //   A == true jump to end or Y, result is A(true) or Y
    //   A == false jump to B
    // Notice that when negating expression, we simply flip Neg on each Pred
    // and swap TJumpOffset and FJumpOffset (&& becomes ||, || becomes &&).
    std::function<bool(const MachineInstr &)> Pred;
    bool Neg; // Neg of Pred is calculated before jump
    unsigned TJumpOffset;
    unsigned FJumpOffset;
  };

  SmallVector<Elt, 8> Expression;

  Predicate(SmallVectorImpl<Elt> &&Expr) { Expression.swap(Expr); };

public:
  Predicate(std::function<bool(const MachineInstr &)> Pred) {
    Expression.push_back({Pred, false, 1, 1});
  };

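  // Evaluate the formula by walking the jump table: apply each element's
  // predicate (negated if Neg is set) and follow its true/false jump offset
  // until the index runs past the last element.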
  bool operator()(const MachineInstr &MI) const {
    unsigned Idx = 0;
    unsigned ResultIdx = Expression.size();
    bool Result;
    do {
      Result = Expression[Idx].Pred(MI);
      Result = Expression[Idx].Neg ? !Result : Result;
      if (Result) {
        Idx += Expression[Idx].TJumpOffset;
      } else {
        Idx += Expression[Idx].FJumpOffset;
      }
    } while (Idx != ResultIdx);

    return Result;
  };

  Predicate operator!() const {
    SmallVector<Elt, 8> NegExpression;
    for (const Elt &ExprElt : Expression) {
      NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset,
                               ExprElt.TJumpOffset});
    }
    return Predicate(std::move(NegExpression));
  };

  Predicate operator&&(const Predicate &RHS) const {
    SmallVector<Elt, 8> AndExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in false, whole expression results in false.
      if (i + AndExpression[i].FJumpOffset == ResultIdx)
        AndExpression[i].FJumpOffset += RHSSize;
    }

    AndExpression.append(RHS.Expression);

    return Predicate(std::move(AndExpression));
  }

  Predicate operator||(const Predicate &RHS) const {
    SmallVector<Elt, 8> OrExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in true, whole expression results in true.
      if (i + OrExpression[i].TJumpOffset == ResultIdx)
        OrExpression[i].TJumpOffset += RHSSize;
    }

    OrExpression.append(RHS.Expression);

    return Predicate(std::move(OrExpression));
  }
};

// Initialize rules
RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
                                           MachineRegisterInfo &_MRI)
    : ST(&_ST), MRI(&_MRI) {

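  // Each mapping below is written as {{dst operand mappings}, {src operand
  // mappings}[, lowering method]} (see RegBankLLTMapping). Uni()/Div() rules
  // apply when operand 0 is uniform/divergent and has the given type; Any()
  // rules take a full PredicateMapping over all operands.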
  addRulesForGOpcs({G_ADD, G_SUB}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_MUL}, Standard).Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
      .Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}})
      .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr16, Sgpr16}}})
      .Any({{DivS16}, {{Vgpr16}, {Vgpr16, Vgpr16}}})
      .Uni(B32, {{SgprB32}, {SgprB32, SgprB32}})
      .Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
      .Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
      .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});

  addRulesForGOpcs({G_SHL}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_LSHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_ASHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});

  addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});

  // Note: S1 rules are only written for G_IMPLICIT_DEF, G_CONSTANT,
  // G_FCONSTANT and G_FREEZE here; the rest is trivially regbankselected
  // earlier.
  addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
  addRulesForGOpcs({G_CONSTANT})
      .Any({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}});
  addRulesForGOpcs({G_FREEZE}).Any({{DivS1}, {{Vcc}, {Vcc}}});

  addRulesForGOpcs({G_ICMP})
      .Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
      .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}});

  addRulesForGOpcs({G_FCMP})
      .Any({{UniS1, _, S32}, {{UniInVcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}});

  addRulesForGOpcs({G_BRCOND})
      .Any({{UniS1}, {{}, {Sgpr32AExtBoolInReg}}})
      .Any({{DivS1}, {{}, {Vcc}}});

  addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});

  addRulesForGOpcs({G_SELECT}, StandardB)
      .Any({{DivS16}, {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr32AExtBoolInReg, Sgpr16, Sgpr16}}})
      .Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}})
      .Uni(B32, {{SgprB32}, {Sgpr32AExtBoolInReg, SgprB32, SgprB32}})
      .Div(B64, {{VgprB64}, {Vcc, VgprB64, VgprB64}, SplitTo32Select})
      .Uni(B64, {{SgprB64}, {Sgpr32AExtBoolInReg, SgprB64, SgprB64}});

  addRulesForGOpcs({G_ANYEXT})
      .Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS64, S1}, {{None}, {None}}}) // should be combined away
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  // In global-isel, G_TRUNC in-reg is treated as a no-op and instruction
  // selected into a COPY. It is up to the user to deal with the truncated
  // bits.
  addRulesForGOpcs({G_TRUNC})
      .Any({{UniS1, UniS16}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS32}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS64}, {{None}, {None}}}) // should be combined away
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}})
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
      .Any({{UniV2S16, V2S32}, {{SgprV2S16}, {SgprV2S32}}})
      .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
      // This is non-trivial. VgprToVccCopy is done using a compare
      // instruction.
      .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}})
      .Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}})
      .Any({{DivS1, DivS64}, {{Vcc}, {Vgpr64}, VgprToVccCopy}});

  addRulesForGOpcs({G_ZEXT})
      .Any({{UniS16, S1}, {{Sgpr32Trunc}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS32, S1}, {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS64, S1}, {{Sgpr64}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // Not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32ZExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32ZExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT})
      .Any({{UniS16, S1}, {{Sgpr32Trunc}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS32, S1}, {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS64, S1}, {{Sgpr64}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // Not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32SExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32SExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT_INREG})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}})
      .Any({{DivS64, S64}, {{Vgpr64}, {Vgpr64}, SplitTo32SExtInReg}});

  bool hasUnalignedLoads = ST->getGeneration() >= AMDGPUSubtarget::GFX12;
  bool hasSMRDSmall = ST->hasScalarSubwordLoads();

  Predicate isAlign16([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(16);
  });

  Predicate isAlign4([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(4);
  });

  Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isAtomic();
  });

  Predicate isUniMMO([](const MachineInstr &MI) -> bool {
    return AMDGPU::isUniformMMO(*MI.memoperands_begin());
  });

  Predicate isConst([](const MachineInstr &MI) -> bool {
    // The address space in the MMO may differ from the address space of the
    // pointer operand.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned AS = MMO->getAddrSpace();
    return AS == AMDGPUAS::CONSTANT_ADDRESS ||
           AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
  });

  Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isVolatile();
  });

  Predicate isInvMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isInvariant();
  });

  Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getFlags() & MONoClobber;
  });

  Predicate isNaturalAlignedSmall([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned MemSize = 8 * MMO->getSize().getValue();
    return (MemSize == 16 && MMO->getAlign() >= Align(2)) ||
           (MemSize == 8 && MMO->getAlign() >= Align(1));
  });

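  // "Uniform load" predicate: not atomic, uniform MMO, and either from a
  // constant address space or a non-volatile access that is invariant or
  // known not to be clobbered.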
  auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
              (isConst || isInvMMO || isNoClobberMMO);

  // clang-format off
  addRulesForGOpcs({G_LOAD})
      .Any({{DivB32, DivP0}, {{VgprB32}, {VgprP0}}})

      .Any({{DivB32, DivP1}, {{VgprB32}, {VgprP1}}})
      .Any({{{UniB256, UniP1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
      .Any({{{UniB512, UniP1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})
      .Any({{{UniB32, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}})
      .Any({{{UniB256, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {VgprP1}, SplitLoad}})
      .Any({{{UniB512, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {VgprP1}, SplitLoad}})

      .Any({{DivB32, UniP3}, {{VgprB32}, {VgprP3}}})
      .Any({{{UniB32, UniP3}, isAlign4 && isUL}, {{SgprB32}, {SgprP3}}})
      .Any({{{UniB32, UniP3}, !isAlign4 || !isUL}, {{UniInVgprB32}, {VgprP3}}})

      .Any({{{DivB256, DivP4}}, {{VgprB256}, {VgprP4}, SplitLoad}})
      .Any({{{UniB32, UniP4}, isNaturalAlignedSmall && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall) // i8 and i16 load
      .Any({{{UniB32, UniP4}, isAlign4 && isUL}, {{SgprB32}, {SgprP4}}})
      .Any({{{UniB96, UniP4}, isAlign16 && isUL}, {{SgprB96}, {SgprP4}, WidenLoad}}, !hasUnalignedLoads)
      .Any({{{UniB96, UniP4}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP4}, SplitLoad}}, !hasUnalignedLoads)
      .Any({{{UniB96, UniP4}, isAlign4 && isUL}, {{SgprB96}, {SgprP4}}}, hasUnalignedLoads)
      .Any({{{UniB256, UniP4}, isAlign4 && isUL}, {{SgprB256}, {SgprP4}}})
      .Any({{{UniB512, UniP4}, isAlign4 && isUL}, {{SgprB512}, {SgprP4}}})
      .Any({{{UniB32, UniP4}, !isNaturalAlignedSmall || !isUL}, {{UniInVgprB32}, {VgprP4}}}, hasSMRDSmall) // i8 and i16 load
      .Any({{{UniB32, UniP4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {VgprP4}}})
      .Any({{{UniB256, UniP4}, !isAlign4 || !isUL}, {{UniInVgprB256}, {VgprP4}, SplitLoad}})
      .Any({{{UniB512, UniP4}, !isAlign4 || !isUL}, {{UniInVgprB512}, {VgprP4}, SplitLoad}})

      .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}});

  addRulesForGOpcs({G_ZEXTLOAD}) // i8 and i16 zero-extending loads
      .Any({{{UniB32, UniP3}, !isAlign4 || !isUL}, {{UniInVgprB32}, {VgprP3}}})
      .Any({{{UniB32, UniP4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {VgprP4}}});
  // clang-format on

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD}, Vector)
      .Div(S32, {{Vgpr32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
      .Uni(S32, {{UniInVgprS32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
      .Div(V4S32, {{VgprV4S32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
      .Uni(V4S32, {{UniInVgprV4S32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}});

  addRulesForGOpcs({G_STORE})
      .Any({{S32, P0}, {{}, {Vgpr32, VgprP0}}})
      .Any({{S32, P1}, {{}, {Vgpr32, VgprP1}}})
      .Any({{S64, P1}, {{}, {Vgpr64, VgprP1}}})
      .Any({{V4S32, P1}, {{}, {VgprV4S32, VgprP1}}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_STORE})
      .Any({{S32}, {{}, {Vgpr32, SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}});

  addRulesForGOpcs({G_PTR_ADD})
      .Any({{UniP1}, {{SgprP1}, {SgprP1, Sgpr64}}})
      .Any({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}})
      .Any({{DivP0}, {{VgprP0}, {VgprP0, Vgpr64}}});

  addRulesForGOpcs({G_INTTOPTR})
      .Any({{UniPtr32}, {{SgprPtr32}, {Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {Vgpr64}}})
      .Any({{UniPtr128}, {{SgprPtr128}, {Sgpr128}}})
      .Any({{DivPtr128}, {{VgprPtr128}, {Vgpr128}}});

  addRulesForGOpcs({G_PTRTOINT})
      .Any({{UniS32}, {{Sgpr32}, {SgprPtr32}}})
      .Any({{DivS32}, {{Vgpr32}, {VgprPtr32}}})
      .Any({{UniS64}, {{Sgpr64}, {SgprPtr64}}})
      .Any({{DivS64}, {{Vgpr64}, {VgprPtr64}}})
      .Any({{UniS128}, {{Sgpr128}, {SgprPtr128}}})
      .Any({{DivS128}, {{Vgpr128}, {VgprPtr128}}});

  addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});

  bool hasSALUFloat = ST->hasSALUFloatInsts();

  addRulesForGOpcs({G_FADD}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_FPTOUI})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);

  addRulesForGOpcs({G_UITOFP})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);

  using namespace Intrinsic;

  addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});

  // This is the "intrinsic lane mask"; it was set to i32/i64 in LLVM IR.
  addRulesForIOpcs({amdgcn_end_cf}).Any({{_, S32}, {{}, {None, Sgpr32}}});

  addRulesForIOpcs({amdgcn_if_break}, Standard)
      .Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});

  addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
      .Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});

  addRulesForIOpcs({amdgcn_readfirstlane})
      .Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}})
      // This should not exist in the first place; it comes from call
      // lowering, readfirstlaning just in case the register is not in an
      // SGPR.
      .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});

} // end initialize rules