1//===-- AMDGPURegBankLegalizeRules.cpp ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// Definitions of RegBankLegalize Rules for all opcodes.
10/// Implementation of container for all the Rules and search.
11/// Fast search for most common case when Rule.Predicate checks LLT and
12/// uniformity of register in operand 0.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AMDGPURegBankLegalizeRules.h"
17#include "AMDGPUInstrInfo.h"
18#include "GCNSubtarget.h"
19#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
20#include "llvm/CodeGen/MachineUniformityAnalysis.h"
21#include "llvm/IR/IntrinsicsAMDGPU.h"
22#include "llvm/Support/AMDGPUAddrSpace.h"
23
24#define DEBUG_TYPE "amdgpu-regbanklegalize"
25
26using namespace llvm;
27using namespace AMDGPU;
28
29bool AMDGPU::isAnyPtr(LLT Ty, unsigned Width) {
30 return Ty.isPointer() && Ty.getSizeInBits() == Width;
31}
32
// Mapping applied to an instruction that matched a rule: register-bank/LLT
// IDs for the defs (DstOpMappingList) and uses (SrcOpMappingList), plus the
// lowering method used to expand the instruction when a plain bank
// assignment is not enough.
RegBankLLTMapping::RegBankLLTMapping(
    std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
    std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
    LoweringMethodID LoweringMethod)
    : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
      LoweringMethod(LoweringMethod) {}
39
40PredicateMapping::PredicateMapping(
41 std::initializer_list<UniformityLLTOpPredicateID> OpList,
42 std::function<bool(const MachineInstr &)> TestFunc)
43 : OpUniformityAndTypes(OpList), TestFunc(TestFunc) {}
44
45bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
46 const MachineUniformityInfo &MUI,
47 const MachineRegisterInfo &MRI) {
48 switch (UniID) {
49 case S1:
50 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 1);
51 case S16:
52 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 16);
53 case S32:
54 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 32);
55 case S64:
56 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 64);
57 case S128:
58 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 128);
59 case P0:
60 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 0, SizeInBits: 64);
61 case P1:
62 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 1, SizeInBits: 64);
63 case P2:
64 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 2, SizeInBits: 32);
65 case P3:
66 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 3, SizeInBits: 32);
67 case P4:
68 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 4, SizeInBits: 64);
69 case P5:
70 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 5, SizeInBits: 32);
71 case P8:
72 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 8, SizeInBits: 128);
73 case Ptr32:
74 return isAnyPtr(Ty: MRI.getType(Reg), Width: 32);
75 case Ptr64:
76 return isAnyPtr(Ty: MRI.getType(Reg), Width: 64);
77 case Ptr128:
78 return isAnyPtr(Ty: MRI.getType(Reg), Width: 128);
79 case V2S16:
80 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 16);
81 case V2S32:
82 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32);
83 case V3S32:
84 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 3, ScalarSizeInBits: 32);
85 case V4S32:
86 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32);
87 case B32:
88 return MRI.getType(Reg).getSizeInBits() == 32;
89 case B64:
90 return MRI.getType(Reg).getSizeInBits() == 64;
91 case B96:
92 return MRI.getType(Reg).getSizeInBits() == 96;
93 case B128:
94 return MRI.getType(Reg).getSizeInBits() == 128;
95 case B160:
96 return MRI.getType(Reg).getSizeInBits() == 160;
97 case B256:
98 return MRI.getType(Reg).getSizeInBits() == 256;
99 case B512:
100 return MRI.getType(Reg).getSizeInBits() == 512;
101 case UniS1:
102 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 1) && MUI.isUniform(V: Reg);
103 case UniS16:
104 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 16) && MUI.isUniform(V: Reg);
105 case UniS32:
106 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 32) && MUI.isUniform(V: Reg);
107 case UniS64:
108 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 64) && MUI.isUniform(V: Reg);
109 case UniS128:
110 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 128) && MUI.isUniform(V: Reg);
111 case UniP0:
112 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 0, SizeInBits: 64) && MUI.isUniform(V: Reg);
113 case UniP1:
114 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 1, SizeInBits: 64) && MUI.isUniform(V: Reg);
115 case UniP2:
116 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 2, SizeInBits: 32) && MUI.isUniform(V: Reg);
117 case UniP3:
118 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 3, SizeInBits: 32) && MUI.isUniform(V: Reg);
119 case UniP4:
120 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 4, SizeInBits: 64) && MUI.isUniform(V: Reg);
121 case UniP5:
122 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 5, SizeInBits: 32) && MUI.isUniform(V: Reg);
123 case UniP8:
124 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 8, SizeInBits: 128) && MUI.isUniform(V: Reg);
125 case UniPtr32:
126 return isAnyPtr(Ty: MRI.getType(Reg), Width: 32) && MUI.isUniform(V: Reg);
127 case UniPtr64:
128 return isAnyPtr(Ty: MRI.getType(Reg), Width: 64) && MUI.isUniform(V: Reg);
129 case UniPtr128:
130 return isAnyPtr(Ty: MRI.getType(Reg), Width: 128) && MUI.isUniform(V: Reg);
131 case UniV2S16:
132 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 16) && MUI.isUniform(V: Reg);
133 case UniV2S32:
134 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32) && MUI.isUniform(V: Reg);
135 case UniB32:
136 return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(V: Reg);
137 case UniB64:
138 return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniform(V: Reg);
139 case UniB96:
140 return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniform(V: Reg);
141 case UniB128:
142 return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniform(V: Reg);
143 case UniB160:
144 return MRI.getType(Reg).getSizeInBits() == 160 && MUI.isUniform(V: Reg);
145 case UniB256:
146 return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniform(V: Reg);
147 case UniB512:
148 return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(V: Reg);
149 case UniBRC: {
150 if (!MUI.isUniform(V: Reg))
151 return false;
152 // Check if there is SGPR register class of same size as the LLT.
153 const SIRegisterInfo *TRI =
154 static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
155 // There is no 16 bit SGPR register class. Extra size check is required
156 // since getSGPRClassForBitWidth returns SReg_32RegClass for Size 16.
157 unsigned LLTSize = MRI.getType(Reg).getSizeInBits();
158 return LLTSize >= 32 && TRI->getSGPRClassForBitWidth(BitWidth: LLTSize);
159 }
160 case DivS1:
161 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 1) && MUI.isDivergent(V: Reg);
162 case DivS16:
163 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 16) && MUI.isDivergent(V: Reg);
164 case DivS32:
165 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 32) && MUI.isDivergent(V: Reg);
166 case DivS64:
167 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 64) && MUI.isDivergent(V: Reg);
168 case DivS128:
169 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 128) && MUI.isDivergent(V: Reg);
170 case DivP0:
171 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 0, SizeInBits: 64) && MUI.isDivergent(V: Reg);
172 case DivP1:
173 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 1, SizeInBits: 64) && MUI.isDivergent(V: Reg);
174 case DivP2:
175 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 2, SizeInBits: 32) && MUI.isDivergent(V: Reg);
176 case DivP3:
177 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 3, SizeInBits: 32) && MUI.isDivergent(V: Reg);
178 case DivP4:
179 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 4, SizeInBits: 64) && MUI.isDivergent(V: Reg);
180 case DivP5:
181 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 5, SizeInBits: 32) && MUI.isDivergent(V: Reg);
182 case DivPtr32:
183 return isAnyPtr(Ty: MRI.getType(Reg), Width: 32) && MUI.isDivergent(V: Reg);
184 case DivPtr64:
185 return isAnyPtr(Ty: MRI.getType(Reg), Width: 64) && MUI.isDivergent(V: Reg);
186 case DivPtr128:
187 return isAnyPtr(Ty: MRI.getType(Reg), Width: 128) && MUI.isDivergent(V: Reg);
188 case DivV2S16:
189 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 16) && MUI.isDivergent(V: Reg);
190 case DivV2S32:
191 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32) && MUI.isDivergent(V: Reg);
192 case DivV3S32:
193 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 3, ScalarSizeInBits: 32) && MUI.isDivergent(V: Reg);
194 case DivV4S16:
195 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16) && MUI.isDivergent(V: Reg);
196 case DivB32:
197 return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(V: Reg);
198 case DivB64:
199 return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergent(V: Reg);
200 case DivB96:
201 return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergent(V: Reg);
202 case DivB128:
203 return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergent(V: Reg);
204 case DivB160:
205 return MRI.getType(Reg).getSizeInBits() == 160 && MUI.isDivergent(V: Reg);
206 case DivB256:
207 return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergent(V: Reg);
208 case DivB512:
209 return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(V: Reg);
210 case DivBRC: {
211 if (!MUI.isDivergent(V: Reg))
212 return false;
213 // Check if there is VGPR register class of same size as the LLT.
214 const SIRegisterInfo *TRI =
215 static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
216 return TRI->getSGPRClassForBitWidth(BitWidth: MRI.getType(Reg).getSizeInBits());
217 }
218 case _:
219 return true;
220 default:
221 llvm_unreachable("missing matchUniformityAndLLT");
222 }
223}
224
225bool PredicateMapping::match(const MachineInstr &MI,
226 const MachineUniformityInfo &MUI,
227 const MachineRegisterInfo &MRI) const {
228 // Check LLT signature.
229 for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
230 const MachineOperand &MO = MI.getOperand(i);
231 if (OpUniformityAndTypes[i] == _) {
232 assert((!MI.getOperand(i).isReg() ||
233 !MI.getOperand(i).getReg().isVirtual()) &&
234 "_ is for non-register and physical register operands only");
235 continue;
236 }
237
238 // Remaining IDs check registers.
239 if (!MO.isReg())
240 return false;
241
242 if (!matchUniformityAndLLT(Reg: MO.getReg(), UniID: OpUniformityAndTypes[i], MUI, MRI))
243 return false;
244 }
245
246 // More complex check.
247 if (TestFunc)
248 return TestFunc(MI);
249
250 return true;
251}
252
// Default-constructed set: no fast-path slots, only the slow 'Rules' list.
SetOfRulesForOpcode::SetOfRulesForOpcode() = default;

// Set with fast-path lookup enabled for the given family of types
// (Standard, StandardB or Vector); slots are filled via addFastRule*.
SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
    : FastTypes(FastTypes) {}
257
258UniformityLLTOpPredicateID LLTToId(LLT Ty) {
259 if (Ty == LLT::scalar(SizeInBits: 16))
260 return S16;
261 if (Ty == LLT::scalar(SizeInBits: 32))
262 return S32;
263 if (Ty == LLT::scalar(SizeInBits: 64))
264 return S64;
265 if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 16))
266 return V2S16;
267 if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32))
268 return V2S32;
269 if (Ty == LLT::fixed_vector(NumElements: 3, ScalarSizeInBits: 32))
270 return V3S32;
271 if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32))
272 return V4S32;
273 return _;
274}
275
276UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
277 if (Ty == LLT::scalar(SizeInBits: 32) || Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 16) ||
278 isAnyPtr(Ty, Width: 32))
279 return B32;
280 if (Ty == LLT::scalar(SizeInBits: 64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32) ||
281 Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16) || isAnyPtr(Ty, Width: 64))
282 return B64;
283 if (Ty == LLT::fixed_vector(NumElements: 3, ScalarSizeInBits: 32))
284 return B96;
285 if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32) || Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) ||
286 Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16) || isAnyPtr(Ty, Width: 128))
287 return B128;
288 return _;
289}
290
291const RegBankLLTMapping *
292SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI,
293 const MachineRegisterInfo &MRI,
294 const MachineUniformityInfo &MUI) const {
295 // Search in "Fast Rules".
296 // Note: if fast rules are enabled, RegBankLLTMapping must be added in each
297 // slot that could "match fast Predicate". If not, InvalidMapping is
298 // returned which results in failure, does not search "Slow Rules".
299 if (FastTypes != NoFastRules) {
300 Register Reg = MI.getOperand(i: 0).getReg();
301 int Slot;
302 if (FastTypes == StandardB)
303 Slot = getFastPredicateSlot(Ty: LLTToBId(Ty: MRI.getType(Reg)));
304 else
305 Slot = getFastPredicateSlot(Ty: LLTToId(Ty: MRI.getType(Reg)));
306
307 if (Slot != -1)
308 return MUI.isUniform(V: Reg) ? &Uni[Slot] : &Div[Slot];
309 }
310
311 // Slow search for more complex rules.
312 for (const RegBankLegalizeRule &Rule : Rules) {
313 if (Rule.Predicate.match(MI, MUI, MRI))
314 return &Rule.OperandMapping;
315 }
316
317 return nullptr;
318}
319
320void SetOfRulesForOpcode::addRule(RegBankLegalizeRule Rule) {
321 Rules.push_back(Elt: Rule);
322}
323
// Install the mapping used when operand 0 is divergent and matches the fast
// predicate Ty. Ty must belong to the FastRulesTypes family this set was
// constructed with (asserted via getFastPredicateSlot).
void SetOfRulesForOpcode::addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
                                               RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Div[Slot] = std::move(RuleApplyIDs);
}
330
// Install the mapping used when operand 0 is uniform and matches the fast
// predicate Ty. Ty must belong to the FastRulesTypes family this set was
// constructed with (asserted via getFastPredicateSlot).
void SetOfRulesForOpcode::addFastRuleUniform(UniformityLLTOpPredicateID Ty,
                                             RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Uni[Slot] = std::move(RuleApplyIDs);
}
337
338int SetOfRulesForOpcode::getFastPredicateSlot(
339 UniformityLLTOpPredicateID Ty) const {
340 switch (FastTypes) {
341 case Standard: {
342 switch (Ty) {
343 case S32:
344 return 0;
345 case S16:
346 return 1;
347 case S64:
348 return 2;
349 case V2S16:
350 return 3;
351 default:
352 return -1;
353 }
354 }
355 case StandardB: {
356 switch (Ty) {
357 case B32:
358 return 0;
359 case B64:
360 return 1;
361 case B96:
362 return 2;
363 case B128:
364 return 3;
365 default:
366 return -1;
367 }
368 }
369 case Vector: {
370 switch (Ty) {
371 case S32:
372 return 0;
373 case V2S32:
374 return 1;
375 case V3S32:
376 return 2;
377 case V4S32:
378 return 3;
379 default:
380 return -1;
381 }
382 }
383 default:
384 return -1;
385 }
386}
387
// Start defining rules for the generic opcodes in OpcList. All opcodes in
// the list share a single rule set, registered through the GRulesAlias /
// GRules tables; the returned builder is used to attach Uni/Div/Any rules.
RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
}
393
// Same as addRulesForGOpcs, but for intrinsic IDs: all intrinsics in OpcList
// share one rule set, registered through the IRulesAlias / IRules tables.
RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
}
399
400const SetOfRulesForOpcode *
401RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
402 unsigned Opc = MI.getOpcode();
403 if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
404 Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
405 Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
406 unsigned IntrID = cast<GIntrinsic>(Val&: MI).getIntrinsicID();
407 auto IRAIt = IRulesAlias.find(Val: IntrID);
408 if (IRAIt == IRulesAlias.end())
409 return nullptr;
410 return &IRules.at(Val: IRAIt->second);
411 }
412
413 auto GRAIt = GRulesAlias.find(Val: Opc);
414 if (GRAIt == GRulesAlias.end())
415 return nullptr;
416 return &GRules.at(Val: GRAIt->second);
417}
418
419// Syntactic sugar wrapper for predicate lambda that enables '&&', '||' and '!'.
class Predicate {
private:
  struct Elt {
    // Save formula composed of Pred, '&&', '||' and '!' as a jump table.
    // Sink ! to Pred. For example !((A && !B) || C) -> (!A || B) && !C
    // Sequences of && and || will be represented by jumps, for example:
    // (A && B && ... X) or (A && B && ... X) || Y
    //   A == true jump to B
    //   A == false jump to end or Y, result is A(false) or Y
    // (A || B || ... X) or (A || B || ... X) && Y
    //   A == true jump to end or Y, result is A(true) or Y
    //   A == false jump to B
    // Notice that when negating expression, we simply flip Neg on each Pred
    // and swap TJumpOffset and FJumpOffset (&& becomes ||, || becomes &&).
    std::function<bool(const MachineInstr &)> Pred;
    bool Neg; // Neg of Pred is calculated before jump
    // Relative jump targets: how far forward to move when the (possibly
    // negated) Pred evaluates to true / false. Offsets are relative to the
    // current element; jumping to Expression.size() terminates evaluation.
    unsigned TJumpOffset;
    unsigned FJumpOffset;
  };

  // The whole formula, flattened into jump-table form.
  SmallVector<Elt, 8> Expression;

  // Private: adopt an already-built jump table (used by !, && and ||).
  Predicate(SmallVectorImpl<Elt> &&Expr) { Expression.swap(Expr); };

public:
  // Wrap a single predicate lambda. Both jump offsets are 1: whatever Pred
  // returns, control falls off the (one-element) table and that value is the
  // result.
  Predicate(std::function<bool(const MachineInstr &)> Pred) {
    Expression.push_back({Pred, false, 1, 1});
  };

  // Evaluate the formula on MI by walking the jump table. The loop exits
  // exactly when a jump lands on Expression.size(); the last evaluated
  // (possibly negated) predicate is the overall result.
  bool operator()(const MachineInstr &MI) const {
    unsigned Idx = 0;
    unsigned ResultIdx = Expression.size();
    bool Result;
    do {
      Result = Expression[Idx].Pred(MI);
      Result = Expression[Idx].Neg ? !Result : Result;
      if (Result) {
        Idx += Expression[Idx].TJumpOffset;
      } else {
        Idx += Expression[Idx].FJumpOffset;
      }
    } while ((Idx != ResultIdx));

    return Result;
  };

  // De Morgan in jump-table form: negate each leaf and swap the true/false
  // jump targets (turning every && into || and vice versa).
  Predicate operator!() const {
    SmallVector<Elt, 8> NegExpression;
    for (const Elt &ExprElt : Expression) {
      NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset,
                               ExprElt.TJumpOffset});
    }
    return Predicate(std::move(NegExpression));
  };

  // Concatenate LHS and RHS tables. Any LHS false-jump that previously
  // terminated evaluation now skips over RHS (short-circuit false); an LHS
  // true-jump to the old end falls through into RHS.
  Predicate operator&&(const Predicate &RHS) const {
    SmallVector<Elt, 8> AndExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in false, whole expression results in false.
      if (i + AndExpression[i].FJumpOffset == ResultIdx)
        AndExpression[i].FJumpOffset += RHSSize;
    }

    AndExpression.append(RHS.Expression);

    return Predicate(std::move(AndExpression));
  }

  // Mirror of operator&&: any LHS true-jump that previously terminated
  // evaluation now skips over RHS (short-circuit true); an LHS false-jump to
  // the old end falls through into RHS.
  Predicate operator||(const Predicate &RHS) const {
    SmallVector<Elt, 8> OrExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in true, whole expression results in true.
      if (i + OrExpression[i].TJumpOffset == ResultIdx)
        OrExpression[i].TJumpOffset += RHSSize;
    }

    OrExpression.append(RHS.Expression);

    return Predicate(std::move(OrExpression));
  }
};
507
508// Initialize rules
509RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
510 MachineRegisterInfo &_MRI)
511 : ST(&_ST), MRI(&_MRI) {
512
513 addRulesForGOpcs(OpcList: {G_ADD, G_SUB}, FastTypes: Standard)
514 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
515 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
516 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}})
517 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
518 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackAExt})
519 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
520 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64, Sgpr64}})
521 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr64}});
522
523 addRulesForGOpcs(OpcList: {G_UADDO, G_USUBO}, FastTypes: Standard)
524 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}})
525 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}});
526
527 addRulesForGOpcs(OpcList: {G_UADDE, G_USUBE, G_SADDE, G_SSUBE}, FastTypes: Standard)
528 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32, Sgpr32AExtBoolInReg}})
529 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32, Vcc}, {Vgpr32, Vgpr32, Vcc}});
530
531 addRulesForGOpcs(OpcList: {G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT}, FastTypes: Standard)
532 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16}})
533 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
534 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}})
535 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
536 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
537 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
538
539 bool HasVecMulU64 = ST->hasVectorMulU64();
540 addRulesForGOpcs(OpcList: {G_MUL}, FastTypes: Standard)
541 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
542 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}})
543 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
544 .Uni(Ty: S64, RuleApplyIDs: {{SgprB64}, {SgprB64, SgprB64}})
545 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
546 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
547 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
548 .Div(Ty: S64, RuleApplyIDs: {{VgprB64}, {VgprB64, VgprB64}}, STPred: HasVecMulU64)
549 .Div(Ty: S64, RuleApplyIDs: {{VgprB64}, {VgprB64, VgprB64}, SplitTo32Mul}, STPred: !HasVecMulU64);
550
551 bool hasMulHi = ST->hasScalarMulHiInsts();
552 addRulesForGOpcs(OpcList: {G_UMULH, G_SMULH}, FastTypes: Standard)
553 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
554 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}}, STPred: hasMulHi)
555 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}}, STPred: !hasMulHi);
556
557 addRulesForGOpcs(OpcList: {G_AMDGPU_MAD_U64_U32}, FastTypes: Standard)
558 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64, Vcc}, {Vgpr32, Vgpr32, Vgpr64}})
559 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64, Sgpr32Trunc}, {Sgpr32, Sgpr32, Sgpr64}, UniMAD64});
560
561 bool HasScalarSMulU64 = ST->hasScalarSMulU64();
562 addRulesForGOpcs(OpcList: {G_AMDGPU_S_MUL_U64_U32, G_AMDGPU_S_MUL_I64_I32}, FastTypes: Standard)
563 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64, Sgpr64}, UniMul64}, STPred: HasScalarSMulU64)
564 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr64}, DivSMulToMAD});
565
566 addRulesForGOpcs(OpcList: {G_XOR, G_OR, G_AND}, FastTypes: StandardB)
567 .Any(Init: {.Predicate: {UniS1}, .OperandMapping: {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}})
568 .Any(Init: {.Predicate: {DivS1}, .OperandMapping: {{Vcc}, {Vcc, Vcc}}})
569 .Any(Init: {.Predicate: {UniS16}, .OperandMapping: {{Sgpr16}, {Sgpr16, Sgpr16}}})
570 .Any(Init: {.Predicate: {DivS16}, .OperandMapping: {{Vgpr16}, {Vgpr16, Vgpr16}}})
571 .Uni(Ty: B32, RuleApplyIDs: {{SgprB32}, {SgprB32, SgprB32}})
572 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {VgprB32, VgprB32}})
573 .Uni(Ty: B64, RuleApplyIDs: {{SgprB64}, {SgprB64, SgprB64}})
574 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});
575
576 addRulesForGOpcs(OpcList: {G_SHL}, FastTypes: Standard)
577 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
578 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
579 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
580 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
581 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}})
582 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64, Sgpr32}})
583 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
584 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr32}});
585
586 addRulesForGOpcs(OpcList: {G_LSHR}, FastTypes: Standard)
587 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
588 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
589 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
590 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
591 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}})
592 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64, Sgpr32}})
593 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
594 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr32}});
595
596 addRulesForGOpcs(OpcList: {G_ASHR}, FastTypes: Standard)
597 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
598 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
599 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
600 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
601 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}})
602 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64, Sgpr32}})
603 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
604 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr32}});
605
606 addRulesForGOpcs(OpcList: {G_FSHR}, FastTypes: Standard)
607 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
608 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
609
610 addRulesForGOpcs(OpcList: {G_BSWAP}, FastTypes: Standard)
611 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16}})
612 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16}})
613 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32}})
614 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32}})
615 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16}})
616 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16}});
617
618 addRulesForGOpcs(OpcList: {G_AMDGPU_CVT_F32_UBYTE0, G_AMDGPU_CVT_F32_UBYTE1,
619 G_AMDGPU_CVT_F32_UBYTE2, G_AMDGPU_CVT_F32_UBYTE3,
620 G_AMDGPU_RCP_IFLAG},
621 FastTypes: Standard)
622 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32}})
623 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32}});
624
625 addRulesForGOpcs(OpcList: {G_FRAME_INDEX}).Any(Init: {.Predicate: {UniP5, _}, .OperandMapping: {{SgprP5}, {None}}});
626
627 addRulesForGOpcs(OpcList: {G_UBFX, G_SBFX}, FastTypes: Standard)
628 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
629 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
630 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
631 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});
632
633 addRulesForGOpcs(OpcList: {G_SMIN, G_SMAX}, FastTypes: Standard)
634 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32SExt}})
635 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
636 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}})
637 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
638 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackMinMax})
639 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
640
641 addRulesForGOpcs(OpcList: {G_UMIN, G_UMAX}, FastTypes: Standard)
642 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
643 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
644 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}})
645 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
646 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackMinMax})
647 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
648
649 // Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT and G_FCONSTANT
650 // here, rest is trivially regbankselected earlier
651 addRulesForGOpcs(OpcList: {G_IMPLICIT_DEF}).Any(Init: {.Predicate: {UniS1}, .OperandMapping: {{Sgpr32Trunc}, {}}});
652 addRulesForGOpcs(OpcList: {G_CONSTANT})
653 .Any(Init: {.Predicate: {UniS1, _}, .OperandMapping: {{Sgpr32Trunc}, {None}, UniCstExt}});
654
655 addRulesForGOpcs(OpcList: {G_FREEZE})
656 .Any(Init: {.Predicate: {UniS1}, .OperandMapping: {{Sgpr32Trunc}, {Sgpr32AExt}}})
657 .Any(Init: {.Predicate: {DivS1}, .OperandMapping: {{Vcc}, {Vcc}}})
658 .Any(Init: {.Predicate: {UniS16}, .OperandMapping: {{Sgpr16}, {Sgpr16}}})
659 .Any(Init: {.Predicate: {UniBRC}, .OperandMapping: {{SgprBRC}, {SgprBRC}}})
660 .Any(Init: {.Predicate: {DivBRC}, .OperandMapping: {{VgprBRC}, {VgprBRC}}});
661
662 addRulesForGOpcs(OpcList: {G_UNMERGE_VALUES})
663 .Any(Init: {.Predicate: {UniS16}, .OperandMapping: {{}, {}, UnmergeToShiftTrunc}})
664 .Any(Init: {.Predicate: {UniBRC}, .OperandMapping: {{}, {}, VerifyAllSgpr}})
665 .Any(Init: {.Predicate: {DivBRC}, .OperandMapping: {{}, {}, ApplyAllVgpr}});
666
667 addRulesForGOpcs(OpcList: {G_PHI})
668 .Any(Init: {.Predicate: {UniS1}, .OperandMapping: {{}, {}, AextToS32InIncomingBlockGPHI}})
669 .Any(Init: {.Predicate: {UniS16}, .OperandMapping: {{}, {}, VerifyAllSgprGPHI}})
670 .Any(Init: {.Predicate: {UniBRC}, .OperandMapping: {{}, {}, VerifyAllSgprGPHI}})
671 .Any(Init: {.Predicate: {DivBRC}, .OperandMapping: {{}, {}, VerifyAllSgprOrVgprGPHI}});
672
673 // LOAD {Div}, {{VgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
674 // LOAD {Uni}, {{UniInVgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
675 // LOAD_NORET {}, {{}, {Imm, VgprSrc, ..., Sgpr_WF_RsrcIdx}}
676 // STORE {}, {{}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
677 addRulesForGOpcs(OpcList: {G_AMDGPU_INTRIN_IMAGE_LOAD, G_AMDGPU_INTRIN_IMAGE_LOAD_D16,
678 G_AMDGPU_INTRIN_IMAGE_LOAD_NORET,
679 G_AMDGPU_INTRIN_IMAGE_STORE,
680 G_AMDGPU_INTRIN_IMAGE_STORE_D16})
681 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {}, ApplyINTRIN_IMAGE}});
682
683 Predicate isSignedICmp([](const MachineInstr &MI) -> bool {
684 auto Pred =
685 static_cast<CmpInst::Predicate>(MI.getOperand(i: 1).getPredicate());
686 return CmpInst::isSigned(Pred);
687 });
688
689 Predicate isEqualityICmp([](const MachineInstr &MI) -> bool {
690 auto Pred =
691 static_cast<CmpInst::Predicate>(MI.getOperand(i: 1).getPredicate());
692 return ICmpInst::isEquality(P: Pred);
693 });
694
695 bool HasScalarCompareEq64 = ST->hasScalarCompareEq64();
696 // clang-format off
697 addRulesForGOpcs(OpcList: {G_ICMP})
698 .Any(Init: {.Predicate: {{UniS1, _, S16}, isEqualityICmp}, .OperandMapping: {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
699 .Any(Init: {.Predicate: {{UniS1, _, S16}, !isEqualityICmp && isSignedICmp}, .OperandMapping: {{Sgpr32Trunc}, {None, Sgpr32SExt, Sgpr32SExt}}})
700 .Any(Init: {.Predicate: {{UniS1, _, S16}, !isEqualityICmp && !isSignedICmp}, .OperandMapping: {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
701 .Any(Init: {.Predicate: {{DivS1, _, S16}}, .OperandMapping: {{Vcc}, {None, Vgpr16, Vgpr16}}})
702 .Any(Init: {.Predicate: {{UniS1, _, S32}}, .OperandMapping: {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
703 .Any(Init: {.Predicate: {{DivS1, _, S32}}, .OperandMapping: {{Vcc}, {None, Vgpr32, Vgpr32}}})
704 .Any(Init: {.Predicate: {{UniS1, _, S64}, isEqualityICmp}, .OperandMapping: {{Sgpr32Trunc}, {None, Sgpr64, Sgpr64}}}, STPred: HasScalarCompareEq64)
705 .Any(Init: {.Predicate: {{UniS1, _, S64}, isEqualityICmp}, .OperandMapping: {{UniInVcc}, {None, Vgpr64, Vgpr64}}}, STPred: !HasScalarCompareEq64)
706 .Any(Init: {.Predicate: {{UniS1, _, S64}, !isEqualityICmp}, .OperandMapping: {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
707 .Any(Init: {.Predicate: {{DivS1, _, S64}}, .OperandMapping: {{Vcc}, {None, Vgpr64, Vgpr64}}})
708 .Any(Init: {.Predicate: {{UniS1, _, Ptr32}}, .OperandMapping: {{Sgpr32Trunc}, {None, SgprPtr32, SgprPtr32}}})
709 .Any(Init: {.Predicate: {{DivS1, _, Ptr32}}, .OperandMapping: {{Vcc}, {None, VgprPtr32, VgprPtr32}}})
710 .Any(Init: {.Predicate: {{UniS1, _, Ptr64}, isEqualityICmp}, .OperandMapping: {{Sgpr32Trunc}, {None, SgprPtr64, SgprPtr64}}}, STPred: HasScalarCompareEq64)
711 .Any(Init: {.Predicate: {{UniS1, _, Ptr64}, isEqualityICmp}, .OperandMapping: {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}}, STPred: !HasScalarCompareEq64)
712 .Any(Init: {.Predicate: {{UniS1, _, Ptr64}, !isEqualityICmp}, .OperandMapping: {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}})
713 .Any(Init: {.Predicate: {{DivS1, _, Ptr64}}, .OperandMapping: {{Vcc}, {None, VgprPtr64, VgprPtr64}}});
714 // clang-format on
715
716 addRulesForGOpcs(OpcList: {G_BRCOND})
717 .Any(Init: {.Predicate: {UniS1}, .OperandMapping: {{}, {Sgpr32AExtBoolInReg}}})
718 .Any(Init: {.Predicate: {DivS1}, .OperandMapping: {{}, {Vcc}}});
719
720 addRulesForGOpcs(OpcList: {G_BR}).Any(Init: {.Predicate: {_}, .OperandMapping: {{}, {None}}});
721
722 addRulesForGOpcs(OpcList: {G_SELECT}, FastTypes: StandardB)
723 .Any(Init: {.Predicate: {DivS16}, .OperandMapping: {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
724 .Any(Init: {.Predicate: {UniS16}, .OperandMapping: {{Sgpr16}, {Sgpr32AExtBoolInReg, Sgpr16, Sgpr16}}})
725 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {Vcc, VgprB32, VgprB32}})
726 .Uni(Ty: B32, RuleApplyIDs: {{SgprB32}, {Sgpr32AExtBoolInReg, SgprB32, SgprB32}})
727 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {Vcc, VgprB64, VgprB64}, SplitTo32Select})
728 .Uni(Ty: B64, RuleApplyIDs: {{SgprB64}, {Sgpr32AExtBoolInReg, SgprB64, SgprB64}});
729
730 addRulesForGOpcs(OpcList: {G_ANYEXT})
731 .Any(Init: {.Predicate: {UniS16, S1}, .OperandMapping: {{None}, {None}}}) // should be combined away
732 .Any(Init: {.Predicate: {UniS32, S1}, .OperandMapping: {{None}, {None}}}) // should be combined away
733 .Any(Init: {.Predicate: {UniS64, S1}, .OperandMapping: {{None}, {None}}}) // should be combined away
734 .Any(Init: {.Predicate: {DivS16, S1}, .OperandMapping: {{Vgpr16}, {Vcc}, VccExtToSel}})
735 .Any(Init: {.Predicate: {DivS32, S1}, .OperandMapping: {{Vgpr32}, {Vcc}, VccExtToSel}})
736 .Any(Init: {.Predicate: {DivS64, S1}, .OperandMapping: {{Vgpr64}, {Vcc}, VccExtToSel}})
737 .Any(Init: {.Predicate: {UniS64, S32}, .OperandMapping: {{Sgpr64}, {Sgpr32}, Ext32To64}})
738 .Any(Init: {.Predicate: {DivS64, S32}, .OperandMapping: {{Vgpr64}, {Vgpr32}, Ext32To64}})
739 .Any(Init: {.Predicate: {UniS32, S16}, .OperandMapping: {{Sgpr32}, {Sgpr16}}})
740 .Any(Init: {.Predicate: {DivS32, S16}, .OperandMapping: {{Vgpr32}, {Vgpr16}}});
741
742 bool Has16bitCmp = ST->has16BitInsts();
743
744 // In global-isel G_TRUNC in-reg is treated as no-op, inst selected into COPY.
745 // It is up to user to deal with truncated bits.
746 addRulesForGOpcs(OpcList: {G_TRUNC})
747 .Any(Init: {.Predicate: {UniS1, UniS16}, .OperandMapping: {{None}, {None}}}) // should be combined away
748 .Any(Init: {.Predicate: {UniS1, UniS32}, .OperandMapping: {{None}, {None}}}) // should be combined away
749 .Any(Init: {.Predicate: {UniS1, UniS64}, .OperandMapping: {{None}, {None}}}) // should be combined away
750 .Any(Init: {.Predicate: {UniS16, S32}, .OperandMapping: {{Sgpr16}, {Sgpr32}}})
751 .Any(Init: {.Predicate: {DivS16, S32}, .OperandMapping: {{Vgpr16}, {Vgpr32}}})
752 .Any(Init: {.Predicate: {UniS32, S64}, .OperandMapping: {{Sgpr32}, {Sgpr64}}})
753 .Any(Init: {.Predicate: {DivS32, S64}, .OperandMapping: {{Vgpr32}, {Vgpr64}}})
754 .Any(Init: {.Predicate: {UniV2S16, V2S32}, .OperandMapping: {{SgprV2S16}, {SgprV2S32}}})
755 .Any(Init: {.Predicate: {DivV2S16, V2S32}, .OperandMapping: {{VgprV2S16}, {VgprV2S32}}})
756 // This is non-trivial. VgprToVccCopy is done using compare instruction.
757 .Any(Init: {.Predicate: {DivS1, DivS16}, .OperandMapping: {{Vcc}, {Vgpr16}, VgprToVccCopy}}, STPred: Has16bitCmp)
758 .Any(Init: {.Predicate: {DivS1, DivS16}, .OperandMapping: {{Vcc}, {Vgpr32AExt}, VgprToVccCopy}},
759 STPred: !Has16bitCmp)
760 .Any(Init: {.Predicate: {DivS1, DivS32}, .OperandMapping: {{Vcc}, {Vgpr32}, VgprToVccCopy}})
761 .Any(Init: {.Predicate: {DivS1, DivS64}, .OperandMapping: {{Vcc}, {Vgpr64}, VgprToVccCopy}});
762
763 addRulesForGOpcs(OpcList: {G_ZEXT})
764 .Any(Init: {.Predicate: {UniS16, S1}, .OperandMapping: {{Sgpr32Trunc}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
765 .Any(Init: {.Predicate: {UniS32, S1}, .OperandMapping: {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
766 .Any(Init: {.Predicate: {UniS64, S1}, .OperandMapping: {{Sgpr64}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
767 .Any(Init: {.Predicate: {DivS16, S1}, .OperandMapping: {{Vgpr16}, {Vcc}, VccExtToSel}})
768 .Any(Init: {.Predicate: {DivS32, S1}, .OperandMapping: {{Vgpr32}, {Vcc}, VccExtToSel}})
769 .Any(Init: {.Predicate: {DivS64, S1}, .OperandMapping: {{Vgpr64}, {Vcc}, VccExtToSel}})
770 .Any(Init: {.Predicate: {UniS64, S32}, .OperandMapping: {{Sgpr64}, {Sgpr32}, Ext32To64}})
771 .Any(Init: {.Predicate: {DivS64, S32}, .OperandMapping: {{Vgpr64}, {Vgpr32}, Ext32To64}})
772 // not extending S16 to S32 is questionable.
773 .Any(Init: {.Predicate: {UniS64, S16}, .OperandMapping: {{Sgpr64}, {Sgpr32ZExt}, Ext32To64}})
774 .Any(Init: {.Predicate: {DivS64, S16}, .OperandMapping: {{Vgpr64}, {Vgpr32ZExt}, Ext32To64}})
775 .Any(Init: {.Predicate: {UniS32, S16}, .OperandMapping: {{Sgpr32}, {Sgpr16}}})
776 .Any(Init: {.Predicate: {DivS32, S16}, .OperandMapping: {{Vgpr32}, {Vgpr16}}});
777
778 addRulesForGOpcs(OpcList: {G_SEXT})
779 .Any(Init: {.Predicate: {UniS16, S1}, .OperandMapping: {{Sgpr32Trunc}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
780 .Any(Init: {.Predicate: {UniS32, S1}, .OperandMapping: {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
781 .Any(Init: {.Predicate: {UniS64, S1}, .OperandMapping: {{Sgpr64}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
782 .Any(Init: {.Predicate: {DivS16, S1}, .OperandMapping: {{Vgpr16}, {Vcc}, VccExtToSel}})
783 .Any(Init: {.Predicate: {DivS32, S1}, .OperandMapping: {{Vgpr32}, {Vcc}, VccExtToSel}})
784 .Any(Init: {.Predicate: {DivS64, S1}, .OperandMapping: {{Vgpr64}, {Vcc}, VccExtToSel}})
785 .Any(Init: {.Predicate: {UniS64, S32}, .OperandMapping: {{Sgpr64}, {Sgpr32}, Ext32To64}})
786 .Any(Init: {.Predicate: {DivS64, S32}, .OperandMapping: {{Vgpr64}, {Vgpr32}, Ext32To64}})
787 // not extending S16 to S32 is questionable.
788 .Any(Init: {.Predicate: {UniS64, S16}, .OperandMapping: {{Sgpr64}, {Sgpr32SExt}, Ext32To64}})
789 .Any(Init: {.Predicate: {DivS64, S16}, .OperandMapping: {{Vgpr64}, {Vgpr32SExt}, Ext32To64}})
790 .Any(Init: {.Predicate: {UniS32, S16}, .OperandMapping: {{Sgpr32}, {Sgpr16}}})
791 .Any(Init: {.Predicate: {DivS32, S16}, .OperandMapping: {{Vgpr32}, {Vgpr16}}});
792
793 addRulesForGOpcs(OpcList: {G_SEXT_INREG})
794 .Any(Init: {.Predicate: {UniS32, S32}, .OperandMapping: {{Sgpr32}, {Sgpr32}}})
795 .Any(Init: {.Predicate: {DivS32, S32}, .OperandMapping: {{Vgpr32}, {Vgpr32}}})
796 .Any(Init: {.Predicate: {UniS64, S64}, .OperandMapping: {{Sgpr64}, {Sgpr64}}})
797 .Any(Init: {.Predicate: {DivS64, S64}, .OperandMapping: {{Vgpr64}, {Vgpr64}, SplitTo32SExtInReg}});
798
799 addRulesForGOpcs(OpcList: {G_ASSERT_ZEXT, G_ASSERT_SEXT}, FastTypes: Standard)
800 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Imm}})
801 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Imm}})
802 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64, Imm}})
803 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Imm}});
804
805 addRulesForGOpcs(OpcList: {G_ASSERT_ALIGN}, FastTypes: Standard)
806 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32}})
807 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32}})
808 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64}})
809 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64}})
810 .Any(Init: {.Predicate: {UniPtr32}, .OperandMapping: {{SgprPtr32}, {SgprPtr32}}})
811 .Any(Init: {.Predicate: {DivPtr32}, .OperandMapping: {{VgprPtr32}, {VgprPtr32}}})
812 .Any(Init: {.Predicate: {UniPtr64}, .OperandMapping: {{SgprPtr64}, {SgprPtr64}}})
813 .Any(Init: {.Predicate: {DivPtr64}, .OperandMapping: {{VgprPtr64}, {VgprPtr64}}});
814
815 // Atomic read-modify-write operations: result and value are always VGPR,
816 // pointer varies by address space.
817 addRulesForGOpcs(OpcList: {G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG,
818 G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
819 G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN,
820 G_ATOMICRMW_UMAX, G_ATOMICRMW_UINC_WRAP,
821 G_ATOMICRMW_UDEC_WRAP, G_ATOMICRMW_FMIN, G_ATOMICRMW_FMAX})
822 .Any(Init: {.Predicate: {DivS32, P0, S32}, .OperandMapping: {{Vgpr32}, {VgprP0, Vgpr32}}})
823 .Any(Init: {.Predicate: {DivS64, P0, S64}, .OperandMapping: {{Vgpr64}, {VgprP0, Vgpr64}}})
824 .Any(Init: {.Predicate: {DivS32, P1, S32}, .OperandMapping: {{Vgpr32}, {VgprP1, Vgpr32}}})
825 .Any(Init: {.Predicate: {DivS64, P1, S64}, .OperandMapping: {{Vgpr64}, {VgprP1, Vgpr64}}})
826 .Any(Init: {.Predicate: {DivS32, P3, S32}, .OperandMapping: {{Vgpr32}, {VgprP3, Vgpr32}}})
827 .Any(Init: {.Predicate: {DivS64, P3, S64}, .OperandMapping: {{Vgpr64}, {VgprP3, Vgpr64}}});
828
829 bool HasAtomicFlatPkAdd16Insts = ST->hasAtomicFlatPkAdd16Insts();
830 bool HasAtomicBufferGlobalPkAddF16Insts =
831 ST->hasAtomicBufferGlobalPkAddF16NoRtnInsts() ||
832 ST->hasAtomicBufferGlobalPkAddF16Insts();
833 bool HasAtomicDsPkAdd16Insts = ST->hasAtomicDsPkAdd16Insts();
834 addRulesForGOpcs(OpcList: {G_ATOMICRMW_FADD})
835 .Any(Init: {.Predicate: {DivS32, P0, S32}, .OperandMapping: {{Vgpr32}, {VgprP0, Vgpr32}}})
836 .Any(Init: {.Predicate: {DivS64, P0, S64}, .OperandMapping: {{Vgpr64}, {VgprP0, Vgpr64}}})
837 .Any(Init: {.Predicate: {DivS32, P1, S32}, .OperandMapping: {{Vgpr32}, {VgprP1, Vgpr32}}})
838 .Any(Init: {.Predicate: {DivS64, P1, S64}, .OperandMapping: {{Vgpr64}, {VgprP1, Vgpr64}}})
839 .Any(Init: {.Predicate: {DivS32, P3, S32}, .OperandMapping: {{Vgpr32}, {VgprP3, Vgpr32}}})
840 .Any(Init: {.Predicate: {DivS64, P3, S64}, .OperandMapping: {{Vgpr64}, {VgprP3, Vgpr64}}})
841 .Any(Init: {.Predicate: {DivV2S16, P0, V2S16}, .OperandMapping: {{VgprV2S16}, {VgprP0, VgprV2S16}}},
842 STPred: HasAtomicFlatPkAdd16Insts)
843 .Any(Init: {.Predicate: {DivV2S16, P1, V2S16}, .OperandMapping: {{VgprV2S16}, {VgprP1, VgprV2S16}}},
844 STPred: HasAtomicBufferGlobalPkAddF16Insts)
845 .Any(Init: {.Predicate: {DivV2S16, P3, V2S16}, .OperandMapping: {{VgprV2S16}, {VgprP3, VgprV2S16}}},
846 STPred: HasAtomicDsPkAdd16Insts);
847
848 addRulesForGOpcs(OpcList: {G_ATOMIC_CMPXCHG})
849 .Any(Init: {.Predicate: {DivS32, P2}, .OperandMapping: {{Vgpr32}, {VgprP2, Vgpr32, Vgpr32}}})
850 .Any(Init: {.Predicate: {DivS64, P2}, .OperandMapping: {{Vgpr64}, {VgprP2, Vgpr64, Vgpr64}}})
851 .Any(Init: {.Predicate: {DivS32, P3}, .OperandMapping: {{Vgpr32}, {VgprP3, Vgpr32, Vgpr32}}})
852 .Any(Init: {.Predicate: {DivS64, P3}, .OperandMapping: {{Vgpr64}, {VgprP3, Vgpr64, Vgpr64}}});
853
854 addRulesForGOpcs(OpcList: {G_AMDGPU_ATOMIC_CMPXCHG})
855 .Any(Init: {.Predicate: {DivS32, P0}, .OperandMapping: {{Vgpr32}, {VgprP0, VgprV2S32}}})
856 .Any(Init: {.Predicate: {DivS32, P1}, .OperandMapping: {{Vgpr32}, {VgprP1, VgprV2S32}}})
857 .Any(Init: {.Predicate: {DivS64, P0}, .OperandMapping: {{Vgpr64}, {VgprP0, VgprV2S64}}})
858 .Any(Init: {.Predicate: {DivS64, P1}, .OperandMapping: {{Vgpr64}, {VgprP1, VgprV2S64}}});
859
860 addRulesForGOpcs(OpcList: {G_AMDGPU_BUFFER_ATOMIC_CMPSWAP}, FastTypes: Standard)
861 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32},
862 {Vgpr32, Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
863 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64},
864 {Vgpr64, Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});
865
866 addRulesForGOpcs(OpcList: {G_AMDGPU_BUFFER_ATOMIC_ADD, G_AMDGPU_BUFFER_ATOMIC_AND,
867 G_AMDGPU_BUFFER_ATOMIC_DEC, G_AMDGPU_BUFFER_ATOMIC_FMAX,
868 G_AMDGPU_BUFFER_ATOMIC_FMIN, G_AMDGPU_BUFFER_ATOMIC_INC,
869 G_AMDGPU_BUFFER_ATOMIC_OR, G_AMDGPU_BUFFER_ATOMIC_SMAX,
870 G_AMDGPU_BUFFER_ATOMIC_SMIN, G_AMDGPU_BUFFER_ATOMIC_SUB,
871 G_AMDGPU_BUFFER_ATOMIC_SWAP, G_AMDGPU_BUFFER_ATOMIC_UMAX,
872 G_AMDGPU_BUFFER_ATOMIC_UMIN, G_AMDGPU_BUFFER_ATOMIC_XOR},
873 FastTypes: Standard)
874 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
875 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});
876
877 bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
878 bool hasSMRDSmall = ST->hasScalarSubwordLoads();
879 bool usesTrue16 = ST->useRealTrue16Insts();
880
881 Predicate isAlign16([](const MachineInstr &MI) -> bool {
882 return (*MI.memoperands_begin())->getAlign() >= Align(16);
883 });
884
885 Predicate isAlign4([](const MachineInstr &MI) -> bool {
886 return (*MI.memoperands_begin())->getAlign() >= Align(4);
887 });
888
889 Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
890 return (*MI.memoperands_begin())->isAtomic();
891 });
892
893 Predicate isUniMMO([](const MachineInstr &MI) -> bool {
894 return AMDGPU::isUniformMMO(MMO: *MI.memoperands_begin());
895 });
896
897 Predicate isConst([](const MachineInstr &MI) -> bool {
898     // Address space in MMO may be different from the address space on the pointer.
899 const MachineMemOperand *MMO = *MI.memoperands_begin();
900 const unsigned AS = MMO->getAddrSpace();
901 return AS == AMDGPUAS::CONSTANT_ADDRESS ||
902 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
903 });
904
905 Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
906 return (*MI.memoperands_begin())->isVolatile();
907 });
908
909 Predicate isInvMMO([](const MachineInstr &MI) -> bool {
910 return (*MI.memoperands_begin())->isInvariant();
911 });
912
913 Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
914 return (*MI.memoperands_begin())->getFlags() & MONoClobber;
915 });
916
917 Predicate isNaturalAligned([](const MachineInstr &MI) -> bool {
918 const MachineMemOperand *MMO = *MI.memoperands_begin();
919 return MMO->getAlign() >= Align(MMO->getSize().getValue());
920 });
921
922 Predicate is8Or16BitMMO([](const MachineInstr &MI) -> bool {
923 const MachineMemOperand *MMO = *MI.memoperands_begin();
924 const unsigned MemSize = 8 * MMO->getSize().getValue();
925 return MemSize == 16 || MemSize == 8;
926 });
927
928 Predicate is32BitMMO([](const MachineInstr &MI) -> bool {
929 const MachineMemOperand *MMO = *MI.memoperands_begin();
930 return 8 * MMO->getSize().getValue() == 32;
931 });
932
933 auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
934 (isConst || isInvMMO || isNoClobberMMO);
935
936 // clang-format off
937 // TODO: S32Dst, 16-bit any-extending load should not appear on True16 targets
938 addRulesForGOpcs(OpcList: {G_LOAD})
939 // flat, addrspace(0), never uniform - flat_load
940 .Any(Init: {.Predicate: {DivS16, P0}, .OperandMapping: {{Vgpr16}, {VgprP0}}}, STPred: usesTrue16)
941 .Any(Init: {.Predicate: {DivB32, P0}, .OperandMapping: {{VgprB32}, {VgprP0}}}) // 32-bit load, 8-bit and 16-bit any-extending load
942 .Any(Init: {.Predicate: {DivB64, P0}, .OperandMapping: {{VgprB64}, {VgprP0}}})
943 .Any(Init: {.Predicate: {DivB96, P0}, .OperandMapping: {{VgprB96}, {VgprP0}}})
944 .Any(Init: {.Predicate: {DivB128, P0}, .OperandMapping: {{VgprB128}, {VgprP0}}})
945
946 // global, addrspace(1)
947 // divergent - global_load
948 .Any(Init: {.Predicate: {DivS16, P1}, .OperandMapping: {{Vgpr16}, {VgprP1}}}, STPred: usesTrue16)
949 .Any(Init: {.Predicate: {DivB32, P1}, .OperandMapping: {{VgprB32}, {VgprP1}}}) //32-bit load, 8-bit and 16-bit any-extending load
950 .Any(Init: {.Predicate: {DivB64, P1}, .OperandMapping: {{VgprB64}, {VgprP1}}})
951 .Any(Init: {.Predicate: {DivB96, P1}, .OperandMapping: {{VgprB96}, {VgprP1}}})
952 .Any(Init: {.Predicate: {DivB128, P1}, .OperandMapping: {{VgprB128}, {VgprP1}}})
953 .Any(Init: {.Predicate: {DivB256, P1}, .OperandMapping: {{VgprB256}, {VgprP1}, SplitLoad}})
954 .Any(Init: {.Predicate: {DivB512, P1}, .OperandMapping: {{VgprB512}, {VgprP1}, SplitLoad}})
955
956 // uniform - s_load
957 .Any(Init: {.Predicate: {{UniS16, P1}, isNaturalAligned && isUL}, .OperandMapping: {{Sgpr32Trunc}, {SgprP1}}}, STPred: usesTrue16 && hasSMRDSmall) // s16 load
958 .Any(Init: {.Predicate: {{UniS16, P1}, isAlign4 && isUL}, .OperandMapping: {{Sgpr32Trunc}, {SgprP1}, WidenMMOToS32}}, STPred: usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
959 .Any(Init: {.Predicate: {{UniB32, P1}, isNaturalAligned && isUL}, .OperandMapping: {{SgprB32}, {SgprP1}}}, STPred: hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
960 // TODO: SplitLoad when !isNaturalAligned && isUL and target hasSMRDSmall
961 .Any(Init: {.Predicate: {{UniB32, P1}, is8Or16BitMMO && isAlign4 && isUL}, .OperandMapping: {{SgprB32}, {SgprP1}, WidenMMOToS32}}, STPred: !hasSMRDSmall) //8-bit and 16-bit any-extending load to 32-bit load
962 .Any(Init: {.Predicate: {{UniB32, P1}, is32BitMMO && isAlign4 && isUL}, .OperandMapping: {{SgprB32}, {SgprP1}}}) //32-bit load
963 .Any(Init: {.Predicate: {{UniB64, P1}, isAlign4 && isUL}, .OperandMapping: {{SgprB64}, {SgprP1}}})
964 .Any(Init: {.Predicate: {{UniB96, P1}, isAlign16 && isUL}, .OperandMapping: {{SgprB96}, {SgprP1}, WidenLoad}}, STPred: !hasSMRDx3)
965 .Any(Init: {.Predicate: {{UniB96, P1}, isAlign4 && !isAlign16 && isUL}, .OperandMapping: {{SgprB96}, {SgprP1}, SplitLoad}}, STPred: !hasSMRDx3)
966 .Any(Init: {.Predicate: {{UniB96, P1}, isAlign4 && isUL}, .OperandMapping: {{SgprB96}, {SgprP1}}}, STPred: hasSMRDx3)
967 .Any(Init: {.Predicate: {{UniB128, P1}, isAlign4 && isUL}, .OperandMapping: {{SgprB128}, {SgprP1}}})
968 .Any(Init: {.Predicate: {{UniB256, P1}, isAlign4 && isUL}, .OperandMapping: {{SgprB256}, {SgprP1}}})
969 .Any(Init: {.Predicate: {{UniB512, P1}, isAlign4 && isUL}, .OperandMapping: {{SgprB512}, {SgprP1}}})
970
971 // Uniform via global or buffer load, for example volatile or non-aligned
972 // uniform load. Not using standard {{UniInVgprTy}, {VgprP1}} since it is
973 // selected as global_load, use SgprP1 for pointer instead to match
974 // patterns without flat-for-global, default for GFX7 and older.
975 // -> +flat-for-global + {{UniInVgprTy}, {SgprP1}} - global_load
976 // -> -flat-for-global + {{UniInVgprTy}, {SgprP1}} - buffer_load
977 .Any(Init: {.Predicate: {{UniS16, P1}, !isNaturalAligned || !isUL}, .OperandMapping: {{UniInVgprS16}, {SgprP1}}}, STPred: usesTrue16 && hasSMRDSmall) // s16 load
978 .Any(Init: {.Predicate: {{UniS16, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprS16}, {SgprP1}}}, STPred: usesTrue16 && !hasSMRDSmall) // s16 load
979 .Any(Init: {.Predicate: {{UniB32, P1}, !isNaturalAligned || !isUL}, .OperandMapping: {{UniInVgprB32}, {SgprP1}}}, STPred: hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
980 .Any(Init: {.Predicate: {{UniB32, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB32}, {SgprP1}}}, STPred: !hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
981 .Any(Init: {.Predicate: {{UniB64, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB64}, {SgprP1}}})
982 .Any(Init: {.Predicate: {{UniB96, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB96}, {SgprP1}}})
983 .Any(Init: {.Predicate: {{UniB128, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB128}, {SgprP1}}})
984 .Any(Init: {.Predicate: {{UniB256, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB256}, {SgprP1}, SplitLoad}})
985 .Any(Init: {.Predicate: {{UniB512, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB512}, {SgprP1}, SplitLoad}})
986
987 // local, addrspace(3) - ds_load
988 .Any(Init: {.Predicate: {DivS16, P3}, .OperandMapping: {{Vgpr16}, {VgprP3}}}, STPred: usesTrue16)
989 .Any(Init: {.Predicate: {DivB32, P3}, .OperandMapping: {{VgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
990 .Any(Init: {.Predicate: {DivB64, P3}, .OperandMapping: {{VgprB64}, {VgprP3}}})
991 .Any(Init: {.Predicate: {DivB96, P3}, .OperandMapping: {{VgprB96}, {VgprP3}}})
992 .Any(Init: {.Predicate: {DivB128, P3}, .OperandMapping: {{VgprB128}, {VgprP3}}})
993
994 .Any(Init: {.Predicate: {UniS16, P3}, .OperandMapping: {{UniInVgprS16}, {SgprP3}}}, STPred: usesTrue16) // 16-bit load
995 .Any(Init: {.Predicate: {UniB32, P3}, .OperandMapping: {{UniInVgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
996 .Any(Init: {.Predicate: {UniB64, P3}, .OperandMapping: {{UniInVgprB64}, {VgprP3}}})
997 .Any(Init: {.Predicate: {UniB96, P3}, .OperandMapping: {{UniInVgprB96}, {VgprP3}}})
998 .Any(Init: {.Predicate: {UniB128, P3}, .OperandMapping: {{UniInVgprB128}, {VgprP3}}})
999
1000 // constant, addrspace(4)
1001 // divergent - global_load
1002 .Any(Init: {.Predicate: {DivS16, P4}, .OperandMapping: {{Vgpr16}, {VgprP4}}}, STPred: usesTrue16)
1003 .Any(Init: {.Predicate: {DivB32, P4}, .OperandMapping: {{VgprB32}, {VgprP4}}}) //32-bit load, 8-bit and 16-bit any-extending load
1004 .Any(Init: {.Predicate: {DivB64, P4}, .OperandMapping: {{VgprB64}, {VgprP4}}})
1005 .Any(Init: {.Predicate: {DivB96, P4}, .OperandMapping: {{VgprB96}, {VgprP4}}})
1006 .Any(Init: {.Predicate: {DivB128, P4}, .OperandMapping: {{VgprB128}, {VgprP4}}})
1007 .Any(Init: {.Predicate: {DivB256, P4}, .OperandMapping: {{VgprB256}, {VgprP4}, SplitLoad}})
1008 .Any(Init: {.Predicate: {DivB512, P4}, .OperandMapping: {{VgprB512}, {VgprP4}, SplitLoad}})
1009
1010 // uniform - s_load
1011 .Any(Init: {.Predicate: {{UniS16, P4}, isNaturalAligned && isUL}, .OperandMapping: {{Sgpr32Trunc}, {SgprP4}}}, STPred: usesTrue16 && hasSMRDSmall) // s16 load
1012 .Any(Init: {.Predicate: {{UniS16, P4}, isAlign4 && isUL}, .OperandMapping: {{Sgpr32Trunc}, {SgprP4}, WidenMMOToS32}}, STPred: usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
1013 .Any(Init: {.Predicate: {{UniB32, P4}, isNaturalAligned && isUL}, .OperandMapping: {{SgprB32}, {SgprP4}}}, STPred: hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
1014 .Any(Init: {.Predicate: {{UniB32, P4}, is8Or16BitMMO && isAlign4 && isUL}, .OperandMapping: {{SgprB32}, {SgprP4}, WidenMMOToS32}}, STPred: !hasSMRDSmall) //8-bit and 16-bit any-extending load to 32-bit load
1015 .Any(Init: {.Predicate: {{UniB32, P4}, is32BitMMO && isAlign4 && isUL}, .OperandMapping: {{SgprB32}, {SgprP4}}}) //32-bit load
1016 .Any(Init: {.Predicate: {{UniB64, P4}, isAlign4 && isUL}, .OperandMapping: {{SgprB64}, {SgprP4}}})
1017 .Any(Init: {.Predicate: {{UniB96, P4}, isAlign16 && isUL}, .OperandMapping: {{SgprB96}, {SgprP4}, WidenLoad}}, STPred: !hasSMRDx3)
1018 .Any(Init: {.Predicate: {{UniB96, P4}, isAlign4 && !isAlign16 && isUL}, .OperandMapping: {{SgprB96}, {SgprP4}, SplitLoad}}, STPred: !hasSMRDx3)
1019 .Any(Init: {.Predicate: {{UniB96, P4}, isAlign4 && isUL}, .OperandMapping: {{SgprB96}, {SgprP4}}}, STPred: hasSMRDx3)
1020 .Any(Init: {.Predicate: {{UniB128, P4}, isAlign4 && isUL}, .OperandMapping: {{SgprB128}, {SgprP4}}})
1021 .Any(Init: {.Predicate: {{UniB256, P4}, isAlign4 && isUL}, .OperandMapping: {{SgprB256}, {SgprP4}}})
1022 .Any(Init: {.Predicate: {{UniB512, P4}, isAlign4 && isUL}, .OperandMapping: {{SgprB512}, {SgprP4}}})
1023
1024 // uniform in vgpr - global_load or buffer_load
1025 .Any(Init: {.Predicate: {{UniS16, P4}, !isNaturalAligned || !isUL}, .OperandMapping: {{UniInVgprS16}, {SgprP4}}}, STPred: usesTrue16 && hasSMRDSmall) // s16 load
1026 .Any(Init: {.Predicate: {{UniS16, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprS16}, {SgprP4}}}, STPred: usesTrue16 && !hasSMRDSmall) // s16 load
1027 .Any(Init: {.Predicate: {{UniB32, P4}, !isNaturalAligned || !isUL}, .OperandMapping: {{UniInVgprB32}, {SgprP4}}}, STPred: hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
1028 .Any(Init: {.Predicate: {{UniB32, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB32}, {SgprP4}}}, STPred: !hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
1029 .Any(Init: {.Predicate: {{UniB64, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB64}, {SgprP4}}})
1030 .Any(Init: {.Predicate: {{UniB96, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB96}, {SgprP4}}})
1031 .Any(Init: {.Predicate: {{UniB128, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB128}, {SgprP4}}})
1032 .Any(Init: {.Predicate: {{UniB256, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB256}, {SgprP4}, SplitLoad}})
1033 .Any(Init: {.Predicate: {{UniB512, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB512}, {SgprP4}, SplitLoad}})
1034
1035 // private, addrspace(5), never uniform - scratch_load
1036 .Any(Init: {.Predicate: {DivS16, P5}, .OperandMapping: {{Vgpr16}, {VgprP5}}}, STPred: usesTrue16)
1037 .Any(Init: {.Predicate: {DivB32, P5}, .OperandMapping: {{VgprB32}, {VgprP5}}}) // 32-bit load, 8-bit and 16-bit any-extending load
1038 .Any(Init: {.Predicate: {DivB64, P5}, .OperandMapping: {{VgprB64}, {VgprP5}}})
1039 .Any(Init: {.Predicate: {DivB96, P5}, .OperandMapping: {{VgprB96}, {VgprP5}}})
1040 .Any(Init: {.Predicate: {DivB128, P5}, .OperandMapping: {{VgprB128}, {VgprP5}}})
1041
1042 .Any(Init: {.Predicate: {DivS32, Ptr128}, .OperandMapping: {{Vgpr32}, {VgprPtr128}}});
1043
1044
1045 addRulesForGOpcs(OpcList: {G_ZEXTLOAD, G_SEXTLOAD}) // i8 and i16 zeroextending loads
1046 .Any(Init: {.Predicate: {DivS32, P0}, .OperandMapping: {{Vgpr32}, {VgprP0}}})
1047
1048 .Any(Init: {.Predicate: {DivS32, P1}, .OperandMapping: {{Vgpr32}, {VgprP1}}})
1049 .Any(Init: {.Predicate: {{UniS32, P1}, isAlign4 && isUL}, .OperandMapping: {{Sgpr32}, {SgprP1}, WidenMMOToS32}}, STPred: !hasSMRDSmall)
1050 .Any(Init: {.Predicate: {{UniS32, P1}, isNaturalAligned && isUL}, .OperandMapping: {{Sgpr32}, {SgprP1}}}, STPred: hasSMRDSmall)
1051 .Any(Init: {.Predicate: {{UniS32, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprS32}, {SgprP1}}}, STPred: !hasSMRDSmall)
1052 .Any(Init: {.Predicate: {{UniS32, P1}, !isNaturalAligned || !isUL}, .OperandMapping: {{UniInVgprS32}, {SgprP1}}}, STPred: hasSMRDSmall)
1053
1054 .Any(Init: {.Predicate: {DivS32, P3}, .OperandMapping: {{Vgpr32}, {VgprP3}}})
1055 .Any(Init: {.Predicate: {UniS32, P3}, .OperandMapping: {{UniInVgprS32}, {VgprP3}}})
1056
1057 .Any(Init: {.Predicate: {DivS32, P4}, .OperandMapping: {{Vgpr32}, {VgprP4}}})
1058 .Any(Init: {.Predicate: {{UniS32, P4}, isAlign4 && isUL}, .OperandMapping: {{Sgpr32}, {SgprP4}, WidenMMOToS32}}, STPred: !hasSMRDSmall)
1059 .Any(Init: {.Predicate: {{UniS32, P4}, isNaturalAligned && isUL}, .OperandMapping: {{Sgpr32}, {SgprP4}}}, STPred: hasSMRDSmall)
1060 .Any(Init: {.Predicate: {{UniS32, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprS32}, {SgprP4}}}, STPred: !hasSMRDSmall)
1061 .Any(Init: {.Predicate: {{UniS32, P4}, !isNaturalAligned || !isUL}, .OperandMapping: {{UniInVgprS32}, {SgprP4}}}, STPred: hasSMRDSmall)
1062
1063 .Any(Init: {.Predicate: {DivS32, P5}, .OperandMapping: {{Vgpr32}, {VgprP5}}});
1064
1065 addRulesForGOpcs(OpcList: {G_STORE})
1066 // addrspace(0)
1067 .Any(Init: {.Predicate: {S16, P0}, .OperandMapping: {{}, {Vgpr16, VgprP0}}}, STPred: usesTrue16) // 16-bit store
1068 .Any(Init: {.Predicate: {B32, P0}, .OperandMapping: {{}, {VgprB32, VgprP0}}}) // 32-bit store, 8-bit and 16-bit truncating store
1069 .Any(Init: {.Predicate: {B64, P0}, .OperandMapping: {{}, {VgprB64, VgprP0}}})
1070 .Any(Init: {.Predicate: {B96, P0}, .OperandMapping: {{}, {VgprB96, VgprP0}}})
1071 .Any(Init: {.Predicate: {B128, P0}, .OperandMapping: {{}, {VgprB128, VgprP0}}})
1072
1073 // addrspace(1), there are no stores to addrspace(4)
1074 // For targets:
1075 // - with "+flat-for-global" - global_store
1076 // - without(-flat-for-global) - buffer_store addr64
1077 .Any(Init: {.Predicate: {S16, DivP1}, .OperandMapping: {{}, {Vgpr16, VgprP1}}}, STPred: usesTrue16) // 16-bit store
1078 .Any(Init: {.Predicate: {B32, DivP1}, .OperandMapping: {{}, {VgprB32, VgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
1079 .Any(Init: {.Predicate: {B64, DivP1}, .OperandMapping: {{}, {VgprB64, VgprP1}}})
1080 .Any(Init: {.Predicate: {B96, DivP1}, .OperandMapping: {{}, {VgprB96, VgprP1}}})
1081 .Any(Init: {.Predicate: {B128, DivP1}, .OperandMapping: {{}, {VgprB128, VgprP1}}})
1082
1083 // For UniP1, use sgpr ptr to match flat-for-global patterns. Targets:
1084 // - with "+flat-for-global" - global_store for both sgpr and vgpr ptr
1085 // - without(-flat-for-global) - need sgpr ptr to select buffer_store
1086 .Any(Init: {.Predicate: {S16, UniP1}, .OperandMapping: {{}, {Vgpr16, SgprP1}}}, STPred: usesTrue16) // 16-bit store
1087 .Any(Init: {.Predicate: {B32, UniP1}, .OperandMapping: {{}, {VgprB32, SgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
1088 .Any(Init: {.Predicate: {B64, UniP1}, .OperandMapping: {{}, {VgprB64, SgprP1}}})
1089 .Any(Init: {.Predicate: {B96, UniP1}, .OperandMapping: {{}, {VgprB96, SgprP1}}})
1090 .Any(Init: {.Predicate: {B128, UniP1}, .OperandMapping: {{}, {VgprB128, SgprP1}}})
1091
1092 // addrspace(3) and addrspace(5)
1093 .Any(Init: {.Predicate: {S16, Ptr32}, .OperandMapping: {{}, {Vgpr16, VgprPtr32}}}, STPred: usesTrue16) // 16-bit store
1094 .Any(Init: {.Predicate: {B32, Ptr32}, .OperandMapping: {{}, {VgprB32, VgprPtr32}}}) // 32-bit store, 8-bit and 16-bit truncating store
1095 .Any(Init: {.Predicate: {B64, Ptr32}, .OperandMapping: {{}, {VgprB64, VgprPtr32}}})
1096 .Any(Init: {.Predicate: {B96, Ptr32}, .OperandMapping: {{}, {VgprB96, VgprPtr32}}})
1097 .Any(Init: {.Predicate: {B128, Ptr32}, .OperandMapping: {{}, {VgprB128, VgprPtr32}}});
1098
1099 // clang-format on
1100
1101 addRulesForGOpcs(OpcList: {G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
1102 G_AMDGPU_TBUFFER_LOAD_FORMAT},
1103 FastTypes: StandardB)
1104 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1105 .Uni(Ty: B32, RuleApplyIDs: {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1106 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1107 .Uni(Ty: B64, RuleApplyIDs: {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1108 .Div(Ty: B96, RuleApplyIDs: {{VgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1109 .Uni(Ty: B96, RuleApplyIDs: {{UniInVgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1110 .Div(Ty: B128, RuleApplyIDs: {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1111 .Uni(Ty: B128, RuleApplyIDs: {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});
1112
1113 addRulesForGOpcs(OpcList: {G_AMDGPU_BUFFER_LOAD_USHORT, G_AMDGPU_BUFFER_LOAD_UBYTE,
1114 G_AMDGPU_BUFFER_LOAD_SSHORT, G_AMDGPU_BUFFER_LOAD_SBYTE},
1115 FastTypes: StandardB)
1116 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1117 .Uni(Ty: B32, RuleApplyIDs: {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});
1118
1119 addRulesForGOpcs(
1120 OpcList: {G_AMDGPU_BUFFER_LOAD_UBYTE_TFE, G_AMDGPU_BUFFER_LOAD_USHORT_TFE},
1121 FastTypes: StandardB)
1122 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1123 .Uni(Ty: B64, RuleApplyIDs: {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});
1124
1125 addRulesForGOpcs(OpcList: {G_AMDGPU_BUFFER_LOAD_TFE, G_AMDGPU_BUFFER_LOAD_FORMAT_TFE},
1126 FastTypes: StandardB)
1127 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1128 .Uni(Ty: B64, RuleApplyIDs: {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1129 .Div(Ty: B96, RuleApplyIDs: {{VgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1130 .Uni(Ty: B96, RuleApplyIDs: {{UniInVgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1131 .Div(Ty: B128, RuleApplyIDs: {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1132 .Uni(Ty: B128, RuleApplyIDs: {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1133 .Any(Init: {.Predicate: {DivB160}, .OperandMapping: {{VgprB160}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1134 .Any(Init: {.Predicate: {UniB160},
1135 .OperandMapping: {{UniInVgprB160}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});
1136
1137 addRulesForGOpcs(
1138 OpcList: {G_AMDGPU_BUFFER_LOAD_FORMAT_D16, G_AMDGPU_TBUFFER_LOAD_FORMAT_D16},
1139 FastTypes: StandardB)
1140 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1141 .Uni(Ty: B32, RuleApplyIDs: {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1142 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1143 .Uni(Ty: B64, RuleApplyIDs: {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1144 .Div(Ty: B128, RuleApplyIDs: {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1145 .Uni(Ty: B128, RuleApplyIDs: {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});
1146
1147 addRulesForGOpcs(OpcList: {G_AMDGPU_BUFFER_STORE, G_AMDGPU_BUFFER_STORE_BYTE,
1148 G_AMDGPU_BUFFER_STORE_SHORT, G_AMDGPU_BUFFER_STORE_FORMAT,
1149 G_AMDGPU_BUFFER_STORE_FORMAT_D16,
1150 G_AMDGPU_TBUFFER_STORE_FORMAT,
1151 G_AMDGPU_TBUFFER_STORE_FORMAT_D16})
1152 .Any(Init: {.Predicate: {B32}, .OperandMapping: {{}, {VgprB32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1153 .Any(Init: {.Predicate: {B64}, .OperandMapping: {{}, {VgprB64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1154 .Any(Init: {.Predicate: {B96}, .OperandMapping: {{}, {VgprB96, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1155 .Any(Init: {.Predicate: {B128}, .OperandMapping: {{}, {VgprB128, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});
1156
1157 // Buffer atomics: resource descriptor + scalar offset are SGPR, data and
1158 // address components are VGPR.
1159 //
1160 // Operand order (SIInstructions.td BufferAtomicGenericInstruction):
1161 // dst = op vdata, rsrc, vindex, voffset, soffset, offset_imm, cachepolicy,
1162 // idxen_imm
1163 addRulesForGOpcs(OpcList: {G_AMDGPU_BUFFER_ATOMIC_FADD})
1164 .Any(Init: {.Predicate: {S32, S32, V4S32, S32, S32, S32},
1165 .OperandMapping: {{Vgpr32}, {Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1166 .Any(Init: {.Predicate: {S64, S64, V4S32, S32, S32, S32},
1167 .OperandMapping: {{Vgpr64}, {Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1168 .Any(Init: {.Predicate: {V2S16, V2S16, V4S32, S32, S32, S32},
1169 .OperandMapping: {{VgprV2S16},
1170 {VgprV2S16, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});
1171
1172 addRulesForGOpcs(OpcList: {G_PTR_ADD})
1173 .Any(Init: {.Predicate: {UniPtr32}, .OperandMapping: {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
1174 .Any(Init: {.Predicate: {DivPtr32}, .OperandMapping: {{VgprPtr32}, {VgprPtr32, Vgpr32}}})
1175 .Any(Init: {.Predicate: {UniPtr64}, .OperandMapping: {{SgprPtr64}, {SgprPtr64, Sgpr64}}})
1176 .Any(Init: {.Predicate: {DivPtr64}, .OperandMapping: {{VgprPtr64}, {VgprPtr64, Vgpr64}}});
1177
1178 addRulesForGOpcs(OpcList: {G_INTTOPTR})
1179 .Any(Init: {.Predicate: {UniPtr32}, .OperandMapping: {{SgprPtr32}, {Sgpr32}}})
1180 .Any(Init: {.Predicate: {DivPtr32}, .OperandMapping: {{VgprPtr32}, {Vgpr32}}})
1181 .Any(Init: {.Predicate: {UniPtr64}, .OperandMapping: {{SgprPtr64}, {Sgpr64}}})
1182 .Any(Init: {.Predicate: {DivPtr64}, .OperandMapping: {{VgprPtr64}, {Vgpr64}}})
1183 .Any(Init: {.Predicate: {UniPtr128}, .OperandMapping: {{SgprPtr128}, {Sgpr128}}})
1184 .Any(Init: {.Predicate: {DivPtr128}, .OperandMapping: {{VgprPtr128}, {Vgpr128}}});
1185
1186 addRulesForGOpcs(OpcList: {G_PTRTOINT})
1187 .Any(Init: {.Predicate: {UniS32}, .OperandMapping: {{Sgpr32}, {SgprPtr32}}})
1188 .Any(Init: {.Predicate: {DivS32}, .OperandMapping: {{Vgpr32}, {VgprPtr32}}})
1189 .Any(Init: {.Predicate: {UniS64}, .OperandMapping: {{Sgpr64}, {SgprPtr64}}})
1190 .Any(Init: {.Predicate: {DivS64}, .OperandMapping: {{Vgpr64}, {VgprPtr64}}})
1191 .Any(Init: {.Predicate: {UniS128}, .OperandMapping: {{Sgpr128}, {SgprPtr128}}})
1192 .Any(Init: {.Predicate: {DivS128}, .OperandMapping: {{Vgpr128}, {VgprPtr128}}});
1193
1194 // FIXME: Update llvm/test/CodeGen/AMDGPU/ptrmask.ll to use GlobalISel.
1195 // Currently crashes on P8 (buffer resource) tests due to legalizer issue.
1196 addRulesForGOpcs(OpcList: {G_PTRMASK})
1197 .Any(Init: {.Predicate: {UniP1}, .OperandMapping: {{SgprP1}, {SgprP1, Sgpr64}}})
1198 .Any(Init: {.Predicate: {DivP1}, .OperandMapping: {{VgprP1}, {VgprP1, Vgpr64}}})
1199 .Any(Init: {.Predicate: {UniP3}, .OperandMapping: {{SgprP3}, {SgprP3, Sgpr32}}})
1200 .Any(Init: {.Predicate: {DivP3}, .OperandMapping: {{VgprP3}, {VgprP3, Vgpr32}}});
1201
1202 addRulesForGOpcs(OpcList: {G_ABS}, FastTypes: Standard).Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32SExt}});
1203
1204 addRulesForGOpcs(OpcList: {G_BITREVERSE}, FastTypes: Standard)
1205 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32}})
1206 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32}})
1207 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64}})
1208 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64}});
1209
1210 addRulesForGOpcs(OpcList: {G_AMDGPU_FFBH_U32, G_AMDGPU_FFBL_B32, G_CTLZ_ZERO_UNDEF,
1211 G_CTTZ_ZERO_UNDEF})
1212 .Any(Init: {.Predicate: {UniS32, S32}, .OperandMapping: {{Sgpr32}, {Sgpr32}}})
1213 .Any(Init: {.Predicate: {DivS32, S32}, .OperandMapping: {{Vgpr32}, {Vgpr32}}})
1214 .Any(Init: {.Predicate: {UniS32, S64}, .OperandMapping: {{Sgpr32}, {Sgpr64}}})
1215 .Any(Init: {.Predicate: {DivS32, S64}, .OperandMapping: {{Vgpr32}, {Vgpr64}, SplitBitCount64To32}});
1216
1217 addRulesForGOpcs(OpcList: {G_FENCE}).Any(Init: {.Predicate: {{}}, .OperandMapping: {{}, {}}});
1218
1219 addRulesForGOpcs(OpcList: {G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, FastTypes: Standard)
1220 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {}});
1221
1222 addRulesForGOpcs(OpcList: {G_BLOCK_ADDR}).Any(Init: {.Predicate: {UniP0}, .OperandMapping: {{SgprP0}, {}}});
1223
1224 addRulesForGOpcs(OpcList: {G_GLOBAL_VALUE})
1225 .Any(Init: {.Predicate: {UniP0}, .OperandMapping: {{SgprP0}, {}}})
1226 .Any(Init: {.Predicate: {UniP1}, .OperandMapping: {{SgprP1}, {}}})
1227 .Any(Init: {.Predicate: {UniP3}, .OperandMapping: {{SgprP3}, {}}})
1228 .Any(Init: {.Predicate: {UniP4}, .OperandMapping: {{SgprP4}, {}}})
1229 .Any(Init: {.Predicate: {UniP8}, .OperandMapping: {{SgprP8}, {}}});
1230
1231 addRulesForGOpcs(OpcList: {G_AMDGPU_WAVE_ADDRESS}).Any(Init: {.Predicate: {UniP5}, .OperandMapping: {{SgprP5}, {}}});
1232
1233 addRulesForGOpcs(OpcList: {G_SI_CALL})
1234 .Any(Init: {.Predicate: {_, UniP0}, .OperandMapping: {{None}, {SgprP0}}})
1235 .Any(Init: {.Predicate: {_, DivP0}, .OperandMapping: {{None}, {SgprP0Call_WF}}})
1236 .Any(Init: {.Predicate: {_, UniP4}, .OperandMapping: {{None}, {SgprP4}}})
1237 .Any(Init: {.Predicate: {_, DivP4}, .OperandMapping: {{None}, {SgprP4Call_WF}}});
1238
1239 bool hasSALUFloat = ST->hasSALUFloatInsts();
1240
1241 addRulesForGOpcs(OpcList: {G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL}, FastTypes: Standard)
1242 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16}}, STPred: !hasSALUFloat)
1243 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {Sgpr16, Sgpr16}}, STPred: hasSALUFloat)
1244 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
1245 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}}, STPred: hasSALUFloat)
1246 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}}, STPred: !hasSALUFloat)
1247 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
1248 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64, Vgpr64}})
1249 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr64}})
1250 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}}, STPred: !hasSALUFloat)
1251 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16}, ScalarizeToS16},
1252 STPred: hasSALUFloat)
1253 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
1254
1255 addRulesForGOpcs(OpcList: {G_FSUB, G_STRICT_FSUB}, FastTypes: Standard)
1256 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
1257 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
1258 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {Sgpr16, Sgpr16}}, STPred: hasSALUFloat)
1259 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16}}, STPred: !hasSALUFloat)
1260 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}}, STPred: hasSALUFloat)
1261 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}}, STPred: !hasSALUFloat);
1262
1263 addRulesForGOpcs(OpcList: {G_FMAD}, FastTypes: Standard)
1264 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
1265 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
1266 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
1267 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
1268
1269 addRulesForGOpcs(OpcList: {G_FLDEXP, G_STRICT_FLDEXP}, FastTypes: Standard)
1270 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}})
1271 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
1272 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16}})
1273 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
1274 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64, Vgpr32}})
1275 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr32}});
1276
1277 addRulesForGOpcs(OpcList: {G_FMA, G_STRICT_FMA}, FastTypes: Standard)
1278 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
1279 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
1280 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64, Vgpr64, Vgpr64}})
1281 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr64}})
1282 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}})
1283 .Any(Init: {.Predicate: {UniV2S32}, .OperandMapping: {{UniInVgprV2S32}, {VgprV2S32, VgprV2S32, VgprV2S32}}})
1284 .Any(Init: {.Predicate: {DivV2S32}, .OperandMapping: {{VgprV2S32}, {VgprV2S32, VgprV2S32, VgprV2S32}}})
1285 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {Sgpr16, Sgpr16, Sgpr16}}, STPred: hasSALUFloat)
1286 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}}, STPred: !hasSALUFloat)
1287 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}}, STPred: hasSALUFloat)
1288 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}}, STPred: !hasSALUFloat)
1289 .Uni(Ty: V2S16,
1290 RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16, SgprV2S16}, ScalarizeToS16},
1291 STPred: hasSALUFloat)
1292 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}},
1293 STPred: !hasSALUFloat);
1294
1295 addRulesForGOpcs(OpcList: {G_AMDGPU_FMED3}, FastTypes: Standard)
1296 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
1297 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
1298 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
1299 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
1300
1301 // TODO: This opcode is generated from the i64->i16 signed clamped pattern in
1302 // the PreLegalizerCombiner. Move the combine to RegBankCombiner to keep more
1303 // instructions on SALU.
1304 addRulesForGOpcs(OpcList: {G_AMDGPU_SMED3}, FastTypes: Standard)
1305 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
1306 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
1307
1308 // FNEG and FABS are either folded as source modifiers or can be selected as
1309 // bitwise XOR and AND with Mask. XOR and AND are available on SALU but for
1310 // targets without SALU float we still select them as VGPR since there would
1311 // be no real sgpr use.
1312 addRulesForGOpcs(OpcList: {G_FNEG, G_FABS}, FastTypes: Standard)
1313 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16}}, STPred: !hasSALUFloat)
1314 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {Sgpr16}}, STPred: hasSALUFloat)
1315 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16}})
1316 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32}}, STPred: !hasSALUFloat)
1317 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32}}, STPred: hasSALUFloat)
1318 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32}})
1319 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64}})
1320 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64}})
1321 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16}}, STPred: !hasSALUFloat)
1322 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16}, ScalarizeToS16}, STPred: hasSALUFloat)
1323 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16}})
1324 .Any(Init: {.Predicate: {UniV2S32}, .OperandMapping: {{UniInVgprV2S32}, {VgprV2S32}}})
1325 .Any(Init: {.Predicate: {DivV2S32}, .OperandMapping: {{VgprV2S32}, {VgprV2S32}}});
1326
1327 addRulesForGOpcs(OpcList: {G_FCANONICALIZE}, FastTypes: Standard)
1328 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32}})
1329 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32}})
1330 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16}})
1331 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16}})
1332 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64}})
1333 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64}})
1334 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16}})
1335 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16}})
1336 .Any(Init: {.Predicate: {UniV2S32}, .OperandMapping: {{UniInVgprV2S32}, {VgprV2S32}}})
1337 .Any(Init: {.Predicate: {DivV2S32}, .OperandMapping: {{VgprV2S32}, {VgprV2S32}}});
1338
1339 bool hasPST = ST->hasPseudoScalarTrans();
1340 addRulesForGOpcs(OpcList: {G_FSQRT}, FastTypes: Standard)
1341 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16}})
1342 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {Sgpr16}}, STPred: hasPST)
1343 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16}}, STPred: !hasPST);
1344
1345 addRulesForGOpcs(OpcList: {G_FPTOUI, G_FPTOSI})
1346 .Any(Init: {.Predicate: {UniS16, S16}, .OperandMapping: {{UniInVgprS16}, {Vgpr16}}})
1347 .Any(Init: {.Predicate: {DivS16, S16}, .OperandMapping: {{Vgpr16}, {Vgpr16}}})
1348 .Any(Init: {.Predicate: {UniS32, S16}, .OperandMapping: {{Sgpr32}, {Sgpr16}}}, STPred: hasSALUFloat)
1349 .Any(Init: {.Predicate: {UniS32, S16}, .OperandMapping: {{UniInVgprS32}, {Vgpr16}}}, STPred: !hasSALUFloat)
1350 .Any(Init: {.Predicate: {DivS32, S16}, .OperandMapping: {{Vgpr32}, {Vgpr16}}})
1351 .Any(Init: {.Predicate: {UniS32, S32}, .OperandMapping: {{Sgpr32}, {Sgpr32}}}, STPred: hasSALUFloat)
1352 .Any(Init: {.Predicate: {UniS32, S32}, .OperandMapping: {{UniInVgprS32}, {Vgpr32}}}, STPred: !hasSALUFloat)
1353 .Any(Init: {.Predicate: {DivS32, S32}, .OperandMapping: {{Vgpr32}, {Vgpr32}}})
1354 .Any(Init: {.Predicate: {UniS32, S64}, .OperandMapping: {{UniInVgprS32}, {Vgpr64}}})
1355 .Any(Init: {.Predicate: {DivS32, S64}, .OperandMapping: {{Vgpr32}, {Vgpr64}}});
1356
1357 addRulesForGOpcs(OpcList: {G_UITOFP, G_SITOFP})
1358 .Any(Init: {.Predicate: {UniS16, S16}, .OperandMapping: {{UniInVgprS16}, {Vgpr16}}})
1359 .Any(Init: {.Predicate: {DivS16, S16}, .OperandMapping: {{Vgpr16}, {Vgpr16}}})
1360 .Any(Init: {.Predicate: {UniS16, S32}, .OperandMapping: {{Sgpr16}, {Sgpr32}}}, STPred: hasSALUFloat)
1361 .Any(Init: {.Predicate: {UniS16, S32}, .OperandMapping: {{UniInVgprS16}, {Vgpr32}}}, STPred: !hasSALUFloat)
1362 .Any(Init: {.Predicate: {DivS16, S32}, .OperandMapping: {{Vgpr16}, {Vgpr32}}})
1363 .Any(Init: {.Predicate: {UniS32, S32}, .OperandMapping: {{Sgpr32}, {Sgpr32}}}, STPred: hasSALUFloat)
1364 .Any(Init: {.Predicate: {UniS32, S32}, .OperandMapping: {{UniInVgprS32}, {Vgpr32}}}, STPred: !hasSALUFloat)
1365 .Any(Init: {.Predicate: {DivS32, S32}, .OperandMapping: {{Vgpr32}, {Vgpr32}}})
1366 .Any(Init: {.Predicate: {UniS64, S32}, .OperandMapping: {{UniInVgprS64}, {Vgpr32}}})
1367 .Any(Init: {.Predicate: {DivS64, S32}, .OperandMapping: {{Vgpr64}, {Vgpr32}}});
1368
1369 addRulesForGOpcs(OpcList: {G_FPEXT})
1370 .Any(Init: {.Predicate: {DivS32, S16}, .OperandMapping: {{Vgpr32}, {Vgpr16}}})
1371 .Any(Init: {.Predicate: {UniS64, S32}, .OperandMapping: {{UniInVgprS64}, {Vgpr32}}})
1372 .Any(Init: {.Predicate: {DivS64, S32}, .OperandMapping: {{Vgpr64}, {Vgpr32}}})
1373 .Any(Init: {.Predicate: {UniS32, S16}, .OperandMapping: {{Sgpr32}, {Sgpr16}}}, STPred: hasSALUFloat)
1374 .Any(Init: {.Predicate: {UniS32, S16}, .OperandMapping: {{UniInVgprS32}, {Vgpr16}}}, STPred: !hasSALUFloat);
1375
1376 addRulesForGOpcs(OpcList: {G_AMDGPU_CVT_PK_I16_I32}, FastTypes: Standard)
1377 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {Vgpr32, Vgpr32}})
1378 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {Vgpr32, Vgpr32}});
1379
1380 addRulesForGOpcs(OpcList: {G_AMDGPU_FMIN_LEGACY, G_AMDGPU_FMAX_LEGACY}, FastTypes: Standard)
1381 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}})
1382 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}});
1383
1384 bool hasSALUMinimumMaximumInsts = ST->hasSALUMinimumMaximumInsts();
1385
1386 addRulesForGOpcs(OpcList: {G_FMINIMUM, G_FMAXIMUM}, FastTypes: Standard)
1387 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {Sgpr16, Sgpr16}}, STPred: hasSALUMinimumMaximumInsts)
1388 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16}}, STPred: !hasSALUMinimumMaximumInsts)
1389 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
1390 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}}, STPred: hasSALUMinimumMaximumInsts)
1391 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}}, STPred: !hasSALUMinimumMaximumInsts)
1392 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
1393 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64, Vgpr64}})
1394 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr64}})
1395 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
1396 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
1397
1398 addRulesForGOpcs(OpcList: {G_FMINNUM_IEEE, G_FMAXNUM_IEEE, G_FMINNUM, G_FMAXNUM},
1399 FastTypes: Standard)
1400 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
1401 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
1402 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64, Vgpr64}})
1403 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr64}})
1404 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
1405 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
1406 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {Sgpr16, Sgpr16}}, STPred: hasSALUFloat)
1407 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16}}, STPred: !hasSALUFloat)
1408 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}}, STPred: hasSALUFloat)
1409 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}}, STPred: !hasSALUFloat);
1410
1411 addRulesForGOpcs(OpcList: {G_FPTRUNC})
1412 .Any(Init: {.Predicate: {DivS16, S32}, .OperandMapping: {{Vgpr16}, {Vgpr32}}})
1413 .Any(Init: {.Predicate: {UniS32, S64}, .OperandMapping: {{UniInVgprS32}, {Vgpr64}}})
1414 .Any(Init: {.Predicate: {DivS32, S64}, .OperandMapping: {{Vgpr32}, {Vgpr64}}})
1415 .Any(Init: {.Predicate: {UniV2S16, V2S32}, .OperandMapping: {{UniInVgprV2S16}, {VgprV2S32}}})
1416 .Any(Init: {.Predicate: {DivV2S16, V2S32}, .OperandMapping: {{VgprV2S16}, {VgprV2S32}}})
1417 .Any(Init: {.Predicate: {UniS16, S32}, .OperandMapping: {{Sgpr16}, {Sgpr32}}}, STPred: hasSALUFloat)
1418 .Any(Init: {.Predicate: {UniS16, S32}, .OperandMapping: {{UniInVgprS16}, {Vgpr32}}}, STPred: !hasSALUFloat);
1419
1420 addRulesForGOpcs(OpcList: {G_IS_FPCLASS})
1421 .Any(Init: {.Predicate: {DivS1, S16}, .OperandMapping: {{Vcc}, {Vgpr16}}})
1422 .Any(Init: {.Predicate: {UniS1, S16}, .OperandMapping: {{UniInVcc}, {Vgpr16}}})
1423 .Any(Init: {.Predicate: {DivS1, S32}, .OperandMapping: {{Vcc}, {Vgpr32}}})
1424 .Any(Init: {.Predicate: {UniS1, S32}, .OperandMapping: {{UniInVcc}, {Vgpr32}}})
1425 .Any(Init: {.Predicate: {DivS1, S64}, .OperandMapping: {{Vcc}, {Vgpr64}}})
1426 .Any(Init: {.Predicate: {UniS1, S64}, .OperandMapping: {{UniInVcc}, {Vgpr64}}});
1427
1428 addRulesForGOpcs(OpcList: {G_FCMP}, FastTypes: Standard)
1429 .Any(Init: {.Predicate: {UniS1, _, S16}, .OperandMapping: {{Sgpr32Trunc}, {None, Sgpr16, Sgpr16}}},
1430 STPred: hasSALUFloat)
1431 .Any(Init: {.Predicate: {UniS1, _, S16}, .OperandMapping: {{UniInVcc}, {None, Vgpr16, Vgpr16}}},
1432 STPred: !hasSALUFloat)
1433 .Any(Init: {.Predicate: {DivS1, _, S16}, .OperandMapping: {{Vcc}, {None, Vgpr16, Vgpr16}}})
1434 .Any(Init: {.Predicate: {UniS1, _, S32}, .OperandMapping: {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}},
1435 STPred: hasSALUFloat)
1436 .Any(Init: {.Predicate: {UniS1, _, S32}, .OperandMapping: {{UniInVcc}, {None, Vgpr32, Vgpr32}}},
1437 STPred: !hasSALUFloat)
1438 .Any(Init: {.Predicate: {DivS1, _, S32}, .OperandMapping: {{Vcc}, {None, Vgpr32, Vgpr32}}})
1439 .Any(Init: {.Predicate: {UniS1, _, S64}, .OperandMapping: {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
1440 .Any(Init: {.Predicate: {DivS1, _, S64}, .OperandMapping: {{Vcc}, {None, Vgpr64, Vgpr64}}});
1441
1442 addRulesForGOpcs(OpcList: {G_INTRINSIC_TRUNC, G_INTRINSIC_ROUNDEVEN, G_FFLOOR, G_FCEIL,
1443 G_FEXP2, G_FLOG2},
1444 FastTypes: Standard)
1445 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16}})
1446 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16}})
1447 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32}})
1448 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32}})
1449 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64}})
1450 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64}});
1451
1452 using namespace Intrinsic;
1453
1454 addRulesForIOpcs(OpcList: {amdgcn_s_getpc}).Any(Init: {.Predicate: {UniS64, _}, .OperandMapping: {{Sgpr64}, {None}}});
1455
1456 addRulesForIOpcs(OpcList: {amdgcn_s_getreg}).Any(Init: {.Predicate: {}, .OperandMapping: {{Sgpr32}, {IntrId, Imm}}});
1457
1458 addRulesForIOpcs(OpcList: {amdgcn_s_setreg})
1459 .Any(Init: {.Predicate: {_, _, S32}, .OperandMapping: {{}, {IntrId, Imm, SgprB32_ReadFirstLane}}});
1460
1461 addRulesForIOpcs(OpcList: {amdgcn_s_sendmsg, amdgcn_s_sendmsghalt})
1462 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, Imm, SgprB32_M0}}});
1463
1464 addRulesForIOpcs(OpcList: {amdgcn_s_sendmsg_rtn})
1465 .Any(Init: {.Predicate: {S32}, .OperandMapping: {{Sgpr32}, {}}})
1466 .Any(Init: {.Predicate: {S64}, .OperandMapping: {{Sgpr64}, {}}});
1467
1468 addRulesForIOpcs(OpcList: {amdgcn_s_memrealtime, amdgcn_s_memtime}, FastTypes: Standard)
1469 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {IntrId}});
1470
1471 addRulesForIOpcs(OpcList: {amdgcn_groupstaticsize, amdgcn_pops_exiting_wave_id,
1472 amdgcn_reloc_constant, amdgcn_s_get_waveid_in_workgroup},
1473 FastTypes: Standard)
1474 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {IntrId}});
1475
1476 // Intrinsics with no register operands.
1477 addRulesForIOpcs(OpcList: {amdgcn_endpgm,
1478 amdgcn_s_barrier,
1479 amdgcn_s_barrier_signal,
1480 amdgcn_s_barrier_wait,
1481 amdgcn_s_nop,
1482 amdgcn_s_sethalt,
1483 amdgcn_s_setprio,
1484 amdgcn_s_sleep,
1485 amdgcn_s_ttracedata_imm,
1486 amdgcn_s_wait_asynccnt,
1487 amdgcn_s_wait_bvhcnt,
1488 amdgcn_s_wait_dscnt,
1489 amdgcn_s_wait_event,
1490 amdgcn_s_wait_event_export_ready,
1491 amdgcn_s_wait_expcnt,
1492 amdgcn_s_wait_kmcnt,
1493 amdgcn_s_wait_loadcnt,
1494 amdgcn_s_wait_samplecnt,
1495 amdgcn_s_wait_storecnt,
1496 amdgcn_s_wait_tensorcnt,
1497 amdgcn_s_waitcnt,
1498 amdgcn_wave_barrier})
1499 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {}}});
1500
1501 addRulesForIOpcs(OpcList: {amdgcn_s_ttracedata}).Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, SgprB32_M0}}});
1502
1503 addRulesForIOpcs(OpcList: {amdgcn_s_sleep_var})
1504 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, SgprB32_ReadFirstLane}}});
1505
1506 addRulesForIOpcs(OpcList: {amdgcn_s_prefetch_data})
1507 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, SgprB64_ReadFirstLane, SgprB32_ReadFirstLane}}});
1508
1509 addRulesForIOpcs(OpcList: {amdgcn_class})
1510 .Any(Init: {.Predicate: {UniS1, _, S16}, .OperandMapping: {{UniInVcc}, {IntrId, Vgpr16, Vgpr32}}})
1511 .Any(Init: {.Predicate: {DivS1, _, S16}, .OperandMapping: {{Vcc}, {IntrId, Vgpr16, Vgpr32}}})
1512 .Any(Init: {.Predicate: {UniS1, _, S32}, .OperandMapping: {{UniInVcc}, {IntrId, Vgpr32, Vgpr32}}})
1513 .Any(Init: {.Predicate: {DivS1, _, S32}, .OperandMapping: {{Vcc}, {IntrId, Vgpr32, Vgpr32}}})
1514 .Any(Init: {.Predicate: {UniS1, _, S64}, .OperandMapping: {{UniInVcc}, {IntrId, Vgpr64, Vgpr32}}})
1515 .Any(Init: {.Predicate: {DivS1, _, S64}, .OperandMapping: {{Vcc}, {IntrId, Vgpr64, Vgpr32}}});
1516
1517   // This is an "intrinsic lane mask"; it was set to i32/i64 in LLVM IR.
1518 addRulesForIOpcs(OpcList: {amdgcn_end_cf})
1519 .Any(Init: {.Predicate: {_, UniS32}, .OperandMapping: {{}, {IntrId, Sgpr32}}})
1520 .Any(Init: {.Predicate: {_, UniS64}, .OperandMapping: {{}, {IntrId, Sgpr64}}});
1521
1522 addRulesForIOpcs(OpcList: {amdgcn_if_break}, FastTypes: Standard)
1523 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {IntrId, Vcc, Sgpr64}})
1524 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {IntrId, Vcc, Sgpr32}});
1525
1526 addRulesForIOpcs(OpcList: {amdgcn_exp})
1527 .Any(Init: {.Predicate: {_, _, _, S32, S32, S32, S32},
1528 .OperandMapping: {{}, {IntrId, Imm, Imm, Vgpr32, Vgpr32, Vgpr32, Vgpr32}}});
1529
1530 addRulesForIOpcs(OpcList: {amdgcn_exp_compr})
1531 .Any(Init: {.Predicate: {_, _, _, V2S16}, .OperandMapping: {{}, {IntrId, Imm, Imm, VgprV2S16, VgprV2S16}}});
1532
1533 addRulesForIOpcs(OpcList: {amdgcn_exp_row})
1534 .Any(Init: {.Predicate: {_, _, _, S32, S32, S32, S32, _, S32},
1535 .OperandMapping: {{},
1536 {IntrId, Imm, Imm, Vgpr32, Vgpr32, Vgpr32, Vgpr32, Imm,
1537 SgprB32_M0}}});
1538
1539 addRulesForIOpcs(OpcList: {amdgcn_lds_direct_load}, FastTypes: StandardB)
1540 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {IntrId, SgprB32_M0}});
1541
1542 addRulesForIOpcs(OpcList: {amdgcn_lds_param_load}, FastTypes: Standard)
1543 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Imm, Imm, SgprB32_M0}});
1544
1545 addRulesForIOpcs(OpcList: {amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, FastTypes: Standard)
1546 .Div(Ty: S32, RuleApplyIDs: {{}, {Vgpr32, None, Vgpr32, Vgpr32}});
1547
1548 addRulesForIOpcs(OpcList: {amdgcn_readfirstlane})
1549 .Any(Init: {.Predicate: {UniB32, _, DivB32}, .OperandMapping: {{}, {SgprB32, None, VgprB32}}})
1550       // This should not exist in the first place; it comes from call lowering,
1551       // which readfirstlanes the register just in case it is not in an SGPR.
1552 .Any(Init: {.Predicate: {UniS32, _, UniS32}, .OperandMapping: {{}, {Sgpr32, None, Vgpr32}}});
1553
1554 addRulesForIOpcs(OpcList: {amdgcn_readlane}, FastTypes: StandardB)
1555 .Uni(Ty: B32, RuleApplyIDs: {{SgprB32}, {IntrId, VgprB32, SgprB32_ReadFirstLane}});
1556
1557 addRulesForIOpcs(OpcList: {amdgcn_writelane}, FastTypes: StandardB)
1558 .Div(Ty: B32,
1559 RuleApplyIDs: {{VgprB32},
1560 {IntrId, SgprB32_ReadFirstLane, SgprB32_ReadFirstLane, VgprB32}});
1561
1562 addRulesForIOpcs(OpcList: {amdgcn_permlane16, amdgcn_permlanex16}, FastTypes: Standard)
1563 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32},
1564 {IntrId, Vgpr32, Vgpr32, SgprB32_ReadFirstLane,
1565 SgprB32_ReadFirstLane, Imm, Imm}});
1566
1567 addRulesForIOpcs(OpcList: {amdgcn_perm}, FastTypes: Standard)
1568 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1569 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1570
1571 addRulesForIOpcs(OpcList: {amdgcn_wave_reduce_umax, amdgcn_wave_reduce_umin}, FastTypes: Standard)
1572 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {IntrId, Sgpr32}})
1573 .Div(Ty: S32, RuleApplyIDs: {{Sgpr32ToVgprDst}, {IntrId, VgprB32}})
1574 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {IntrId, Sgpr64}})
1575 .Div(Ty: S64, RuleApplyIDs: {{Sgpr64ToVgprDst}, {IntrId, VgprB64}});
1576
1577 addRulesForIOpcs(OpcList: {amdgcn_bitop3, amdgcn_fmad_ftz}, FastTypes: Standard)
1578 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
1579 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
1580 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1581 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1582
1583 addRulesForIOpcs(OpcList: {amdgcn_udot4, amdgcn_sdot4, amdgcn_udot8, amdgcn_sdot8,
1584 amdgcn_dot4_f32_bf8_bf8, amdgcn_dot4_f32_bf8_fp8,
1585 amdgcn_dot4_f32_fp8_fp8, amdgcn_dot4_f32_fp8_bf8},
1586 FastTypes: Standard)
1587 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1588 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1589
1590 addRulesForIOpcs(OpcList: {amdgcn_mul_u24, amdgcn_mul_i24}, FastTypes: Standard)
1591 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
1592 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}})
1593 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {IntrId, Vgpr32, Vgpr32}})
1594 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {IntrId, Vgpr32, Vgpr32}});
1595
1596 addRulesForIOpcs(OpcList: {amdgcn_ds_bpermute, amdgcn_ds_bpermute_fi_b32,
1597 amdgcn_ds_permute, amdgcn_fmul_legacy, amdgcn_mulhi_i24,
1598 amdgcn_mulhi_u24},
1599 FastTypes: Standard)
1600 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
1601 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}});
1602
1603 addRulesForIOpcs(OpcList: {amdgcn_cubesc, amdgcn_cubetc, amdgcn_cubema, amdgcn_cubeid,
1604 amdgcn_fma_legacy},
1605 FastTypes: Standard)
1606 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1607 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1608
1609 addRulesForIOpcs(OpcList: {amdgcn_frexp_mant, amdgcn_fract}, FastTypes: Standard)
1610 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {IntrId, Vgpr16}})
1611 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {IntrId, Vgpr16}})
1612 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32}})
1613 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32}})
1614 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {IntrId, Vgpr64}})
1615 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {IntrId, Vgpr64}});
1616
1617 addRulesForIOpcs(OpcList: {amdgcn_prng_b32})
1618 .Any(Init: {.Predicate: {UniS32}, .OperandMapping: {{UniInVgprS32}, {IntrId, Vgpr32}}})
1619 .Any(Init: {.Predicate: {DivS32}, .OperandMapping: {{Vgpr32}, {IntrId, Vgpr32}}});
1620
1621 addRulesForIOpcs(OpcList: {amdgcn_sffbh}, FastTypes: Standard)
1622 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {IntrId, Sgpr32}})
1623 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32}});
1624
1625 addRulesForIOpcs(OpcList: {amdgcn_ubfe, amdgcn_sbfe}, FastTypes: Standard)
1626 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1627 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {IntrId, Sgpr32, Sgpr32, Sgpr32}, S_BFE})
1628 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {IntrId, Sgpr64, Sgpr32, Sgpr32}, S_BFE})
1629 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {IntrId, Vgpr64, Vgpr32, Vgpr32}, V_BFE});
1630
1631 addRulesForIOpcs(OpcList: {amdgcn_cvt_pk_i16, amdgcn_cvt_pk_u16, amdgcn_cvt_pknorm_i16,
1632 amdgcn_cvt_pknorm_u16, amdgcn_cvt_pkrtz},
1633 FastTypes: Standard)
1634 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {IntrId, Vgpr32, Vgpr32}})
1635 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {IntrId, Vgpr32, Vgpr32}});
1636
1637 addRulesForIOpcs(OpcList: {amdgcn_global_load_tr_b64})
1638 .Any(Init: {.Predicate: {DivB64}, .OperandMapping: {{VgprB64}, {IntrId, SgprP1}}})
1639 .Any(Init: {.Predicate: {DivB32}, .OperandMapping: {{VgprB32}, {IntrId, SgprP1}}});
1640
1641 addRulesForIOpcs(OpcList: {amdgcn_global_load_tr_b128})
1642 .Any(Init: {.Predicate: {DivB64}, .OperandMapping: {{VgprB64}, {IntrId, SgprP1}}})
1643 .Any(Init: {.Predicate: {DivB128}, .OperandMapping: {{VgprB128}, {IntrId, SgprP1}}});
1644
1645 addRulesForIOpcs(OpcList: {amdgcn_global_atomic_ordered_add_b64})
1646 .Any(Init: {.Predicate: {DivS64}, .OperandMapping: {{Vgpr64}, {IntrId, VgprP1, Vgpr64}}});
1647
// FP min/max atomics on global (p1) and flat (p0) pointers: only a
// divergent S32 rule is registered; result, address and data are all VGPRs.
1648 addRulesForIOpcs(
1649     OpcList: {amdgcn_global_atomic_fmin_num, amdgcn_global_atomic_fmax_num}, FastTypes: Standard)
1650     .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, VgprP1, Vgpr32}});
1651
1652 addRulesForIOpcs(OpcList: {amdgcn_flat_atomic_fmin_num, amdgcn_flat_atomic_fmax_num},
1653                  FastTypes: Standard)
1654     .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, VgprP0, Vgpr32}});
1655
// Buffer-to-LDS loads produce no SSA results (empty dst mapping). The
// buffer resource is a uniform V4S32 — or a p8 fat pointer for the
// *_ptr_* variants — the LDS destination is a uniform p3, the byte-size
// operand is an immediate, and the voffset (plus vindex for the struct
// forms) is a VGPR with a trailing uniform soffset.
1656 addRulesForIOpcs(OpcList: {amdgcn_raw_buffer_load_lds})
1657     .Any(Init: {.Predicate: {_}, .OperandMapping: {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Sgpr32}}});
1658
1659 addRulesForIOpcs(OpcList: {amdgcn_struct_buffer_load_lds})
1660     .Any(Init: {.Predicate: {_},
1661           .OperandMapping: {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Vgpr32, Sgpr32}}});
1662
1663 addRulesForIOpcs(OpcList: {amdgcn_raw_ptr_buffer_load_lds})
1664     .Any(Init: {.Predicate: {_}, .OperandMapping: {{}, {IntrId, SgprP8, SgprP3, Imm, Vgpr32, Sgpr32}}});
1665
1666 addRulesForIOpcs(OpcList: {amdgcn_struct_ptr_buffer_load_lds})
1667     .Any(Init: {.Predicate: {_}, .OperandMapping: {{}, {IntrId, SgprP8, SgprP3, Imm, Vgpr32, Vgpr32, Sgpr32}}});
1668
// Global-to-LDS load: no result; p1 address in VGPRs and a uniform operand
// mapped as SgprB32_M0 — the name suggests it must be materialized in M0;
// confirm against the SgprB32_M0 apply handler.
1669 addRulesForIOpcs(OpcList: {amdgcn_global_load_lds})
1670     .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, VgprP1, SgprB32_M0}}});
1671
// WWM/WQM-style intrinsics are register-bank transparent copies: a uniform
// value stays SGPR->SGPR and a divergent value stays VGPR->VGPR, for every
// size from B32 up to B512. B32..B128 go through the StandardB fast-type
// table; B256/B512 need explicit .Any rules with uniformity predicates.
1672 addRulesForIOpcs(OpcList: {amdgcn_wwm, amdgcn_strict_wwm, amdgcn_wqm, amdgcn_softwqm,
1673                   amdgcn_strict_wqm},
1674                  FastTypes: StandardB)
1675     .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {IntrId, VgprB32}})
1676     .Uni(Ty: B32, RuleApplyIDs: {{SgprB32}, {IntrId, SgprB32}})
1677     .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {IntrId, VgprB64}})
1678     .Uni(Ty: B64, RuleApplyIDs: {{SgprB64}, {IntrId, SgprB64}})
1679     .Div(Ty: B96, RuleApplyIDs: {{VgprB96}, {IntrId, VgprB96}})
1680     .Uni(Ty: B96, RuleApplyIDs: {{SgprB96}, {IntrId, SgprB96}})
1681     .Div(Ty: B128, RuleApplyIDs: {{VgprB128}, {IntrId, VgprB128}})
1682     .Uni(Ty: B128, RuleApplyIDs: {{SgprB128}, {IntrId, SgprB128}})
1683     .Any(Init: {.Predicate: {UniB256}, .OperandMapping: {{SgprB256}, {IntrId, SgprB256}}})
1684     .Any(Init: {.Predicate: {DivB256}, .OperandMapping: {{VgprB256}, {IntrId, VgprB256}}})
1685     .Any(Init: {.Predicate: {UniB512}, .OperandMapping: {{SgprB512}, {IntrId, SgprB512}}})
1686     .Any(Init: {.Predicate: {DivB512}, .OperandMapping: {{VgprB512}, {IntrId, VgprB512}}});
1687
// wqm_demote has no result; its condition operand is a VCC lane mask.
1688 addRulesForIOpcs(OpcList: {amdgcn_wqm_demote}).Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, Vcc}}});
1689
// live_mask / ps_live take no operands and produce a divergent i1 in VCC.
1690 addRulesForIOpcs(OpcList: {amdgcn_live_mask, amdgcn_ps_live})
1691     .Any(Init: {.Predicate: {DivS1}, .OperandMapping: {{Vcc}, {}}});
1692
// DPP moves are cross-lane operations: only divergent rules, VGPR in/out.
1693 addRulesForIOpcs(OpcList: {amdgcn_mov_dpp, amdgcn_mov_dpp8}, FastTypes: StandardB)
1694     .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {IntrId, VgprB32}})
1695     .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {IntrId, VgprB64}});
1696
// sin/cos and trig_preop: all operands are forced into VGPRs even when the
// result is uniform (UniInVgpr* dst) — presumably these only exist as VALU
// instructions, with the uniform result copied back to an SGPR afterwards.
1697 addRulesForIOpcs(OpcList: {amdgcn_sin, amdgcn_cos}, FastTypes: Standard)
1698     .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {IntrId, Vgpr16}})
1699     .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {IntrId, Vgpr16}})
1700     .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32}})
1701     .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32}});
1702
1703 addRulesForIOpcs(OpcList: {amdgcn_trig_preop}, FastTypes: Standard)
1704     .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {IntrId, Vgpr64, Vgpr32}})
1705     .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {IntrId, Vgpr64, Vgpr32}});
1706
// ds_add/sub_gs_reg_rtn: divergent-only; note that the S64 variant still
// takes a Vgpr32 data operand.
1707 addRulesForIOpcs(OpcList: {amdgcn_ds_add_gs_reg_rtn, amdgcn_ds_sub_gs_reg_rtn},
1708                  FastTypes: Standard)
1709     .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32}})
1710     .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {IntrId, Vgpr32}});
1711
// ds_append/consume route their uniform operand through SgprB32_M0 (the
// name suggests it must end up in M0; confirm in the apply handler).
1712 addRulesForIOpcs(OpcList: {amdgcn_ds_append, amdgcn_ds_consume}, FastTypes: Standard)
1713     .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, SgprB32_M0}})
1714     .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, SgprB32_M0}});
1715
// BVH stack intrinsics return a {data, stack-address} pair; everything,
// including the V4/V8 data vector, lives in VGPRs.
1716 addRulesForIOpcs(
1717     OpcList: {amdgcn_ds_bvh_stack_rtn, amdgcn_ds_bvh_stack_push4_pop1_rtn}, FastTypes: Standard)
1718     .Div(Ty: S32, RuleApplyIDs: {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV4S32}});
1719
1720 addRulesForIOpcs(OpcList: {amdgcn_ds_bvh_stack_push8_pop1_rtn}, FastTypes: Standard)
1721     .Div(Ty: S32, RuleApplyIDs: {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV8S32}});
1722
1723 addRulesForIOpcs(OpcList: {amdgcn_ds_bvh_stack_push8_pop2_rtn}, FastTypes: Standard)
1724     .Div(Ty: S64, RuleApplyIDs: {{Vgpr64, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV8S32}});
1725
// ds_ordered_add/swap: first source is the uniform M0-routed operand,
// second is the VGPR data value.
1726 addRulesForIOpcs(OpcList: {amdgcn_ds_ordered_add, amdgcn_ds_ordered_swap}, FastTypes: Standard)
1727     .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, SgprB32_M0, Vgpr32}});
1728
// ds_swizzle and the permlane family are lane-crossing VALU ops: sources
// are always VGPRs; a uniform ds_swizzle result uses UniInVgprS32.
1729 addRulesForIOpcs(OpcList: {amdgcn_ds_swizzle}, FastTypes: Standard)
1730     .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32}})
1731     .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32}});
1732
1733 addRulesForIOpcs(OpcList: {amdgcn_permlane16_var, amdgcn_permlanex16_var}, FastTypes: Standard)
1734     .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1735
1736 addRulesForIOpcs(OpcList: {amdgcn_permlane16_swap, amdgcn_permlane32_swap}, FastTypes: Standard)
1737     .Div(Ty: S32, RuleApplyIDs: {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32}});
1738
1739 addRulesForIOpcs(OpcList: {amdgcn_permlane64}, FastTypes: StandardB)
1740     .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {IntrId, VgprB32}});
1741
// Lane-transposed LDS reads: divergent vector result from a p3 (LDS)
// address held in a VGPR.
1742 addRulesForIOpcs(OpcList: {amdgcn_ds_read_tr4_b64, amdgcn_ds_read_tr8_b64})
1743     .Any(Init: {.Predicate: {DivV2S32}, .OperandMapping: {{VgprV2S32}, {IntrId, VgprP3}}});
1744
1745 addRulesForIOpcs(OpcList: {amdgcn_ds_read_tr6_b96})
1746     .Any(Init: {.Predicate: {DivV3S32}, .OperandMapping: {{VgprV3S32}, {IntrId, VgprP3}}});
1747
1748 addRulesForIOpcs(OpcList: {amdgcn_ds_read_tr16_b64})
1749     .Any(Init: {.Predicate: {DivV4S16}, .OperandMapping: {{VgprV4S16}, {IntrId, VgprP3}}});
1750
// Interpolation intrinsics: three Vgpr32 sources; a uniform result is
// produced via the UniInVgpr* pattern (computed in a VGPR, presumably
// copied back to an SGPR by the apply step). The p2_f16 variants return
// S16 instead of S32.
1751 addRulesForIOpcs(OpcList: {amdgcn_interp_inreg_p10, amdgcn_interp_inreg_p2,
1752                   amdgcn_interp_inreg_p10_f16, amdgcn_interp_p10_rtz_f16},
1753                  FastTypes: Standard)
1754     .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1755     .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1756
1757 addRulesForIOpcs(OpcList: {amdgcn_interp_inreg_p2_f16, amdgcn_interp_p2_rtz_f16},
1758                  FastTypes: Standard)
1759     .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1760     .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1761
// div_fmas: three value operands plus a trailing VCC condition.
1762 addRulesForIOpcs(OpcList: {amdgcn_div_fmas}, FastTypes: Standard)
1763     .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32, Vcc}})
1764     .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32, Vcc}})
1765     .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {IntrId, Vgpr64, Vgpr64, Vgpr64, Vcc}})
1766     .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {IntrId, Vgpr64, Vgpr64, Vgpr64, Vcc}});
1767
// div_fixup for 16/32/64-bit floats: three same-width VGPR sources.
1768 addRulesForIOpcs(OpcList: {amdgcn_div_fixup}, FastTypes: Standard)
1769     .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
1770     .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
1771     .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1772     .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1773     .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {IntrId, Vgpr64, Vgpr64, Vgpr64}})
1774     .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {IntrId, Vgpr64, Vgpr64, Vgpr64}});
1775
// div_scale has two results: the scaled value and a VCC flag (UniInVcc
// for the uniform case).
1776 addRulesForIOpcs(OpcList: {amdgcn_div_scale}, FastTypes: Standard)
1777     .Div(Ty: S32, RuleApplyIDs: {{Vgpr32, Vcc}, {IntrId, Vgpr32, Vgpr32}})
1778     .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32, UniInVcc}, {IntrId, Vgpr32, Vgpr32}})
1779     .Div(Ty: S64, RuleApplyIDs: {{Vgpr64, Vcc}, {IntrId, Vgpr64, Vgpr64}})
1780     .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64, UniInVcc}, {IntrId, Vgpr64, Vgpr64}});
1781
// Dot products: udot2/sdot2 take packed v2s16 inputs plus an S32
// accumulator; sudot4/8 take dword-packed inputs with two immediate
// sign-control operands interleaved.
1782 addRulesForIOpcs(OpcList: {amdgcn_udot2, amdgcn_sdot2}, FastTypes: Standard)
1783     .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, VgprV2S16, VgprV2S16, Vgpr32}})
1784     .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, VgprV2S16, VgprV2S16, Vgpr32}});
1785
1786 addRulesForIOpcs(OpcList: {amdgcn_sudot4, amdgcn_sudot8}, FastTypes: Standard)
1787     .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Imm, Vgpr32, Imm, Vgpr32, Vgpr32}})
1788     .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Imm, Vgpr32, Imm, Vgpr32, Vgpr32}});
1790} // end initialize rules
1791