//===-- AMDGPURegBankLegalizeRules.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Definitions of RegBankLegalize Rules for all opcodes.
/// Implementation of the container for all the Rules and of rule search.
/// Fast search for the most common case: a Rule.Predicate that checks the LLT
/// and uniformity of the register in operand 0.
//
//===----------------------------------------------------------------------===//

#include "AMDGPURegBankLegalizeRules.h"
#include "AMDGPUInstrInfo.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/AMDGPUAddrSpace.h"

#define DEBUG_TYPE "amdgpu-regbanklegalize"

using namespace llvm;
using namespace AMDGPU;

bool AMDGPU::isAnyPtr(LLT Ty, unsigned Width) {
  return Ty.isPointer() && Ty.getSizeInBits() == Width;
}

RegBankLLTMapping::RegBankLLTMapping(
    std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
    std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
    LoweringMethodID LoweringMethod)
    : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
      LoweringMethod(LoweringMethod) {}

PredicateMapping::PredicateMapping(
    std::initializer_list<UniformityLLTOpPredicateID> OpList,
    std::function<bool(const MachineInstr &)> TestFunc)
    : OpUniformityAndTypes(OpList), TestFunc(TestFunc) {}

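// Return true if Reg matches the LLT, and for Uni* / Div* IDs also the
// uniformity, described by UniID.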
bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
                           const MachineUniformityInfo &MUI,
                           const MachineRegisterInfo &MRI) {
  switch (UniID) {
  case S1:
    return MRI.getType(Reg) == LLT::scalar(1);
  case S16:
    return MRI.getType(Reg) == LLT::scalar(16);
  case S32:
    return MRI.getType(Reg) == LLT::scalar(32);
  case S64:
    return MRI.getType(Reg) == LLT::scalar(64);
  case S128:
    return MRI.getType(Reg) == LLT::scalar(128);
  case P0:
    return MRI.getType(Reg) == LLT::pointer(0, 64);
  case P1:
    return MRI.getType(Reg) == LLT::pointer(1, 64);
  case P2:
    return MRI.getType(Reg) == LLT::pointer(2, 32);
  case P3:
    return MRI.getType(Reg) == LLT::pointer(3, 32);
  case P4:
    return MRI.getType(Reg) == LLT::pointer(4, 64);
  case P5:
    return MRI.getType(Reg) == LLT::pointer(5, 32);
  case P8:
    return MRI.getType(Reg) == LLT::pointer(8, 128);
  case Ptr32:
    return isAnyPtr(MRI.getType(Reg), 32);
  case Ptr64:
    return isAnyPtr(MRI.getType(Reg), 64);
  case Ptr128:
    return isAnyPtr(MRI.getType(Reg), 128);
  case V2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
  case V3S32:
    return MRI.getType(Reg) == LLT::fixed_vector(3, 32);
  case V4S32:
    return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
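  // B* IDs match any type (scalar, vector or pointer) with the given total
  // size in bits.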
  case B32:
    return MRI.getType(Reg).getSizeInBits() == 32;
  case B64:
    return MRI.getType(Reg).getSizeInBits() == 64;
  case B96:
    return MRI.getType(Reg).getSizeInBits() == 96;
  case B128:
    return MRI.getType(Reg).getSizeInBits() == 128;
  case B256:
    return MRI.getType(Reg).getSizeInBits() == 256;
  case B512:
    return MRI.getType(Reg).getSizeInBits() == 512;
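  // Uni* IDs: same LLT checks as above, plus the register must be uniform.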
  case UniS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isUniform(Reg);
  case UniS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isUniform(Reg);
  case UniS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniform(Reg);
  case UniS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniform(Reg);
  case UniS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isUniform(Reg);
  case UniP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isUniform(Reg);
  case UniP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniform(Reg);
  case UniP2:
    return MRI.getType(Reg) == LLT::pointer(2, 32) && MUI.isUniform(Reg);
  case UniP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isUniform(Reg);
  case UniP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
  case UniP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
  case UniP8:
    return MRI.getType(Reg) == LLT::pointer(8, 128) && MUI.isUniform(Reg);
  case UniPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniform(Reg);
  case UniPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isUniform(Reg);
  case UniPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniform(Reg);
  case UniV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg);
  case UniV2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isUniform(Reg);
  case UniB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
  case UniB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniform(Reg);
  case UniB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniform(Reg);
  case UniB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniform(Reg);
  case UniB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniform(Reg);
  case UniB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg);
  case UniBRC: {
    if (!MUI.isUniform(Reg))
      return false;
    // Check if there is an SGPR register class of the same size as the LLT.
    const SIRegisterInfo *TRI =
        static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    // There is no 16-bit SGPR register class. The extra size check is required
    // since getSGPRClassForBitWidth returns SReg_32RegClass for size 16.
    unsigned LLTSize = MRI.getType(Reg).getSizeInBits();
    return LLTSize >= 32 && TRI->getSGPRClassForBitWidth(LLTSize);
  }
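  // Div* IDs: same LLT checks as above, plus the register must be divergent.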
  case DivS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg);
  case DivS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isDivergent(Reg);
  case DivS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg);
  case DivS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergent(Reg);
  case DivS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isDivergent(Reg);
  case DivP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isDivergent(Reg);
  case DivP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergent(Reg);
  case DivP2:
    return MRI.getType(Reg) == LLT::pointer(2, 32) && MUI.isDivergent(Reg);
  case DivP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isDivergent(Reg);
  case DivP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergent(Reg);
  case DivP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergent(Reg);
  case DivPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isDivergent(Reg);
  case DivPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isDivergent(Reg);
  case DivPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergent(Reg);
  case DivV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) &&
           MUI.isDivergent(Reg);
  case DivV2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32) &&
           MUI.isDivergent(Reg);
  case DivB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
  case DivB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergent(Reg);
  case DivB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergent(Reg);
  case DivB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergent(Reg);
  case DivB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergent(Reg);
  case DivB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(Reg);
  case DivBRC: {
    if (!MUI.isDivergent(Reg))
      return false;
    // Check if there is a VGPR register class of the same size as the LLT.
    const SIRegisterInfo *TRI =
        static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    return TRI->getVGPRClassForBitWidth(MRI.getType(Reg).getSizeInBits());
  }
  case _:
    return true;
  default:
    llvm_unreachable("missing matchUniformityAndLLT");
  }
}

bool PredicateMapping::match(const MachineInstr &MI,
                             const MachineUniformityInfo &MUI,
                             const MachineRegisterInfo &MRI) const {
  // Check LLT signature.
  for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
    if (OpUniformityAndTypes[i] == _) {
      if (MI.getOperand(i).isReg())
        return false;
      continue;
    }

    // Remaining IDs check registers.
    if (!MI.getOperand(i).isReg())
      return false;

    if (!matchUniformityAndLLT(MI.getOperand(i).getReg(),
                               OpUniformityAndTypes[i], MUI, MRI))
      return false;
  }

  // More complex check.
  if (TestFunc)
    return TestFunc(MI);

  return true;
}

SetOfRulesForOpcode::SetOfRulesForOpcode() = default;

SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
    : FastTypes(FastTypes) {}

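// Map an LLT to the type ID used to index "fast rules" in the 'Standard' and
// 'Vector' fast-rule layouts; returns '_' for types without a fast slot.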
UniformityLLTOpPredicateID LLTToId(LLT Ty) {
  if (Ty == LLT::scalar(16))
    return S16;
  if (Ty == LLT::scalar(32))
    return S32;
  if (Ty == LLT::scalar(64))
    return S64;
  if (Ty == LLT::fixed_vector(2, 16))
    return V2S16;
  if (Ty == LLT::fixed_vector(2, 32))
    return V2S32;
  if (Ty == LLT::fixed_vector(3, 32))
    return V3S32;
  if (Ty == LLT::fixed_vector(4, 32))
    return V4S32;
  return _;
}

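// Same, but maps to the size-only B* IDs used by the 'StandardB' layout, where
// all types with equal total size share a slot.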
UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
  if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
      isAnyPtr(Ty, 32))
    return B32;
  if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
      Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64))
    return B64;
  if (Ty == LLT::fixed_vector(3, 32))
    return B96;
  if (Ty == LLT::fixed_vector(4, 32) || isAnyPtr(Ty, 128))
    return B128;
  return _;
}

const RegBankLLTMapping *
SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI,
                                      const MachineRegisterInfo &MRI,
                                      const MachineUniformityInfo &MUI) const {
  // Search in "Fast Rules".
  // Note: if fast rules are enabled, a RegBankLLTMapping must be added to each
  // slot that could match the fast predicate. If not, an invalid mapping is
  // returned, which results in failure; the "Slow Rules" are not searched.
  if (FastTypes != NoFastRules) {
    Register Reg = MI.getOperand(0).getReg();
    int Slot;
    if (FastTypes == StandardB)
      Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg)));
    else
      Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));

    if (Slot != -1)
      return MUI.isUniform(Reg) ? &Uni[Slot] : &Div[Slot];
  }

  // Slow search for more complex rules.
  for (const RegBankLegalizeRule &Rule : Rules) {
    if (Rule.Predicate.match(MI, MUI, MRI))
      return &Rule.OperandMapping;
  }

  return nullptr;
}

void SetOfRulesForOpcode::addRule(RegBankLegalizeRule Rule) {
  Rules.push_back(Rule);
}

void SetOfRulesForOpcode::addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
                                               RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Div[Slot] = std::move(RuleApplyIDs);
}

void SetOfRulesForOpcode::addFastRuleUniform(UniformityLLTOpPredicateID Ty,
                                             RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Uni[Slot] = std::move(RuleApplyIDs);
}

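// Return the slot in the Uni/Div fast-rule arrays for Ty under the current
// FastRulesTypes layout, or -1 if Ty has no fast slot.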
int SetOfRulesForOpcode::getFastPredicateSlot(
    UniformityLLTOpPredicateID Ty) const {
  switch (FastTypes) {
  case Standard: {
    switch (Ty) {
    case S32:
      return 0;
    case S16:
      return 1;
    case S64:
      return 2;
    case V2S16:
      return 3;
    default:
      return -1;
    }
  }
  case StandardB: {
    switch (Ty) {
    case B32:
      return 0;
    case B64:
      return 1;
    case B96:
      return 2;
    case B128:
      return 3;
    default:
      return -1;
    }
  }
  case Vector: {
    switch (Ty) {
    case S32:
      return 0;
    case V2S32:
      return 1;
    case V3S32:
      return 2;
    case V4S32:
      return 3;
    default:
      return -1;
    }
  }
  default:
    return -1;
  }
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
}

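// Rules for intrinsics are keyed by intrinsic ID, other generic opcodes by
// opcode; the alias maps let several opcodes or intrinsics share one rule set.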
const SetOfRulesForOpcode *
RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
      Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
      Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
    unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
    auto IRAIt = IRulesAlias.find(IntrID);
    if (IRAIt == IRulesAlias.end())
      return nullptr;
    return &IRules.at(IRAIt->second);
  }

  auto GRAIt = GRulesAlias.find(Opc);
  if (GRAIt == GRulesAlias.end())
    return nullptr;
  return &GRules.at(GRAIt->second);
}

// Syntactic sugar wrapper for a predicate lambda that enables '&&', '||' and
// '!'.
class Predicate {
private:
  struct Elt {
    // Saves a formula composed of Pred, '&&', '||' and '!' as a jump table.
    // Sink '!' into Pred. For example: !((A && !B) || C) -> (!A || B) && !C
    // Sequences of && and || are represented by jumps, for example:
    // (A && B && ... X) or (A && B && ... X) || Y
    //   A == true: jump to B
    //   A == false: jump to end or Y; the result is A (false) or Y
    // (A || B || ... X) or (A || B || ... X) && Y
    //   A == true: jump to end or Y; the result is A (true) or Y
    //   A == false: jump to B
    // Notice that when negating an expression, we simply flip Neg on each Pred
    // and swap TJumpOffset and FJumpOffset (&& becomes ||, || becomes &&).
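    // Worked example: A && (B || C) is encoded as
    //   [0] A  TJump:+1 FJump:+3  (false skips the whole RHS)
    //   [1] B  TJump:+2 FJump:+1  (true skips C)
    //   [2] C  TJump:+1 FJump:+1
    // Evaluation stops once the index reaches Expression.size(); the last
    // Result computed is the value of the whole formula.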
    std::function<bool(const MachineInstr &)> Pred;
    bool Neg; // Neg of Pred is calculated before the jump
    unsigned TJumpOffset;
    unsigned FJumpOffset;
  };

  SmallVector<Elt, 8> Expression;

  Predicate(SmallVectorImpl<Elt> &&Expr) { Expression.swap(Expr); }

public:
  Predicate(std::function<bool(const MachineInstr &)> Pred) {
    Expression.push_back({Pred, false, 1, 1});
  }

  bool operator()(const MachineInstr &MI) const {
    unsigned Idx = 0;
    unsigned ResultIdx = Expression.size();
    bool Result;
    do {
      Result = Expression[Idx].Pred(MI);
      Result = Expression[Idx].Neg ? !Result : Result;
      if (Result) {
        Idx += Expression[Idx].TJumpOffset;
      } else {
        Idx += Expression[Idx].FJumpOffset;
      }
    } while (Idx != ResultIdx);

    return Result;
  }

  Predicate operator!() const {
    SmallVector<Elt, 8> NegExpression;
    for (const Elt &ExprElt : Expression) {
      NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg,
                               ExprElt.FJumpOffset, ExprElt.TJumpOffset});
    }
    return Predicate(std::move(NegExpression));
  }

  Predicate operator&&(const Predicate &RHS) const {
    SmallVector<Elt, 8> AndExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in false, whole expression results in false.
      if (i + AndExpression[i].FJumpOffset == ResultIdx)
        AndExpression[i].FJumpOffset += RHSSize;
    }

    AndExpression.append(RHS.Expression);

    return Predicate(std::move(AndExpression));
  }

  Predicate operator||(const Predicate &RHS) const {
    SmallVector<Elt, 8> OrExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in true, whole expression results in true.
      if (i + OrExpression[i].TJumpOffset == ResultIdx)
        OrExpression[i].TJumpOffset += RHSSize;
    }

    OrExpression.append(RHS.Expression);

    return Predicate(std::move(OrExpression));
  }
};

// Initialize rules
RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
                                           MachineRegisterInfo &_MRI)
    : ST(&_ST), MRI(&_MRI) {

  addRulesForGOpcs({G_ADD, G_SUB}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackAExt})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}});

  addRulesForGOpcs({G_UADDO, G_USUBO}, Standard)
      .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_UADDE, G_USUBE}, Standard)
      .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32, Sgpr32AExtBoolInReg}})
      .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32, Vcc}});

  addRulesForGOpcs({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  bool HasVecMulU64 = ST->hasVectorMulU64();
  addRulesForGOpcs({G_MUL}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{SgprB64}, {SgprB64, SgprB64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
      .Div(S64, {{VgprB64}, {VgprB64, VgprB64}}, HasVecMulU64)
      .Div(S64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32Mul}, !HasVecMulU64);

  bool hasMulHi = ST->hasScalarMulHiInsts();
  addRulesForGOpcs({G_UMULH, G_SMULH}, Standard)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasMulHi)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasMulHi);

  addRulesForGOpcs({G_AMDGPU_MAD_U64_U32}, Standard)
      .Div(S64, {{Vgpr64, Vcc}, {Vgpr32, Vgpr32, Vgpr64}})
      .Uni(S64, {{Sgpr64, Sgpr32Trunc}, {Sgpr32, Sgpr32, Sgpr64}, UniMAD64});

  bool HasScalarSMulU64 = ST->hasScalarSMulU64();
  addRulesForGOpcs({G_AMDGPU_S_MUL_U64_U32, G_AMDGPU_S_MUL_I64_I32}, Standard)
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}, UniMul64}, HasScalarSMulU64)
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}, DivSMulToMAD});

  addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
      .Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}})
      .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr16, Sgpr16}}})
      .Any({{DivS16}, {{Vgpr16}, {Vgpr16, Vgpr16}}})
      .Uni(B32, {{SgprB32}, {SgprB32, SgprB32}})
      .Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
      .Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
      .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});

  addRulesForGOpcs({G_SHL}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_LSHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_ASHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_FSHR}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});

  addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});

  addRulesForGOpcs({G_SMIN, G_SMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32SExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackMinMax})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_UMIN, G_UMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackMinMax})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  // Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT
  // and G_FREEZE here; the rest is trivially regbank-selected earlier.
  addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
  addRulesForGOpcs({G_CONSTANT})
      .Any({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}});
  addRulesForGOpcs({G_FREEZE}).Any({{DivS1}, {{Vcc}, {Vcc}}});

  addRulesForGOpcs({G_UNMERGE_VALUES})
      .Any({{UniS16}, {{}, {}, UnmergeToShiftTrunc}})
      .Any({{UniBRC}, {{}, {}, VerifyAllSgpr}})
      .Any({{DivBRC}, {{}, {}, ApplyAllVgpr}});

  Predicate isSignedICmp([](const MachineInstr &MI) -> bool {
    auto Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
    return CmpInst::isSigned(Pred);
  });

  Predicate isEqualityICmp([](const MachineInstr &MI) -> bool {
    auto Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
    return ICmpInst::isEquality(Pred);
  });

  bool HasScalarCompareEq64 = ST->hasScalarCompareEq64();
  // clang-format off
  addRulesForGOpcs({G_ICMP})
      .Any({{{UniS1, _, S16}, isEqualityICmp}, {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
      .Any({{{UniS1, _, S16}, !isEqualityICmp && isSignedICmp}, {{Sgpr32Trunc}, {None, Sgpr32SExt, Sgpr32SExt}}})
      .Any({{{UniS1, _, S16}, !isEqualityICmp && !isSignedICmp}, {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
      .Any({{{DivS1, _, S16}}, {{Vcc}, {None, Vgpr16, Vgpr16}}})
      .Any({{{UniS1, _, S32}}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
      .Any({{{DivS1, _, S32}}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{{UniS1, _, S64}, isEqualityICmp}, {{Sgpr32Trunc}, {None, Sgpr64, Sgpr64}}}, HasScalarCompareEq64)
      .Any({{{UniS1, _, S64}, isEqualityICmp}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}}, !HasScalarCompareEq64)
      .Any({{{UniS1, _, S64}, !isEqualityICmp}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
      .Any({{{DivS1, _, S64}}, {{Vcc}, {None, Vgpr64, Vgpr64}}})
      .Any({{{UniS1, _, Ptr32}}, {{Sgpr32Trunc}, {None, SgprPtr32, SgprPtr32}}})
      .Any({{{DivS1, _, Ptr32}}, {{Vcc}, {None, VgprPtr32, VgprPtr32}}})
      .Any({{{UniS1, _, Ptr64}, isEqualityICmp}, {{Sgpr32Trunc}, {None, SgprPtr64, SgprPtr64}}}, HasScalarCompareEq64)
      .Any({{{UniS1, _, Ptr64}, isEqualityICmp}, {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}}, !HasScalarCompareEq64)
      .Any({{{UniS1, _, Ptr64}, !isEqualityICmp}, {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}})
      .Any({{{DivS1, _, Ptr64}}, {{Vcc}, {None, VgprPtr64, VgprPtr64}}});
  // clang-format on

  addRulesForGOpcs({G_BRCOND})
      .Any({{UniS1}, {{}, {Sgpr32AExtBoolInReg}}})
      .Any({{DivS1}, {{}, {Vcc}}});

  addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});

  addRulesForGOpcs({G_SELECT}, StandardB)
      .Any({{DivS16}, {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr32AExtBoolInReg, Sgpr16, Sgpr16}}})
      .Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}})
      .Uni(B32, {{SgprB32}, {Sgpr32AExtBoolInReg, SgprB32, SgprB32}})
      .Div(B64, {{VgprB64}, {Vcc, VgprB64, VgprB64}, SplitTo32Select})
      .Uni(B64, {{SgprB64}, {Sgpr32AExtBoolInReg, SgprB64, SgprB64}});

  addRulesForGOpcs({G_ANYEXT})
      .Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS64, S1}, {{None}, {None}}}) // should be combined away
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  bool Has16bitCmp = ST->has16BitInsts();

  // In GlobalISel, an in-reg G_TRUNC is treated as a no-op and is instruction-
  // selected into a COPY. It is up to the user to deal with the truncated
  // bits.
  addRulesForGOpcs({G_TRUNC})
      .Any({{UniS1, UniS16}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS32}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS64}, {{None}, {None}}}) // should be combined away
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}})
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
      .Any({{UniV2S16, V2S32}, {{SgprV2S16}, {SgprV2S32}}})
      .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
      // This is non-trivial. VgprToVccCopy is done using a compare instruction.
      .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}}, Has16bitCmp)
      .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr32AExt}, VgprToVccCopy}},
           !Has16bitCmp)
      .Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}})
      .Any({{DivS1, DivS64}, {{Vcc}, {Vgpr64}, VgprToVccCopy}});

  addRulesForGOpcs({G_ZEXT})
      .Any({{UniS16, S1}, {{Sgpr32Trunc}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS32, S1}, {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS64, S1}, {{Sgpr64}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // Not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32ZExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32ZExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT})
      .Any({{UniS16, S1}, {{Sgpr32Trunc}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS32, S1}, {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS64, S1}, {{Sgpr64}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // Not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32SExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32SExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT_INREG})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}})
      .Any({{DivS64, S64}, {{Vgpr64}, {Vgpr64}, SplitTo32SExtInReg}});

  addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Imm}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Imm}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Imm}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Imm}});

  // Atomic read-modify-write operations: the result and value are always VGPR;
  // the pointer varies by address space.
  addRulesForGOpcs({G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG,
                    G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR})
      .Any({{S32, P0}, {{Vgpr32}, {VgprP0, Vgpr32}}})
      .Any({{S64, P0}, {{Vgpr64}, {VgprP0, Vgpr64}}})
      .Any({{S32, P1}, {{Vgpr32}, {VgprP1, Vgpr32}}})
      .Any({{S64, P1}, {{Vgpr64}, {VgprP1, Vgpr64}}})
      .Any({{S32, P3}, {{Vgpr32}, {VgprP3, Vgpr32}}})
      .Any({{S64, P3}, {{Vgpr64}, {VgprP3, Vgpr64}}});

  addRulesForGOpcs({G_ATOMIC_CMPXCHG})
      .Any({{DivS32, P2}, {{Vgpr32}, {VgprP2, Vgpr32, Vgpr32}}})
      .Any({{DivS64, P2}, {{Vgpr64}, {VgprP2, Vgpr64, Vgpr64}}})
      .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3, Vgpr32, Vgpr32}}})
      .Any({{DivS64, P3}, {{Vgpr64}, {VgprP3, Vgpr64, Vgpr64}}});

  addRulesForGOpcs({G_AMDGPU_ATOMIC_CMPXCHG})
      .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0, VgprV2S32}}})
      .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1, VgprV2S32}}})
      .Any({{DivS64, P0}, {{Vgpr64}, {VgprP0, VgprV2S64}}})
      .Any({{DivS64, P1}, {{Vgpr64}, {VgprP1, VgprV2S64}}});

  bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
  bool hasSMRDSmall = ST->hasScalarSubwordLoads();
  bool usesTrue16 = ST->useRealTrue16Insts();

  Predicate isAlign16([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(16);
  });

  Predicate isAlign4([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(4);
  });

  Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isAtomic();
  });

  Predicate isUniMMO([](const MachineInstr &MI) -> bool {
    return AMDGPU::isUniformMMO(*MI.memoperands_begin());
  });

  Predicate isConst([](const MachineInstr &MI) -> bool {
    // The address space in the MMO can be different from the address space of
    // the pointer.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned AS = MMO->getAddrSpace();
    return AS == AMDGPUAS::CONSTANT_ADDRESS ||
           AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
  });

  Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isVolatile();
  });

  Predicate isInvMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isInvariant();
  });

  Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getFlags() & MONoClobber;
  });

  Predicate isNaturalAligned([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return MMO->getAlign() >= Align(MMO->getSize().getValue());
  });

  Predicate is8Or16BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned MemSize = 8 * MMO->getSize().getValue();
    return MemSize == 16 || MemSize == 8;
  });

  Predicate is32BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return 8 * MMO->getSize().getValue() == 32;
  });

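  // A load is treated as a "uniform load" (lowerable to a scalar SMEM load)
  // only if it is not atomic, its MMO is uniform, and the loaded memory cannot
  // change underneath it: constant address space, or else non-volatile and
  // invariant or not clobbered.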
  auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
              (isConst || isInvMMO || isNoClobberMMO);

  // clang-format off
  // TODO: S32Dst, 16-bit any-extending load should not appear on True16 targets
  addRulesForGOpcs({G_LOAD})
      // flat, addrspace(0), never uniform - flat_load
      .Any({{DivS16, P0}, {{Vgpr16}, {VgprP0}}}, usesTrue16)
      .Any({{DivB32, P0}, {{VgprB32}, {VgprP0}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P0}, {{VgprB64}, {VgprP0}}})
      .Any({{DivB96, P0}, {{VgprB96}, {VgprP0}}})
      .Any({{DivB128, P0}, {{VgprB128}, {VgprP0}}})

      // global, addrspace(1)
      // divergent - global_load
      .Any({{DivS16, P1}, {{Vgpr16}, {VgprP1}}}, usesTrue16)
      .Any({{DivB32, P1}, {{VgprB32}, {VgprP1}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P1}, {{VgprB64}, {VgprP1}}})
      .Any({{DivB96, P1}, {{VgprB96}, {VgprP1}}})
      .Any({{DivB128, P1}, {{VgprB128}, {VgprP1}}})
      .Any({{DivB256, P1}, {{VgprB256}, {VgprP1}, SplitLoad}})
      .Any({{DivB512, P1}, {{VgprB512}, {VgprP1}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P1}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP1}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load widened to 32-bit load
      .Any({{{UniB32, P1}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      // TODO: SplitLoad when !isNaturalAligned && isUL and target hasSMRDSmall
      .Any({{{UniB32, P1}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load widened to 32-bit load
      .Any({{{UniB32, P1}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}}}) // 32-bit load
      .Any({{{UniB64, P1}, isAlign4 && isUL}, {{SgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, isAlign16 && isUL}, {{SgprB96}, {SgprP1}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP1}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && isUL}, {{SgprB96}, {SgprP1}}}, hasSMRDx3)
      .Any({{{UniB128, P1}, isAlign4 && isUL}, {{SgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
      .Any({{{UniB512, P1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})

      // Uniform loads that go through a global or buffer load, for example
      // volatile or under-aligned uniform loads. Not using the standard
      // {{UniInVgprTy}, {VgprP1}}, which would be selected as global_load;
      // use SgprP1 for the pointer instead, to match patterns without
      // flat-for-global (the default for GFX7 and older).
      // -> +flat-for-global + {{UniInVgprTy}, {SgprP1}} - global_load
      // -> -flat-for-global + {{UniInVgprTy}, {SgprP1}} - buffer_load
      .Any({{{UniS16, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P1}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP1}}})
      .Any({{{UniB128, P1}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP1}, SplitLoad}})
      .Any({{{UniB512, P1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP1}, SplitLoad}})

      // local, addrspace(3) - ds_load
      .Any({{DivS16, P3}, {{Vgpr16}, {VgprP3}}}, usesTrue16)
      .Any({{DivB32, P3}, {{VgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P3}, {{VgprB64}, {VgprP3}}})
      .Any({{DivB96, P3}, {{VgprB96}, {VgprP3}}})
      .Any({{DivB128, P3}, {{VgprB128}, {VgprP3}}})

      .Any({{UniS16, P3}, {{UniInVgprS16}, {SgprP3}}}, usesTrue16) // 16-bit load
      .Any({{UniB32, P3}, {{UniInVgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{UniB64, P3}, {{UniInVgprB64}, {VgprP3}}})
      .Any({{UniB96, P3}, {{UniInVgprB96}, {VgprP3}}})
      .Any({{UniB128, P3}, {{UniInVgprB128}, {VgprP3}}})

      // constant, addrspace(4)
      // divergent - global_load
      .Any({{DivS16, P4}, {{Vgpr16}, {VgprP4}}}, usesTrue16)
      .Any({{DivB32, P4}, {{VgprB32}, {VgprP4}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P4}, {{VgprB64}, {VgprP4}}})
      .Any({{DivB96, P4}, {{VgprB96}, {VgprP4}}})
      .Any({{DivB128, P4}, {{VgprB128}, {VgprP4}}})
      .Any({{DivB256, P4}, {{VgprB256}, {VgprP4}, SplitLoad}})
      .Any({{DivB512, P4}, {{VgprB512}, {VgprP4}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P4}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP4}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load widened to 32-bit load
      .Any({{{UniB32, P4}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load widened to 32-bit load
      .Any({{{UniB32, P4}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}}}) // 32-bit load
      .Any({{{UniB64, P4}, isAlign4 && isUL}, {{SgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, isAlign16 && isUL}, {{SgprB96}, {SgprP4}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP4}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && isUL}, {{SgprB96}, {SgprP4}}}, hasSMRDx3)
      .Any({{{UniB128, P4}, isAlign4 && isUL}, {{SgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, isAlign4 && isUL}, {{SgprB256}, {SgprP4}}})
      .Any({{{UniB512, P4}, isAlign4 && isUL}, {{SgprB512}, {SgprP4}}})

      // uniform in vgpr - global_load or buffer_load
      .Any({{{UniS16, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP4}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P4}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP4}}})
      .Any({{{UniB128, P4}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP4}, SplitLoad}})
      .Any({{{UniB512, P4}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP4}, SplitLoad}})

      // private, addrspace(5), never uniform - scratch_load
      .Any({{DivS16, P5}, {{Vgpr16}, {VgprP5}}}, usesTrue16)
      .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P5}, {{VgprB64}, {VgprP5}}})
      .Any({{DivB96, P5}, {{VgprB96}, {VgprP5}}})
      .Any({{DivB128, P5}, {{VgprB128}, {VgprP5}}})

      .Any({{DivS32, Ptr128}, {{Vgpr32}, {VgprPtr128}}});

  addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD}) // i8 and i16 zero- and sign-extending loads
      .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0}}})

      .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1}}})
      .Any({{{UniS32, P1}, isAlign4 && isUL}, {{Sgpr32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP1}}}, hasSMRDSmall)
      .Any({{{UniS32, P1}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP1}}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP1}}}, hasSMRDSmall)

      .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3}}})
      .Any({{UniS32, P3}, {{UniInVgprS32}, {VgprP3}}})

      .Any({{DivS32, P4}, {{Vgpr32}, {VgprP4}}})
      .Any({{{UniS32, P4}, isAlign4 && isUL}, {{Sgpr32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP4}}}, hasSMRDSmall)
      .Any({{{UniS32, P4}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP4}}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP4}}}, hasSMRDSmall)

      .Any({{DivS32, P5}, {{Vgpr32}, {VgprP5}}});

  addRulesForGOpcs({G_STORE})
      // addrspace(0)
      .Any({{S16, P0}, {{}, {Vgpr16, VgprP0}}}, usesTrue16) // 16-bit store
      .Any({{B32, P0}, {{}, {VgprB32, VgprP0}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, P0}, {{}, {VgprB64, VgprP0}}})
      .Any({{B96, P0}, {{}, {VgprB96, VgprP0}}})
      .Any({{B128, P0}, {{}, {VgprB128, VgprP0}}})

      // addrspace(1); there are no stores to addrspace(4)
      // For targets:
      // - with "+flat-for-global" - global_store
      // - without (-flat-for-global) - buffer_store addr64
      .Any({{S16, DivP1}, {{}, {Vgpr16, VgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, DivP1}, {{}, {VgprB32, VgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, DivP1}, {{}, {VgprB64, VgprP1}}})
      .Any({{B96, DivP1}, {{}, {VgprB96, VgprP1}}})
      .Any({{B128, DivP1}, {{}, {VgprB128, VgprP1}}})

      // For UniP1, use an sgpr pointer to match flat-for-global patterns.
      // Targets:
      // - with "+flat-for-global" - global_store for both sgpr and vgpr ptr
      // - without (-flat-for-global) - need an sgpr ptr to select buffer_store
      .Any({{S16, UniP1}, {{}, {Vgpr16, SgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, UniP1}, {{}, {VgprB32, SgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, UniP1}, {{}, {VgprB64, SgprP1}}})
      .Any({{B96, UniP1}, {{}, {VgprB96, SgprP1}}})
      .Any({{B128, UniP1}, {{}, {VgprB128, SgprP1}}})

      // addrspace(3) and addrspace(5)
      .Any({{S16, Ptr32}, {{}, {Vgpr16, VgprPtr32}}}, usesTrue16) // 16-bit store
      .Any({{B32, Ptr32}, {{}, {VgprB32, VgprPtr32}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, Ptr32}, {{}, {VgprB64, VgprPtr32}}})
      .Any({{B96, Ptr32}, {{}, {VgprB96, VgprPtr32}}})
      .Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}});
  // clang-format on

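  // Buffer operations: the resource descriptor and scalar offset must end up
  // in SGPRs (the _WF mappings are assumed to waterfall divergent inputs),
  // while vindex and voffset are VGPRs. Uniform results are produced in VGPRs
  // and read back into SGPRs (UniInVgpr*).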
1000 addRulesForGOpcs(OpcList: {G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
1001 G_AMDGPU_TBUFFER_LOAD_FORMAT},
1002 FastTypes: StandardB)
1003 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1004 .Uni(Ty: B32, RuleApplyIDs: {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1005 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1006 .Uni(Ty: B64, RuleApplyIDs: {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1007 .Div(Ty: B96, RuleApplyIDs: {{VgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1008 .Uni(Ty: B96, RuleApplyIDs: {{UniInVgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1009 .Div(Ty: B128, RuleApplyIDs: {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1010 .Uni(Ty: B128, RuleApplyIDs: {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_USHORT, G_AMDGPU_BUFFER_LOAD_UBYTE,
                    G_AMDGPU_BUFFER_LOAD_SSHORT, G_AMDGPU_BUFFER_LOAD_SBYTE},
                   StandardB)
      .Div(B32, {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B32, {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_STORE, G_AMDGPU_BUFFER_STORE_FORMAT,
                    G_AMDGPU_BUFFER_STORE_FORMAT_D16,
                    G_AMDGPU_TBUFFER_STORE_FORMAT,
                    G_AMDGPU_TBUFFER_STORE_FORMAT_D16})
      .Any({{B32}, {{}, {VgprB32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{B64}, {{}, {VgprB64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{B128}, {{}, {VgprB128, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any(
          {{V2S32}, {{}, {VgprV2S32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any(
          {{V3S32}, {{}, {VgprV3S32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{V4S32},
            {{}, {VgprV4S32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});

  addRulesForGOpcs({G_PTR_ADD})
      .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32, Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64, Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64, Vgpr64}}});

  addRulesForGOpcs({G_INTTOPTR})
      .Any({{UniPtr32}, {{SgprPtr32}, {Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {Vgpr64}}})
      .Any({{UniPtr128}, {{SgprPtr128}, {Sgpr128}}})
      .Any({{DivPtr128}, {{VgprPtr128}, {Vgpr128}}});

  addRulesForGOpcs({G_PTRTOINT})
      .Any({{UniS32}, {{Sgpr32}, {SgprPtr32}}})
      .Any({{DivS32}, {{Vgpr32}, {VgprPtr32}}})
      .Any({{UniS64}, {{Sgpr64}, {SgprPtr64}}})
      .Any({{DivS64}, {{Vgpr64}, {VgprPtr64}}})
      .Any({{UniS128}, {{Sgpr128}, {SgprPtr128}}})
      .Any({{DivS128}, {{Vgpr128}, {VgprPtr128}}});

  // FIXME: Update llvm/test/CodeGen/AMDGPU/ptrmask.ll to use GlobalISel.
  // Currently crashes on the P8 (buffer resource) tests due to a legalizer
  // issue.
  addRulesForGOpcs({G_PTRMASK})
      .Any({{UniP1}, {{SgprP1}, {SgprP1, Sgpr64}}})
      .Any({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}})
      .Any({{UniP3}, {{SgprP3}, {SgprP3, Sgpr32}}})
      .Any({{DivP3}, {{VgprP3}, {VgprP3, Vgpr32}}});

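  // Uniform 16-bit abs is done on the SALU in 32 bits: the source is
  // sign-extended into an sgpr (Sgpr32SExt) and the result truncated back
  // (Sgpr32Trunc).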
  addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});

  addRulesForGOpcs({G_BITREVERSE}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}});

  addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});

  addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
      .Uni(S64, {{Sgpr64}, {}});

  addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}});

  addRulesForGOpcs({G_GLOBAL_VALUE})
      .Any({{UniP0}, {{SgprP0}, {}}})
      .Any({{UniP1}, {{SgprP1}, {}}})
      .Any({{UniP3}, {{SgprP3}, {}}})
      .Any({{UniP4}, {{SgprP4}, {}}})
      .Any({{UniP8}, {{SgprP8}, {}}});

  addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).Any({{UniP5}, {{SgprP5}, {}}});

  bool hasSALUFloat = ST->hasSALUFloatInsts();

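  // When the target has SALU float instructions, uniform f16/f32 arithmetic
  // stays on sgprs; otherwise it is done in vgprs and the result is copied
  // back to an sgpr (UniInVgpr*). Uniform packed V2S16 is scalarized to two
  // S16 ops on SALU-float targets (ScalarizeToS16).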
  addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}}, !hasSALUFloat)
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, ScalarizeToS16},
           hasSALUFloat)
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_FSUB, G_STRICT_FSUB}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat);

  addRulesForGOpcs({G_FMAD}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_FLDEXP, G_STRICT_FLDEXP}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_FMA}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr64}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}})
      .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32, VgprV2S32, VgprV2S32}}})
      .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32, VgprV2S32, VgprV2S32}}})
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16, Sgpr16}}, hasSALUFloat)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Uni(V2S16,
           {{SgprV2S16}, {SgprV2S16, SgprV2S16, SgprV2S16}, ScalarizeToS16},
           hasSALUFloat)
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}},
           !hasSALUFloat);

  addRulesForGOpcs({G_AMDGPU_FMED3}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  // TODO: This opcode is generated from the i64->i16 signed-clamp pattern in
  // the PreLegalizerCombiner. Move that combine to the RegBankCombiner to
  // keep more instructions on the SALU.
  addRulesForGOpcs({G_AMDGPU_SMED3}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  // FNEG and FABS are either folded as source modifiers or selected as a
  // bitwise XOR and AND with a mask. XOR and AND are available on the SALU,
  // but for targets without SALU float we still select them on VGPRs since
  // there would be no real sgpr use.
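  // E.g. for f32, fneg is a xor with 0x80000000 (flip the sign bit) and fabs
  // is an and with 0x7fffffff (clear the sign bit).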
  addRulesForGOpcs({G_FNEG, G_FABS}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasSALUFloat)
      .Uni(S16, {{Sgpr16}, {Sgpr16}}, hasSALUFloat)
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32}}, hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}}, !hasSALUFloat)
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16}, ScalarizeToS16}, hasSALUFloat)
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
      .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
      .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});

  addRulesForGOpcs({G_FCANONICALIZE}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
      .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
      .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});

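  // There is no SALU conversion from f64, even with hasSALUFloat, so uniform
  // results with an S64 source are computed in vgprs and copied back to an
  // sgpr (UniInVgprS32).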
  addRulesForGOpcs({G_FPTOUI, G_FPTOSI})
      .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
      .Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
      .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat)
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}});

  addRulesForGOpcs({G_UITOFP, G_SITOFP})
      .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
      .Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat)
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}});

  addRulesForGOpcs({G_FPEXT})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
      .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
      .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat);

  addRulesForGOpcs({G_AMDGPU_CVT_PK_I16_I32}, Standard)
      .Uni(V2S16, {{UniInVgprV2S16}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_AMDGPU_FMIN_LEGACY, G_AMDGPU_FMAX_LEGACY}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_FPTRUNC})
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
      .Any({{UniV2S16, V2S32}, {{UniInVgprV2S16}, {VgprV2S32}}})
      .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat);

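  // Divergent s1 results live in vcc. Uniform inputs are still classified on
  // the VALU (note the vgpr inputs below): the result is produced in vcc and
  // then moved into an sgpr bool (UniInVcc).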
  addRulesForGOpcs({G_IS_FPCLASS})
      .Any({{DivS1, S16}, {{Vcc}, {Vgpr16}}})
      .Any({{UniS1, S16}, {{UniInVcc}, {Vgpr16}}})
      .Any({{DivS1, S32}, {{Vcc}, {Vgpr32}}})
      .Any({{UniS1, S32}, {{UniInVcc}, {Vgpr32}}})
      .Any({{DivS1, S64}, {{Vcc}, {Vgpr64}}})
      .Any({{UniS1, S64}, {{UniInVcc}, {Vgpr64}}});

  addRulesForGOpcs({G_FCMP}, Standard)
      .Any({{UniS1, _, S16}, {{Sgpr32Trunc}, {None, Sgpr16, Sgpr16}}},
           hasSALUFloat)
      .Any({{UniS1, _, S16}, {{UniInVcc}, {None, Vgpr16, Vgpr16}}},
           !hasSALUFloat)
      .Any({{DivS1, _, S16}, {{Vcc}, {None, Vgpr16, Vgpr16}}})
      .Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}},
           hasSALUFloat)
      .Any({{UniS1, _, S32}, {{UniInVcc}, {None, Vgpr32, Vgpr32}}},
           !hasSALUFloat)
      .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{UniS1, _, S64}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
      .Any({{DivS1, _, S64}, {{Vcc}, {None, Vgpr64, Vgpr64}}});

  using namespace Intrinsic;

  addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});

  addRulesForIOpcs({amdgcn_groupstaticsize}).Any({{S32}, {{Sgpr32}, {IntrId}}});

  // The intrinsic's operand is a lane mask; it was given type i32/i64 in
  // LLVM IR.
  addRulesForIOpcs({amdgcn_end_cf})
      .Any({{_, UniS32}, {{}, {IntrId, Sgpr32}}})
      .Any({{_, UniS64}, {{}, {IntrId, Sgpr64}}});

  addRulesForIOpcs({amdgcn_if_break}, Standard)
      .Uni(S64, {{Sgpr64}, {IntrId, Vcc, Sgpr64}})
      .Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});

  addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
      .Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});

  addRulesForIOpcs({amdgcn_readfirstlane})
      .Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}})
      // This case should not exist in the first place; it comes from call
      // lowering, which inserts a readfirstlane just in case the register is
      // not already in an sgpr.
      .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});

  addRulesForIOpcs({amdgcn_s_sleep}).Any({{_, _}, {{}, {IntrId, Imm}}});

  addRulesForIOpcs({amdgcn_mul_u24, amdgcn_mul_i24}, Standard)
      .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {IntrId, Vgpr32, Vgpr32}});

  addRulesForIOpcs({amdgcn_mulhi_u24, amdgcn_mulhi_i24, amdgcn_fmul_legacy},
                   Standard)
      .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}});

  addRulesForIOpcs({amdgcn_fma_legacy}, Standard)
      .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});

  addRulesForIOpcs({amdgcn_frexp_mant, amdgcn_fract}, Standard)
      .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}})
      .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}})
      .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64}})
      .Div(S64, {{Vgpr64}, {IntrId, Vgpr64}});

  addRulesForIOpcs({amdgcn_prng_b32})
      .Any({{UniS32}, {{UniInVgprS32}, {IntrId, Vgpr32}}})
      .Any({{DivS32}, {{Vgpr32}, {IntrId, Vgpr32}}});

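  // Uniform bitfield extracts stay on the SALU via the S_BFE lowering; s_bfe
  // takes offset and width packed into a single operand. Divergent 64-bit
  // extracts use the V_BFE lowering since v_bfe only operates on 32-bit
  // values.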
  addRulesForIOpcs({amdgcn_ubfe, amdgcn_sbfe}, Standard)
      .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32, Sgpr32, Sgpr32}, S_BFE})
      .Uni(S64, {{Sgpr64}, {IntrId, Sgpr64, Sgpr32, Sgpr32}, S_BFE})
      .Div(S64, {{Vgpr64}, {IntrId, Vgpr64, Vgpr32, Vgpr32}, V_BFE});

  addRulesForIOpcs({amdgcn_global_load_tr_b64})
      .Any({{DivB64}, {{VgprB64}, {IntrId, SgprP1}}})
      .Any({{DivB32}, {{VgprB32}, {IntrId, SgprP1}}});

  addRulesForIOpcs({amdgcn_global_load_tr_b128})
      .Any({{DivB64}, {{VgprB64}, {IntrId, SgprP1}}})
      .Any({{DivB128}, {{VgprB128}, {IntrId, SgprP1}}});

  addRulesForIOpcs({amdgcn_wwm, amdgcn_strict_wwm}, StandardB)
      .Div(B32, {{VgprB32}, {IntrId, VgprB32}})
      .Uni(B32, {{SgprB32}, {IntrId, SgprB32}})
      .Div(B64, {{VgprB64}, {IntrId, VgprB64}})
      .Uni(B64, {{SgprB64}, {IntrId, SgprB64}})
      .Div(B96, {{VgprB96}, {IntrId, VgprB96}})
      .Uni(B96, {{SgprB96}, {IntrId, SgprB96}})
      .Div(B128, {{VgprB128}, {IntrId, VgprB128}})
      .Uni(B128, {{SgprB128}, {IntrId, SgprB128}})
      .Any({{UniB256}, {{SgprB256}, {IntrId, SgprB256}}})
      .Any({{DivB256}, {{VgprB256}, {IntrId, VgprB256}}})
      .Any({{UniB512}, {{SgprB512}, {IntrId, SgprB512}}})
      .Any({{DivB512}, {{VgprB512}, {IntrId, VgprB512}}});
} // end initialize rules