1//===-- AMDGPURegBankLegalizeRules.cpp ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// Definitions of RegBankLegalize Rules for all opcodes.
10/// Implementation of container for all the Rules and search.
11/// Fast search for most common case when Rule.Predicate checks LLT and
12/// uniformity of register in operand 0.
13//
14//===----------------------------------------------------------------------===//
15
#include "AMDGPURegBankLegalizeRules.h"
#include "AMDGPUInstrInfo.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
#include <utility>
23
24#define DEBUG_TYPE "amdgpu-regbanklegalize"
25
26using namespace llvm;
27using namespace AMDGPU;
28
29bool AMDGPU::isAnyPtr(LLT Ty, unsigned Width) {
30 return Ty.isPointer() && Ty.getSizeInBits() == Width;
31}
32
// Build an operand mapping: apply IDs for def operands, apply IDs for use
// operands, and an optional lowering method to run after banks are assigned.
RegBankLLTMapping::RegBankLLTMapping(
    std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
    std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
    LoweringMethodID LoweringMethod)
    : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
      LoweringMethod(LoweringMethod) {}
39
40PredicateMapping::PredicateMapping(
41 std::initializer_list<UniformityLLTOpPredicateID> OpList,
42 std::function<bool(const MachineInstr &)> TestFunc)
43 : OpUniformityAndTypes(OpList), TestFunc(TestFunc) {}
44
45bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
46 const MachineUniformityInfo &MUI,
47 const MachineRegisterInfo &MRI) {
48 switch (UniID) {
49 case S1:
50 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 1);
51 case S16:
52 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 16);
53 case S32:
54 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 32);
55 case S64:
56 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 64);
57 case S128:
58 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 128);
59 case P0:
60 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 0, SizeInBits: 64);
61 case P1:
62 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 1, SizeInBits: 64);
63 case P2:
64 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 2, SizeInBits: 32);
65 case P3:
66 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 3, SizeInBits: 32);
67 case P4:
68 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 4, SizeInBits: 64);
69 case P5:
70 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 5, SizeInBits: 32);
71 case P8:
72 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 8, SizeInBits: 128);
73 case Ptr32:
74 return isAnyPtr(Ty: MRI.getType(Reg), Width: 32);
75 case Ptr64:
76 return isAnyPtr(Ty: MRI.getType(Reg), Width: 64);
77 case Ptr128:
78 return isAnyPtr(Ty: MRI.getType(Reg), Width: 128);
79 case V2S16:
80 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 16);
81 case V2S32:
82 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32);
83 case V3S32:
84 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 3, ScalarSizeInBits: 32);
85 case V4S32:
86 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32);
87 case B32:
88 return MRI.getType(Reg).getSizeInBits() == 32;
89 case B64:
90 return MRI.getType(Reg).getSizeInBits() == 64;
91 case B96:
92 return MRI.getType(Reg).getSizeInBits() == 96;
93 case B128:
94 return MRI.getType(Reg).getSizeInBits() == 128;
95 case B160:
96 return MRI.getType(Reg).getSizeInBits() == 160;
97 case B256:
98 return MRI.getType(Reg).getSizeInBits() == 256;
99 case B512:
100 return MRI.getType(Reg).getSizeInBits() == 512;
101 case UniS1:
102 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 1) && MUI.isUniform(V: Reg);
103 case UniS16:
104 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 16) && MUI.isUniform(V: Reg);
105 case UniS32:
106 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 32) && MUI.isUniform(V: Reg);
107 case UniS64:
108 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 64) && MUI.isUniform(V: Reg);
109 case UniS128:
110 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 128) && MUI.isUniform(V: Reg);
111 case UniP0:
112 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 0, SizeInBits: 64) && MUI.isUniform(V: Reg);
113 case UniP1:
114 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 1, SizeInBits: 64) && MUI.isUniform(V: Reg);
115 case UniP2:
116 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 2, SizeInBits: 32) && MUI.isUniform(V: Reg);
117 case UniP3:
118 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 3, SizeInBits: 32) && MUI.isUniform(V: Reg);
119 case UniP4:
120 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 4, SizeInBits: 64) && MUI.isUniform(V: Reg);
121 case UniP5:
122 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 5, SizeInBits: 32) && MUI.isUniform(V: Reg);
123 case UniP8:
124 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 8, SizeInBits: 128) && MUI.isUniform(V: Reg);
125 case UniPtr32:
126 return isAnyPtr(Ty: MRI.getType(Reg), Width: 32) && MUI.isUniform(V: Reg);
127 case UniPtr64:
128 return isAnyPtr(Ty: MRI.getType(Reg), Width: 64) && MUI.isUniform(V: Reg);
129 case UniPtr128:
130 return isAnyPtr(Ty: MRI.getType(Reg), Width: 128) && MUI.isUniform(V: Reg);
131 case UniV2S16:
132 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 16) && MUI.isUniform(V: Reg);
133 case UniV2S32:
134 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32) && MUI.isUniform(V: Reg);
135 case UniB32:
136 return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(V: Reg);
137 case UniB64:
138 return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniform(V: Reg);
139 case UniB96:
140 return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniform(V: Reg);
141 case UniB128:
142 return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniform(V: Reg);
143 case UniB160:
144 return MRI.getType(Reg).getSizeInBits() == 160 && MUI.isUniform(V: Reg);
145 case UniB256:
146 return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniform(V: Reg);
147 case UniB512:
148 return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(V: Reg);
149 case UniBRC: {
150 if (!MUI.isUniform(V: Reg))
151 return false;
152 // Check if there is SGPR register class of same size as the LLT.
153 const SIRegisterInfo *TRI =
154 static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
155 // There is no 16 bit SGPR register class. Extra size check is required
156 // since getSGPRClassForBitWidth returns SReg_32RegClass for Size 16.
157 unsigned LLTSize = MRI.getType(Reg).getSizeInBits();
158 return LLTSize >= 32 && TRI->getSGPRClassForBitWidth(BitWidth: LLTSize);
159 }
160 case DivS1:
161 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 1) && MUI.isDivergent(V: Reg);
162 case DivS16:
163 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 16) && MUI.isDivergent(V: Reg);
164 case DivS32:
165 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 32) && MUI.isDivergent(V: Reg);
166 case DivS64:
167 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 64) && MUI.isDivergent(V: Reg);
168 case DivS128:
169 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 128) && MUI.isDivergent(V: Reg);
170 case DivP0:
171 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 0, SizeInBits: 64) && MUI.isDivergent(V: Reg);
172 case DivP1:
173 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 1, SizeInBits: 64) && MUI.isDivergent(V: Reg);
174 case DivP2:
175 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 2, SizeInBits: 32) && MUI.isDivergent(V: Reg);
176 case DivP3:
177 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 3, SizeInBits: 32) && MUI.isDivergent(V: Reg);
178 case DivP4:
179 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 4, SizeInBits: 64) && MUI.isDivergent(V: Reg);
180 case DivP5:
181 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 5, SizeInBits: 32) && MUI.isDivergent(V: Reg);
182 case DivPtr32:
183 return isAnyPtr(Ty: MRI.getType(Reg), Width: 32) && MUI.isDivergent(V: Reg);
184 case DivPtr64:
185 return isAnyPtr(Ty: MRI.getType(Reg), Width: 64) && MUI.isDivergent(V: Reg);
186 case DivPtr128:
187 return isAnyPtr(Ty: MRI.getType(Reg), Width: 128) && MUI.isDivergent(V: Reg);
188 case DivV2S16:
189 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 16) && MUI.isDivergent(V: Reg);
190 case DivV2S32:
191 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32) && MUI.isDivergent(V: Reg);
192 case DivV3S32:
193 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 3, ScalarSizeInBits: 32) && MUI.isDivergent(V: Reg);
194 case DivV4S16:
195 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16) && MUI.isDivergent(V: Reg);
196 case DivB32:
197 return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(V: Reg);
198 case DivB64:
199 return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergent(V: Reg);
200 case DivB96:
201 return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergent(V: Reg);
202 case DivB128:
203 return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergent(V: Reg);
204 case DivB160:
205 return MRI.getType(Reg).getSizeInBits() == 160 && MUI.isDivergent(V: Reg);
206 case DivB256:
207 return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergent(V: Reg);
208 case DivB512:
209 return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(V: Reg);
210 case DivBRC: {
211 if (!MUI.isDivergent(V: Reg))
212 return false;
213 // Check if there is VGPR register class of same size as the LLT.
214 const SIRegisterInfo *TRI =
215 static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
216 return TRI->getSGPRClassForBitWidth(BitWidth: MRI.getType(Reg).getSizeInBits());
217 }
218 case _:
219 return true;
220 default:
221 llvm_unreachable("missing matchUniformityAndLLT");
222 }
223}
224
225bool PredicateMapping::match(const MachineInstr &MI,
226 const MachineUniformityInfo &MUI,
227 const MachineRegisterInfo &MRI) const {
228 // Check LLT signature.
229 for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
230 const MachineOperand &MO = MI.getOperand(i);
231 if (OpUniformityAndTypes[i] == _) {
232 assert((!MI.getOperand(i).isReg() ||
233 !MI.getOperand(i).getReg().isVirtual()) &&
234 "_ is for non-register and physical register operands only");
235 continue;
236 }
237
238 // Remaining IDs check registers.
239 if (!MO.isReg())
240 return false;
241
242 if (!matchUniformityAndLLT(Reg: MO.getReg(), UniID: OpUniformityAndTypes[i], MUI, MRI))
243 return false;
244 }
245
246 // More complex check.
247 if (TestFunc)
248 return TestFunc(MI);
249
250 return true;
251}
252
// Rule set with no fast-rule slots; only the slow rule list is searched.
SetOfRulesForOpcode::SetOfRulesForOpcode() = default;
254
// Rule set with fast-rule slots enabled for the given type flavor
// (Standard, StandardB or Vector).
SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
    : FastTypes(FastTypes) {}
257
258UniformityLLTOpPredicateID LLTToId(LLT Ty) {
259 if (Ty == LLT::scalar(SizeInBits: 16))
260 return S16;
261 if (Ty == LLT::scalar(SizeInBits: 32))
262 return S32;
263 if (Ty == LLT::scalar(SizeInBits: 64))
264 return S64;
265 if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 16))
266 return V2S16;
267 if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32))
268 return V2S32;
269 if (Ty == LLT::fixed_vector(NumElements: 3, ScalarSizeInBits: 32))
270 return V3S32;
271 if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32))
272 return V4S32;
273 return _;
274}
275
276UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
277 if (Ty == LLT::scalar(SizeInBits: 32) || Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 16) ||
278 isAnyPtr(Ty, Width: 32))
279 return B32;
280 if (Ty == LLT::scalar(SizeInBits: 64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32) ||
281 Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16) || isAnyPtr(Ty, Width: 64))
282 return B64;
283 if (Ty == LLT::fixed_vector(NumElements: 3, ScalarSizeInBits: 32))
284 return B96;
285 if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32) || Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) ||
286 Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16) || isAnyPtr(Ty, Width: 128))
287 return B128;
288 return _;
289}
290
291const RegBankLLTMapping *
292SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI,
293 const MachineRegisterInfo &MRI,
294 const MachineUniformityInfo &MUI) const {
295 // Search in "Fast Rules".
296 // Note: if fast rules are enabled, RegBankLLTMapping must be added in each
297 // slot that could "match fast Predicate". If not, InvalidMapping is
298 // returned which results in failure, does not search "Slow Rules".
299 if (FastTypes != NoFastRules) {
300 Register Reg = MI.getOperand(i: 0).getReg();
301 int Slot;
302 if (FastTypes == StandardB)
303 Slot = getFastPredicateSlot(Ty: LLTToBId(Ty: MRI.getType(Reg)));
304 else
305 Slot = getFastPredicateSlot(Ty: LLTToId(Ty: MRI.getType(Reg)));
306
307 if (Slot != -1)
308 return MUI.isUniform(V: Reg) ? &Uni[Slot] : &Div[Slot];
309 }
310
311 // Slow search for more complex rules.
312 for (const RegBankLegalizeRule &Rule : Rules) {
313 if (Rule.Predicate.match(MI, MUI, MRI))
314 return &Rule.OperandMapping;
315 }
316
317 return nullptr;
318}
319
320void SetOfRulesForOpcode::addRule(RegBankLegalizeRule Rule) {
321 Rules.push_back(Elt: Rule);
322}
323
// Install the mapping used when operand 0 is divergent and has fast-rule
// type Ty. Ty must be one of the four IDs supported by this FastRulesTypes.
void SetOfRulesForOpcode::addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
                                               RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Div[Slot] = std::move(RuleApplyIDs);
}
330
// Install the mapping used when operand 0 is uniform and has fast-rule
// type Ty. Ty must be one of the four IDs supported by this FastRulesTypes.
void SetOfRulesForOpcode::addFastRuleUniform(UniformityLLTOpPredicateID Ty,
                                             RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Uni[Slot] = std::move(RuleApplyIDs);
}
337
338int SetOfRulesForOpcode::getFastPredicateSlot(
339 UniformityLLTOpPredicateID Ty) const {
340 switch (FastTypes) {
341 case Standard: {
342 switch (Ty) {
343 case S32:
344 return 0;
345 case S16:
346 return 1;
347 case S64:
348 return 2;
349 case V2S16:
350 return 3;
351 default:
352 return -1;
353 }
354 }
355 case StandardB: {
356 switch (Ty) {
357 case B32:
358 return 0;
359 case B64:
360 return 1;
361 case B96:
362 return 2;
363 case B128:
364 return 3;
365 default:
366 return -1;
367 }
368 }
369 case Vector: {
370 switch (Ty) {
371 case S32:
372 return 0;
373 case V2S32:
374 return 1;
375 case V3S32:
376 return 2;
377 case V4S32:
378 return 3;
379 default:
380 return -1;
381 }
382 }
383 default:
384 return -1;
385 }
386}
387
// Start a builder that registers rules for the given generic opcodes; all
// opcodes in OpcList alias to one shared SetOfRulesForOpcode in GRules.
RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
}
393
// Start a builder that registers rules for the given intrinsic IDs; all IDs
// in OpcList alias to one shared SetOfRulesForOpcode in IRules.
RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
}
399
400const SetOfRulesForOpcode *
401RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
402 unsigned Opc = MI.getOpcode();
403 if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
404 Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
405 Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
406 unsigned IntrID = cast<GIntrinsic>(Val&: MI).getIntrinsicID();
407 auto IRAIt = IRulesAlias.find(Val: IntrID);
408 if (IRAIt == IRulesAlias.end())
409 return nullptr;
410 return &IRules.at(Val: IRAIt->second);
411 }
412
413 auto GRAIt = GRulesAlias.find(Val: Opc);
414 if (GRAIt == GRulesAlias.end())
415 return nullptr;
416 return &GRules.at(Val: GRAIt->second);
417}
418
419// Syntactic sugar wrapper for predicate lambda that enables '&&', '||' and '!'.
420class Predicate {
421private:
422 struct Elt {
423 // Save formula composed of Pred, '&&', '||' and '!' as a jump table.
424 // Sink ! to Pred. For example !((A && !B) || C) -> (!A || B) && !C
425 // Sequences of && and || will be represented by jumps, for example:
426 // (A && B && ... X) or (A && B && ... X) || Y
427 // A == true jump to B
428 // A == false jump to end or Y, result is A(false) or Y
429 // (A || B || ... X) or (A || B || ... X) && Y
430 // A == true jump to end or Y, result is A(true) or Y
431 // A == false jump to B
432 // Notice that when negating expression, we simply flip Neg on each Pred
433 // and swap TJumpOffset and FJumpOffset (&& becomes ||, || becomes &&).
434 std::function<bool(const MachineInstr &)> Pred;
435 bool Neg; // Neg of Pred is calculated before jump
436 unsigned TJumpOffset;
437 unsigned FJumpOffset;
438 };
439
440 SmallVector<Elt, 8> Expression;
441
442 Predicate(SmallVectorImpl<Elt> &&Expr) { Expression.swap(RHS&: Expr); };
443
444public:
445 Predicate(std::function<bool(const MachineInstr &)> Pred) {
446 Expression.push_back(Elt: {.Pred: Pred, .Neg: false, .TJumpOffset: 1, .FJumpOffset: 1});
447 };
448
449 bool operator()(const MachineInstr &MI) const {
450 unsigned Idx = 0;
451 unsigned ResultIdx = Expression.size();
452 bool Result;
453 do {
454 Result = Expression[Idx].Pred(MI);
455 Result = Expression[Idx].Neg ? !Result : Result;
456 if (Result) {
457 Idx += Expression[Idx].TJumpOffset;
458 } else {
459 Idx += Expression[Idx].FJumpOffset;
460 }
461 } while ((Idx != ResultIdx));
462
463 return Result;
464 };
465
466 Predicate operator!() const {
467 SmallVector<Elt, 8> NegExpression;
468 for (const Elt &ExprElt : Expression) {
469 NegExpression.push_back(Elt: {.Pred: ExprElt.Pred, .Neg: !ExprElt.Neg, .TJumpOffset: ExprElt.FJumpOffset,
470 .FJumpOffset: ExprElt.TJumpOffset});
471 }
472 return Predicate(std::move(NegExpression));
473 };
474
475 Predicate operator&&(const Predicate &RHS) const {
476 SmallVector<Elt, 8> AndExpression = Expression;
477
478 unsigned RHSSize = RHS.Expression.size();
479 unsigned ResultIdx = Expression.size();
480 for (unsigned i = 0; i < ResultIdx; ++i) {
481 // LHS results in false, whole expression results in false.
482 if (i + AndExpression[i].FJumpOffset == ResultIdx)
483 AndExpression[i].FJumpOffset += RHSSize;
484 }
485
486 AndExpression.append(RHS: RHS.Expression);
487
488 return Predicate(std::move(AndExpression));
489 }
490
491 Predicate operator||(const Predicate &RHS) const {
492 SmallVector<Elt, 8> OrExpression = Expression;
493
494 unsigned RHSSize = RHS.Expression.size();
495 unsigned ResultIdx = Expression.size();
496 for (unsigned i = 0; i < ResultIdx; ++i) {
497 // LHS results in true, whole expression results in true.
498 if (i + OrExpression[i].TJumpOffset == ResultIdx)
499 OrExpression[i].TJumpOffset += RHSSize;
500 }
501
502 OrExpression.append(RHS: RHS.Expression);
503
504 return Predicate(std::move(OrExpression));
505 }
506};
507
508// Initialize rules
509RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
510 MachineRegisterInfo &_MRI)
511 : ST(&_ST), MRI(&_MRI) {
512
513 addRulesForGOpcs(OpcList: {G_ADD, G_SUB}, FastTypes: Standard)
514 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
515 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
516 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}})
517 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
518 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackAExt})
519 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
520 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64, Sgpr64}})
521 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr64}});
522
523 addRulesForGOpcs(OpcList: {G_UADDO, G_USUBO}, FastTypes: Standard)
524 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}})
525 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}});
526
527 addRulesForGOpcs(OpcList: {G_UADDE, G_USUBE, G_SADDE, G_SSUBE}, FastTypes: Standard)
528 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32, Sgpr32AExtBoolInReg}})
529 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32, Vcc}, {Vgpr32, Vgpr32, Vcc}});
530
531 addRulesForGOpcs(OpcList: {G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT}, FastTypes: Standard)
532 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16}})
533 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
534 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}})
535 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
536 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
537 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
538
539 bool HasVecMulU64 = ST->hasVectorMulU64();
540 addRulesForGOpcs(OpcList: {G_MUL}, FastTypes: Standard)
541 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
542 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}})
543 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
544 .Uni(Ty: S64, RuleApplyIDs: {{SgprB64}, {SgprB64, SgprB64}})
545 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
546 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
547 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
548 .Div(Ty: S64, RuleApplyIDs: {{VgprB64}, {VgprB64, VgprB64}}, STPred: HasVecMulU64)
549 .Div(Ty: S64, RuleApplyIDs: {{VgprB64}, {VgprB64, VgprB64}, SplitTo32Mul}, STPred: !HasVecMulU64);
550
551 bool hasMulHi = ST->hasScalarMulHiInsts();
552 addRulesForGOpcs(OpcList: {G_UMULH, G_SMULH}, FastTypes: Standard)
553 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
554 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}}, STPred: hasMulHi)
555 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}}, STPred: !hasMulHi);
556
557 addRulesForGOpcs(OpcList: {G_AMDGPU_MAD_U64_U32}, FastTypes: Standard)
558 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64, Vcc}, {Vgpr32, Vgpr32, Vgpr64}})
559 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64, Sgpr32Trunc}, {Sgpr32, Sgpr32, Sgpr64}, UniMAD64});
560
561 bool HasScalarSMulU64 = ST->hasScalarSMulU64();
562 addRulesForGOpcs(OpcList: {G_AMDGPU_S_MUL_U64_U32, G_AMDGPU_S_MUL_I64_I32}, FastTypes: Standard)
563 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64, Sgpr64}, UniMul64}, STPred: HasScalarSMulU64)
564 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr64}, DivSMulToMAD});
565
566 addRulesForGOpcs(OpcList: {G_XOR, G_OR, G_AND}, FastTypes: StandardB)
567 .Any(Init: {.Predicate: {UniS1}, .OperandMapping: {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}})
568 .Any(Init: {.Predicate: {DivS1}, .OperandMapping: {{Vcc}, {Vcc, Vcc}}})
569 .Any(Init: {.Predicate: {UniS16}, .OperandMapping: {{Sgpr16}, {Sgpr16, Sgpr16}}})
570 .Any(Init: {.Predicate: {DivS16}, .OperandMapping: {{Vgpr16}, {Vgpr16, Vgpr16}}})
571 .Uni(Ty: B32, RuleApplyIDs: {{SgprB32}, {SgprB32, SgprB32}})
572 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {VgprB32, VgprB32}})
573 .Uni(Ty: B64, RuleApplyIDs: {{SgprB64}, {SgprB64, SgprB64}})
574 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});
575
576 addRulesForGOpcs(OpcList: {G_SHL}, FastTypes: Standard)
577 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
578 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
579 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
580 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
581 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}})
582 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64, Sgpr32}})
583 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
584 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr32}});
585
586 addRulesForGOpcs(OpcList: {G_LSHR}, FastTypes: Standard)
587 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
588 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
589 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
590 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
591 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}})
592 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64, Sgpr32}})
593 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
594 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr32}});
595
596 addRulesForGOpcs(OpcList: {G_ASHR}, FastTypes: Standard)
597 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
598 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
599 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
600 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
601 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}})
602 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64, Sgpr32}})
603 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
604 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr32}});
605
606 addRulesForGOpcs(OpcList: {G_FSHR}, FastTypes: Standard)
607 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
608 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
609
610 addRulesForGOpcs(OpcList: {G_FRAME_INDEX}).Any(Init: {.Predicate: {UniP5, _}, .OperandMapping: {{SgprP5}, {None}}});
611
612 addRulesForGOpcs(OpcList: {G_UBFX, G_SBFX}, FastTypes: Standard)
613 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
614 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
615 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
616 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});
617
618 addRulesForGOpcs(OpcList: {G_SMIN, G_SMAX}, FastTypes: Standard)
619 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32SExt}})
620 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
621 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}})
622 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
623 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackMinMax})
624 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
625
626 addRulesForGOpcs(OpcList: {G_UMIN, G_UMAX}, FastTypes: Standard)
627 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
628 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
629 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}})
630 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
631 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackMinMax})
632 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
633
634 // Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT and G_FCONSTANT
635 // here, rest is trivially regbankselected earlier
636 addRulesForGOpcs(OpcList: {G_IMPLICIT_DEF}).Any(Init: {.Predicate: {UniS1}, .OperandMapping: {{Sgpr32Trunc}, {}}});
637 addRulesForGOpcs(OpcList: {G_CONSTANT})
638 .Any(Init: {.Predicate: {UniS1, _}, .OperandMapping: {{Sgpr32Trunc}, {None}, UniCstExt}});
639
640 addRulesForGOpcs(OpcList: {G_FREEZE})
641 .Any(Init: {.Predicate: {UniS1}, .OperandMapping: {{Sgpr32Trunc}, {Sgpr32AExt}}})
642 .Any(Init: {.Predicate: {DivS1}, .OperandMapping: {{Vcc}, {Vcc}}})
643 .Any(Init: {.Predicate: {UniS16}, .OperandMapping: {{Sgpr16}, {Sgpr16}}})
644 .Any(Init: {.Predicate: {UniBRC}, .OperandMapping: {{SgprBRC}, {SgprBRC}}})
645 .Any(Init: {.Predicate: {DivBRC}, .OperandMapping: {{VgprBRC}, {VgprBRC}}});
646
647 addRulesForGOpcs(OpcList: {G_UNMERGE_VALUES})
648 .Any(Init: {.Predicate: {UniS16}, .OperandMapping: {{}, {}, UnmergeToShiftTrunc}})
649 .Any(Init: {.Predicate: {UniBRC}, .OperandMapping: {{}, {}, VerifyAllSgpr}})
650 .Any(Init: {.Predicate: {DivBRC}, .OperandMapping: {{}, {}, ApplyAllVgpr}});
651
652 addRulesForGOpcs(OpcList: {G_PHI})
653 .Any(Init: {.Predicate: {UniS1}, .OperandMapping: {{}, {}, AextToS32InIncomingBlockGPHI}})
654 .Any(Init: {.Predicate: {UniS16}, .OperandMapping: {{}, {}, VerifyAllSgprGPHI}})
655 .Any(Init: {.Predicate: {UniBRC}, .OperandMapping: {{}, {}, VerifyAllSgprGPHI}})
656 .Any(Init: {.Predicate: {DivBRC}, .OperandMapping: {{}, {}, VerifyAllSgprOrVgprGPHI}});
657
658 // LOAD {Div}, {{VgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
659 // LOAD {Uni}, {{UniInVgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
660 // LOAD_NORET {}, {{}, {Imm, VgprSrc, ..., Sgpr_WF_RsrcIdx}}
661 // STORE {}, {{}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
662 addRulesForGOpcs(OpcList: {G_AMDGPU_INTRIN_IMAGE_LOAD, G_AMDGPU_INTRIN_IMAGE_LOAD_D16,
663 G_AMDGPU_INTRIN_IMAGE_LOAD_NORET,
664 G_AMDGPU_INTRIN_IMAGE_STORE,
665 G_AMDGPU_INTRIN_IMAGE_STORE_D16})
666 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {}, ApplyINTRIN_IMAGE}});
667
668 Predicate isSignedICmp([](const MachineInstr &MI) -> bool {
669 auto Pred =
670 static_cast<CmpInst::Predicate>(MI.getOperand(i: 1).getPredicate());
671 return CmpInst::isSigned(Pred);
672 });
673
674 Predicate isEqualityICmp([](const MachineInstr &MI) -> bool {
675 auto Pred =
676 static_cast<CmpInst::Predicate>(MI.getOperand(i: 1).getPredicate());
677 return ICmpInst::isEquality(P: Pred);
678 });
679
680 bool HasScalarCompareEq64 = ST->hasScalarCompareEq64();
681 // clang-format off
682 addRulesForGOpcs(OpcList: {G_ICMP})
683 .Any(Init: {.Predicate: {{UniS1, _, S16}, isEqualityICmp}, .OperandMapping: {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
684 .Any(Init: {.Predicate: {{UniS1, _, S16}, !isEqualityICmp && isSignedICmp}, .OperandMapping: {{Sgpr32Trunc}, {None, Sgpr32SExt, Sgpr32SExt}}})
685 .Any(Init: {.Predicate: {{UniS1, _, S16}, !isEqualityICmp && !isSignedICmp}, .OperandMapping: {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
686 .Any(Init: {.Predicate: {{DivS1, _, S16}}, .OperandMapping: {{Vcc}, {None, Vgpr16, Vgpr16}}})
687 .Any(Init: {.Predicate: {{UniS1, _, S32}}, .OperandMapping: {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
688 .Any(Init: {.Predicate: {{DivS1, _, S32}}, .OperandMapping: {{Vcc}, {None, Vgpr32, Vgpr32}}})
689 .Any(Init: {.Predicate: {{UniS1, _, S64}, isEqualityICmp}, .OperandMapping: {{Sgpr32Trunc}, {None, Sgpr64, Sgpr64}}}, STPred: HasScalarCompareEq64)
690 .Any(Init: {.Predicate: {{UniS1, _, S64}, isEqualityICmp}, .OperandMapping: {{UniInVcc}, {None, Vgpr64, Vgpr64}}}, STPred: !HasScalarCompareEq64)
691 .Any(Init: {.Predicate: {{UniS1, _, S64}, !isEqualityICmp}, .OperandMapping: {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
692 .Any(Init: {.Predicate: {{DivS1, _, S64}}, .OperandMapping: {{Vcc}, {None, Vgpr64, Vgpr64}}})
693 .Any(Init: {.Predicate: {{UniS1, _, Ptr32}}, .OperandMapping: {{Sgpr32Trunc}, {None, SgprPtr32, SgprPtr32}}})
694 .Any(Init: {.Predicate: {{DivS1, _, Ptr32}}, .OperandMapping: {{Vcc}, {None, VgprPtr32, VgprPtr32}}})
695 .Any(Init: {.Predicate: {{UniS1, _, Ptr64}, isEqualityICmp}, .OperandMapping: {{Sgpr32Trunc}, {None, SgprPtr64, SgprPtr64}}}, STPred: HasScalarCompareEq64)
696 .Any(Init: {.Predicate: {{UniS1, _, Ptr64}, isEqualityICmp}, .OperandMapping: {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}}, STPred: !HasScalarCompareEq64)
697 .Any(Init: {.Predicate: {{UniS1, _, Ptr64}, !isEqualityICmp}, .OperandMapping: {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}})
698 .Any(Init: {.Predicate: {{DivS1, _, Ptr64}}, .OperandMapping: {{Vcc}, {None, VgprPtr64, VgprPtr64}}});
699 // clang-format on
700
701 addRulesForGOpcs(OpcList: {G_BRCOND})
702 .Any(Init: {.Predicate: {UniS1}, .OperandMapping: {{}, {Sgpr32AExtBoolInReg}}})
703 .Any(Init: {.Predicate: {DivS1}, .OperandMapping: {{}, {Vcc}}});
704
705 addRulesForGOpcs(OpcList: {G_BR}).Any(Init: {.Predicate: {_}, .OperandMapping: {{}, {None}}});
706
707 addRulesForGOpcs(OpcList: {G_SELECT}, FastTypes: StandardB)
708 .Any(Init: {.Predicate: {DivS16}, .OperandMapping: {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
709 .Any(Init: {.Predicate: {UniS16}, .OperandMapping: {{Sgpr16}, {Sgpr32AExtBoolInReg, Sgpr16, Sgpr16}}})
710 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {Vcc, VgprB32, VgprB32}})
711 .Uni(Ty: B32, RuleApplyIDs: {{SgprB32}, {Sgpr32AExtBoolInReg, SgprB32, SgprB32}})
712 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {Vcc, VgprB64, VgprB64}, SplitTo32Select})
713 .Uni(Ty: B64, RuleApplyIDs: {{SgprB64}, {Sgpr32AExtBoolInReg, SgprB64, SgprB64}});
714
715 addRulesForGOpcs(OpcList: {G_ANYEXT})
716 .Any(Init: {.Predicate: {UniS16, S1}, .OperandMapping: {{None}, {None}}}) // should be combined away
717 .Any(Init: {.Predicate: {UniS32, S1}, .OperandMapping: {{None}, {None}}}) // should be combined away
718 .Any(Init: {.Predicate: {UniS64, S1}, .OperandMapping: {{None}, {None}}}) // should be combined away
719 .Any(Init: {.Predicate: {DivS16, S1}, .OperandMapping: {{Vgpr16}, {Vcc}, VccExtToSel}})
720 .Any(Init: {.Predicate: {DivS32, S1}, .OperandMapping: {{Vgpr32}, {Vcc}, VccExtToSel}})
721 .Any(Init: {.Predicate: {DivS64, S1}, .OperandMapping: {{Vgpr64}, {Vcc}, VccExtToSel}})
722 .Any(Init: {.Predicate: {UniS64, S32}, .OperandMapping: {{Sgpr64}, {Sgpr32}, Ext32To64}})
723 .Any(Init: {.Predicate: {DivS64, S32}, .OperandMapping: {{Vgpr64}, {Vgpr32}, Ext32To64}})
724 .Any(Init: {.Predicate: {UniS32, S16}, .OperandMapping: {{Sgpr32}, {Sgpr16}}})
725 .Any(Init: {.Predicate: {DivS32, S16}, .OperandMapping: {{Vgpr32}, {Vgpr16}}});
726
727 bool Has16bitCmp = ST->has16BitInsts();
728
729 // In global-isel G_TRUNC in-reg is treated as no-op, inst selected into COPY.
730 // It is up to user to deal with truncated bits.
731 addRulesForGOpcs(OpcList: {G_TRUNC})
732 .Any(Init: {.Predicate: {UniS1, UniS16}, .OperandMapping: {{None}, {None}}}) // should be combined away
733 .Any(Init: {.Predicate: {UniS1, UniS32}, .OperandMapping: {{None}, {None}}}) // should be combined away
734 .Any(Init: {.Predicate: {UniS1, UniS64}, .OperandMapping: {{None}, {None}}}) // should be combined away
735 .Any(Init: {.Predicate: {UniS16, S32}, .OperandMapping: {{Sgpr16}, {Sgpr32}}})
736 .Any(Init: {.Predicate: {DivS16, S32}, .OperandMapping: {{Vgpr16}, {Vgpr32}}})
737 .Any(Init: {.Predicate: {UniS32, S64}, .OperandMapping: {{Sgpr32}, {Sgpr64}}})
738 .Any(Init: {.Predicate: {DivS32, S64}, .OperandMapping: {{Vgpr32}, {Vgpr64}}})
739 .Any(Init: {.Predicate: {UniV2S16, V2S32}, .OperandMapping: {{SgprV2S16}, {SgprV2S32}}})
740 .Any(Init: {.Predicate: {DivV2S16, V2S32}, .OperandMapping: {{VgprV2S16}, {VgprV2S32}}})
741 // This is non-trivial. VgprToVccCopy is done using compare instruction.
742 .Any(Init: {.Predicate: {DivS1, DivS16}, .OperandMapping: {{Vcc}, {Vgpr16}, VgprToVccCopy}}, STPred: Has16bitCmp)
743 .Any(Init: {.Predicate: {DivS1, DivS16}, .OperandMapping: {{Vcc}, {Vgpr32AExt}, VgprToVccCopy}},
744 STPred: !Has16bitCmp)
745 .Any(Init: {.Predicate: {DivS1, DivS32}, .OperandMapping: {{Vcc}, {Vgpr32}, VgprToVccCopy}})
746 .Any(Init: {.Predicate: {DivS1, DivS64}, .OperandMapping: {{Vcc}, {Vgpr64}, VgprToVccCopy}});
747
748 addRulesForGOpcs(OpcList: {G_ZEXT})
749 .Any(Init: {.Predicate: {UniS16, S1}, .OperandMapping: {{Sgpr32Trunc}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
750 .Any(Init: {.Predicate: {UniS32, S1}, .OperandMapping: {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
751 .Any(Init: {.Predicate: {UniS64, S1}, .OperandMapping: {{Sgpr64}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
752 .Any(Init: {.Predicate: {DivS16, S1}, .OperandMapping: {{Vgpr16}, {Vcc}, VccExtToSel}})
753 .Any(Init: {.Predicate: {DivS32, S1}, .OperandMapping: {{Vgpr32}, {Vcc}, VccExtToSel}})
754 .Any(Init: {.Predicate: {DivS64, S1}, .OperandMapping: {{Vgpr64}, {Vcc}, VccExtToSel}})
755 .Any(Init: {.Predicate: {UniS64, S32}, .OperandMapping: {{Sgpr64}, {Sgpr32}, Ext32To64}})
756 .Any(Init: {.Predicate: {DivS64, S32}, .OperandMapping: {{Vgpr64}, {Vgpr32}, Ext32To64}})
757 // not extending S16 to S32 is questionable.
758 .Any(Init: {.Predicate: {UniS64, S16}, .OperandMapping: {{Sgpr64}, {Sgpr32ZExt}, Ext32To64}})
759 .Any(Init: {.Predicate: {DivS64, S16}, .OperandMapping: {{Vgpr64}, {Vgpr32ZExt}, Ext32To64}})
760 .Any(Init: {.Predicate: {UniS32, S16}, .OperandMapping: {{Sgpr32}, {Sgpr16}}})
761 .Any(Init: {.Predicate: {DivS32, S16}, .OperandMapping: {{Vgpr32}, {Vgpr16}}});
762
763 addRulesForGOpcs(OpcList: {G_SEXT})
764 .Any(Init: {.Predicate: {UniS16, S1}, .OperandMapping: {{Sgpr32Trunc}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
765 .Any(Init: {.Predicate: {UniS32, S1}, .OperandMapping: {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
766 .Any(Init: {.Predicate: {UniS64, S1}, .OperandMapping: {{Sgpr64}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
767 .Any(Init: {.Predicate: {DivS16, S1}, .OperandMapping: {{Vgpr16}, {Vcc}, VccExtToSel}})
768 .Any(Init: {.Predicate: {DivS32, S1}, .OperandMapping: {{Vgpr32}, {Vcc}, VccExtToSel}})
769 .Any(Init: {.Predicate: {DivS64, S1}, .OperandMapping: {{Vgpr64}, {Vcc}, VccExtToSel}})
770 .Any(Init: {.Predicate: {UniS64, S32}, .OperandMapping: {{Sgpr64}, {Sgpr32}, Ext32To64}})
771 .Any(Init: {.Predicate: {DivS64, S32}, .OperandMapping: {{Vgpr64}, {Vgpr32}, Ext32To64}})
772 // not extending S16 to S32 is questionable.
773 .Any(Init: {.Predicate: {UniS64, S16}, .OperandMapping: {{Sgpr64}, {Sgpr32SExt}, Ext32To64}})
774 .Any(Init: {.Predicate: {DivS64, S16}, .OperandMapping: {{Vgpr64}, {Vgpr32SExt}, Ext32To64}})
775 .Any(Init: {.Predicate: {UniS32, S16}, .OperandMapping: {{Sgpr32}, {Sgpr16}}})
776 .Any(Init: {.Predicate: {DivS32, S16}, .OperandMapping: {{Vgpr32}, {Vgpr16}}});
777
778 addRulesForGOpcs(OpcList: {G_SEXT_INREG})
779 .Any(Init: {.Predicate: {UniS32, S32}, .OperandMapping: {{Sgpr32}, {Sgpr32}}})
780 .Any(Init: {.Predicate: {DivS32, S32}, .OperandMapping: {{Vgpr32}, {Vgpr32}}})
781 .Any(Init: {.Predicate: {UniS64, S64}, .OperandMapping: {{Sgpr64}, {Sgpr64}}})
782 .Any(Init: {.Predicate: {DivS64, S64}, .OperandMapping: {{Vgpr64}, {Vgpr64}, SplitTo32SExtInReg}});
783
784 addRulesForGOpcs(OpcList: {G_ASSERT_ZEXT, G_ASSERT_SEXT}, FastTypes: Standard)
785 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Imm}})
786 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Imm}})
787 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64, Imm}})
788 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Imm}});
789
790 addRulesForGOpcs(OpcList: {G_ASSERT_ALIGN}, FastTypes: Standard)
791 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32}})
792 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32}})
793 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64}})
794 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64}})
795 .Any(Init: {.Predicate: {UniPtr32}, .OperandMapping: {{SgprPtr32}, {SgprPtr32}}})
796 .Any(Init: {.Predicate: {DivPtr32}, .OperandMapping: {{VgprPtr32}, {VgprPtr32}}})
797 .Any(Init: {.Predicate: {UniPtr64}, .OperandMapping: {{SgprPtr64}, {SgprPtr64}}})
798 .Any(Init: {.Predicate: {DivPtr64}, .OperandMapping: {{VgprPtr64}, {VgprPtr64}}});
799
800 // Atomic read-modify-write operations: result and value are always VGPR,
801 // pointer varies by address space.
802 addRulesForGOpcs(OpcList: {G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG,
803 G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
804 G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN,
805 G_ATOMICRMW_UMAX, G_ATOMICRMW_UINC_WRAP,
806 G_ATOMICRMW_UDEC_WRAP, G_ATOMICRMW_FMIN, G_ATOMICRMW_FMAX})
807 .Any(Init: {.Predicate: {DivS32, P0, S32}, .OperandMapping: {{Vgpr32}, {VgprP0, Vgpr32}}})
808 .Any(Init: {.Predicate: {DivS64, P0, S64}, .OperandMapping: {{Vgpr64}, {VgprP0, Vgpr64}}})
809 .Any(Init: {.Predicate: {DivS32, P1, S32}, .OperandMapping: {{Vgpr32}, {VgprP1, Vgpr32}}})
810 .Any(Init: {.Predicate: {DivS64, P1, S64}, .OperandMapping: {{Vgpr64}, {VgprP1, Vgpr64}}})
811 .Any(Init: {.Predicate: {DivS32, P3, S32}, .OperandMapping: {{Vgpr32}, {VgprP3, Vgpr32}}})
812 .Any(Init: {.Predicate: {DivS64, P3, S64}, .OperandMapping: {{Vgpr64}, {VgprP3, Vgpr64}}});
813
814 bool HasAtomicFlatPkAdd16Insts = ST->hasAtomicFlatPkAdd16Insts();
815 bool HasAtomicBufferGlobalPkAddF16Insts =
816 ST->hasAtomicBufferGlobalPkAddF16NoRtnInsts() ||
817 ST->hasAtomicBufferGlobalPkAddF16Insts();
818 bool HasAtomicDsPkAdd16Insts = ST->hasAtomicDsPkAdd16Insts();
819 addRulesForGOpcs(OpcList: {G_ATOMICRMW_FADD})
820 .Any(Init: {.Predicate: {DivS32, P0, S32}, .OperandMapping: {{Vgpr32}, {VgprP0, Vgpr32}}})
821 .Any(Init: {.Predicate: {DivS64, P0, S64}, .OperandMapping: {{Vgpr64}, {VgprP0, Vgpr64}}})
822 .Any(Init: {.Predicate: {DivS32, P1, S32}, .OperandMapping: {{Vgpr32}, {VgprP1, Vgpr32}}})
823 .Any(Init: {.Predicate: {DivS64, P1, S64}, .OperandMapping: {{Vgpr64}, {VgprP1, Vgpr64}}})
824 .Any(Init: {.Predicate: {DivS32, P3, S32}, .OperandMapping: {{Vgpr32}, {VgprP3, Vgpr32}}})
825 .Any(Init: {.Predicate: {DivS64, P3, S64}, .OperandMapping: {{Vgpr64}, {VgprP3, Vgpr64}}})
826 .Any(Init: {.Predicate: {DivV2S16, P0, V2S16}, .OperandMapping: {{VgprV2S16}, {VgprP0, VgprV2S16}}},
827 STPred: HasAtomicFlatPkAdd16Insts)
828 .Any(Init: {.Predicate: {DivV2S16, P1, V2S16}, .OperandMapping: {{VgprV2S16}, {VgprP1, VgprV2S16}}},
829 STPred: HasAtomicBufferGlobalPkAddF16Insts)
830 .Any(Init: {.Predicate: {DivV2S16, P3, V2S16}, .OperandMapping: {{VgprV2S16}, {VgprP3, VgprV2S16}}},
831 STPred: HasAtomicDsPkAdd16Insts);
832
833 addRulesForGOpcs(OpcList: {G_ATOMIC_CMPXCHG})
834 .Any(Init: {.Predicate: {DivS32, P2}, .OperandMapping: {{Vgpr32}, {VgprP2, Vgpr32, Vgpr32}}})
835 .Any(Init: {.Predicate: {DivS64, P2}, .OperandMapping: {{Vgpr64}, {VgprP2, Vgpr64, Vgpr64}}})
836 .Any(Init: {.Predicate: {DivS32, P3}, .OperandMapping: {{Vgpr32}, {VgprP3, Vgpr32, Vgpr32}}})
837 .Any(Init: {.Predicate: {DivS64, P3}, .OperandMapping: {{Vgpr64}, {VgprP3, Vgpr64, Vgpr64}}});
838
839 addRulesForGOpcs(OpcList: {G_AMDGPU_ATOMIC_CMPXCHG})
840 .Any(Init: {.Predicate: {DivS32, P0}, .OperandMapping: {{Vgpr32}, {VgprP0, VgprV2S32}}})
841 .Any(Init: {.Predicate: {DivS32, P1}, .OperandMapping: {{Vgpr32}, {VgprP1, VgprV2S32}}})
842 .Any(Init: {.Predicate: {DivS64, P0}, .OperandMapping: {{Vgpr64}, {VgprP0, VgprV2S64}}})
843 .Any(Init: {.Predicate: {DivS64, P1}, .OperandMapping: {{Vgpr64}, {VgprP1, VgprV2S64}}});
844
845 addRulesForGOpcs(OpcList: {G_AMDGPU_BUFFER_ATOMIC_CMPSWAP}, FastTypes: Standard)
846 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32},
847 {Vgpr32, Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
848 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64},
849 {Vgpr64, Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});
850
851 addRulesForGOpcs(OpcList: {G_AMDGPU_BUFFER_ATOMIC_SWAP, G_AMDGPU_BUFFER_ATOMIC_UMAX,
852 G_AMDGPU_BUFFER_ATOMIC_UMIN, G_AMDGPU_BUFFER_ATOMIC_SMAX,
853 G_AMDGPU_BUFFER_ATOMIC_SMIN, G_AMDGPU_BUFFER_ATOMIC_FMAX,
854 G_AMDGPU_BUFFER_ATOMIC_FMIN},
855 FastTypes: Standard)
856 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
857 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});
858
859 bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
860 bool hasSMRDSmall = ST->hasScalarSubwordLoads();
861 bool usesTrue16 = ST->useRealTrue16Insts();
862
863 Predicate isAlign16([](const MachineInstr &MI) -> bool {
864 return (*MI.memoperands_begin())->getAlign() >= Align(16);
865 });
866
867 Predicate isAlign4([](const MachineInstr &MI) -> bool {
868 return (*MI.memoperands_begin())->getAlign() >= Align(4);
869 });
870
871 Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
872 return (*MI.memoperands_begin())->isAtomic();
873 });
874
875 Predicate isUniMMO([](const MachineInstr &MI) -> bool {
876 return AMDGPU::isUniformMMO(MMO: *MI.memoperands_begin());
877 });
878
879 Predicate isConst([](const MachineInstr &MI) -> bool {
880 // Address space in MMO be different then address space on pointer.
881 const MachineMemOperand *MMO = *MI.memoperands_begin();
882 const unsigned AS = MMO->getAddrSpace();
883 return AS == AMDGPUAS::CONSTANT_ADDRESS ||
884 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
885 });
886
887 Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
888 return (*MI.memoperands_begin())->isVolatile();
889 });
890
891 Predicate isInvMMO([](const MachineInstr &MI) -> bool {
892 return (*MI.memoperands_begin())->isInvariant();
893 });
894
895 Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
896 return (*MI.memoperands_begin())->getFlags() & MONoClobber;
897 });
898
899 Predicate isNaturalAligned([](const MachineInstr &MI) -> bool {
900 const MachineMemOperand *MMO = *MI.memoperands_begin();
901 return MMO->getAlign() >= Align(MMO->getSize().getValue());
902 });
903
904 Predicate is8Or16BitMMO([](const MachineInstr &MI) -> bool {
905 const MachineMemOperand *MMO = *MI.memoperands_begin();
906 const unsigned MemSize = 8 * MMO->getSize().getValue();
907 return MemSize == 16 || MemSize == 8;
908 });
909
910 Predicate is32BitMMO([](const MachineInstr &MI) -> bool {
911 const MachineMemOperand *MMO = *MI.memoperands_begin();
912 return 8 * MMO->getSize().getValue() == 32;
913 });
914
915 auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
916 (isConst || isInvMMO || isNoClobberMMO);
917
918 // clang-format off
919 // TODO: S32Dst, 16-bit any-extending load should not appear on True16 targets
920 addRulesForGOpcs(OpcList: {G_LOAD})
921 // flat, addrspace(0), never uniform - flat_load
922 .Any(Init: {.Predicate: {DivS16, P0}, .OperandMapping: {{Vgpr16}, {VgprP0}}}, STPred: usesTrue16)
923 .Any(Init: {.Predicate: {DivB32, P0}, .OperandMapping: {{VgprB32}, {VgprP0}}}) // 32-bit load, 8-bit and 16-bit any-extending load
924 .Any(Init: {.Predicate: {DivB64, P0}, .OperandMapping: {{VgprB64}, {VgprP0}}})
925 .Any(Init: {.Predicate: {DivB96, P0}, .OperandMapping: {{VgprB96}, {VgprP0}}})
926 .Any(Init: {.Predicate: {DivB128, P0}, .OperandMapping: {{VgprB128}, {VgprP0}}})
927
928 // global, addrspace(1)
929 // divergent - global_load
930 .Any(Init: {.Predicate: {DivS16, P1}, .OperandMapping: {{Vgpr16}, {VgprP1}}}, STPred: usesTrue16)
931 .Any(Init: {.Predicate: {DivB32, P1}, .OperandMapping: {{VgprB32}, {VgprP1}}}) //32-bit load, 8-bit and 16-bit any-extending load
932 .Any(Init: {.Predicate: {DivB64, P1}, .OperandMapping: {{VgprB64}, {VgprP1}}})
933 .Any(Init: {.Predicate: {DivB96, P1}, .OperandMapping: {{VgprB96}, {VgprP1}}})
934 .Any(Init: {.Predicate: {DivB128, P1}, .OperandMapping: {{VgprB128}, {VgprP1}}})
935 .Any(Init: {.Predicate: {DivB256, P1}, .OperandMapping: {{VgprB256}, {VgprP1}, SplitLoad}})
936 .Any(Init: {.Predicate: {DivB512, P1}, .OperandMapping: {{VgprB512}, {VgprP1}, SplitLoad}})
937
938 // uniform - s_load
939 .Any(Init: {.Predicate: {{UniS16, P1}, isNaturalAligned && isUL}, .OperandMapping: {{Sgpr32Trunc}, {SgprP1}}}, STPred: usesTrue16 && hasSMRDSmall) // s16 load
940 .Any(Init: {.Predicate: {{UniS16, P1}, isAlign4 && isUL}, .OperandMapping: {{Sgpr32Trunc}, {SgprP1}, WidenMMOToS32}}, STPred: usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
941 .Any(Init: {.Predicate: {{UniB32, P1}, isNaturalAligned && isUL}, .OperandMapping: {{SgprB32}, {SgprP1}}}, STPred: hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
942 // TODO: SplitLoad when !isNaturalAligned && isUL and target hasSMRDSmall
943 .Any(Init: {.Predicate: {{UniB32, P1}, is8Or16BitMMO && isAlign4 && isUL}, .OperandMapping: {{SgprB32}, {SgprP1}, WidenMMOToS32}}, STPred: !hasSMRDSmall) //8-bit and 16-bit any-extending load to 32-bit load
944 .Any(Init: {.Predicate: {{UniB32, P1}, is32BitMMO && isAlign4 && isUL}, .OperandMapping: {{SgprB32}, {SgprP1}}}) //32-bit load
945 .Any(Init: {.Predicate: {{UniB64, P1}, isAlign4 && isUL}, .OperandMapping: {{SgprB64}, {SgprP1}}})
946 .Any(Init: {.Predicate: {{UniB96, P1}, isAlign16 && isUL}, .OperandMapping: {{SgprB96}, {SgprP1}, WidenLoad}}, STPred: !hasSMRDx3)
947 .Any(Init: {.Predicate: {{UniB96, P1}, isAlign4 && !isAlign16 && isUL}, .OperandMapping: {{SgprB96}, {SgprP1}, SplitLoad}}, STPred: !hasSMRDx3)
948 .Any(Init: {.Predicate: {{UniB96, P1}, isAlign4 && isUL}, .OperandMapping: {{SgprB96}, {SgprP1}}}, STPred: hasSMRDx3)
949 .Any(Init: {.Predicate: {{UniB128, P1}, isAlign4 && isUL}, .OperandMapping: {{SgprB128}, {SgprP1}}})
950 .Any(Init: {.Predicate: {{UniB256, P1}, isAlign4 && isUL}, .OperandMapping: {{SgprB256}, {SgprP1}}})
951 .Any(Init: {.Predicate: {{UniB512, P1}, isAlign4 && isUL}, .OperandMapping: {{SgprB512}, {SgprP1}}})
952
953 // Uniform via global or buffer load, for example volatile or non-aligned
954 // uniform load. Not using standard {{UniInVgprTy}, {VgprP1}} since it is
955 // selected as global_load, use SgprP1 for pointer instead to match
956 // patterns without flat-for-global, default for GFX7 and older.
957 // -> +flat-for-global + {{UniInVgprTy}, {SgprP1}} - global_load
958 // -> -flat-for-global + {{UniInVgprTy}, {SgprP1}} - buffer_load
959 .Any(Init: {.Predicate: {{UniS16, P1}, !isNaturalAligned || !isUL}, .OperandMapping: {{UniInVgprS16}, {SgprP1}}}, STPred: usesTrue16 && hasSMRDSmall) // s16 load
960 .Any(Init: {.Predicate: {{UniS16, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprS16}, {SgprP1}}}, STPred: usesTrue16 && !hasSMRDSmall) // s16 load
961 .Any(Init: {.Predicate: {{UniB32, P1}, !isNaturalAligned || !isUL}, .OperandMapping: {{UniInVgprB32}, {SgprP1}}}, STPred: hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
962 .Any(Init: {.Predicate: {{UniB32, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB32}, {SgprP1}}}, STPred: !hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
963 .Any(Init: {.Predicate: {{UniB64, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB64}, {SgprP1}}})
964 .Any(Init: {.Predicate: {{UniB96, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB96}, {SgprP1}}})
965 .Any(Init: {.Predicate: {{UniB128, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB128}, {SgprP1}}})
966 .Any(Init: {.Predicate: {{UniB256, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB256}, {SgprP1}, SplitLoad}})
967 .Any(Init: {.Predicate: {{UniB512, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB512}, {SgprP1}, SplitLoad}})
968
969 // local, addrspace(3) - ds_load
970 .Any(Init: {.Predicate: {DivS16, P3}, .OperandMapping: {{Vgpr16}, {VgprP3}}}, STPred: usesTrue16)
971 .Any(Init: {.Predicate: {DivB32, P3}, .OperandMapping: {{VgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
972 .Any(Init: {.Predicate: {DivB64, P3}, .OperandMapping: {{VgprB64}, {VgprP3}}})
973 .Any(Init: {.Predicate: {DivB96, P3}, .OperandMapping: {{VgprB96}, {VgprP3}}})
974 .Any(Init: {.Predicate: {DivB128, P3}, .OperandMapping: {{VgprB128}, {VgprP3}}})
975
976 .Any(Init: {.Predicate: {UniS16, P3}, .OperandMapping: {{UniInVgprS16}, {SgprP3}}}, STPred: usesTrue16) // 16-bit load
977 .Any(Init: {.Predicate: {UniB32, P3}, .OperandMapping: {{UniInVgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
978 .Any(Init: {.Predicate: {UniB64, P3}, .OperandMapping: {{UniInVgprB64}, {VgprP3}}})
979 .Any(Init: {.Predicate: {UniB96, P3}, .OperandMapping: {{UniInVgprB96}, {VgprP3}}})
980 .Any(Init: {.Predicate: {UniB128, P3}, .OperandMapping: {{UniInVgprB128}, {VgprP3}}})
981
982 // constant, addrspace(4)
983 // divergent - global_load
984 .Any(Init: {.Predicate: {DivS16, P4}, .OperandMapping: {{Vgpr16}, {VgprP4}}}, STPred: usesTrue16)
985 .Any(Init: {.Predicate: {DivB32, P4}, .OperandMapping: {{VgprB32}, {VgprP4}}}) //32-bit load, 8-bit and 16-bit any-extending load
986 .Any(Init: {.Predicate: {DivB64, P4}, .OperandMapping: {{VgprB64}, {VgprP4}}})
987 .Any(Init: {.Predicate: {DivB96, P4}, .OperandMapping: {{VgprB96}, {VgprP4}}})
988 .Any(Init: {.Predicate: {DivB128, P4}, .OperandMapping: {{VgprB128}, {VgprP4}}})
989 .Any(Init: {.Predicate: {DivB256, P4}, .OperandMapping: {{VgprB256}, {VgprP4}, SplitLoad}})
990 .Any(Init: {.Predicate: {DivB512, P4}, .OperandMapping: {{VgprB512}, {VgprP4}, SplitLoad}})
991
992 // uniform - s_load
993 .Any(Init: {.Predicate: {{UniS16, P4}, isNaturalAligned && isUL}, .OperandMapping: {{Sgpr32Trunc}, {SgprP4}}}, STPred: usesTrue16 && hasSMRDSmall) // s16 load
994 .Any(Init: {.Predicate: {{UniS16, P4}, isAlign4 && isUL}, .OperandMapping: {{Sgpr32Trunc}, {SgprP4}, WidenMMOToS32}}, STPred: usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
995 .Any(Init: {.Predicate: {{UniB32, P4}, isNaturalAligned && isUL}, .OperandMapping: {{SgprB32}, {SgprP4}}}, STPred: hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
996 .Any(Init: {.Predicate: {{UniB32, P4}, is8Or16BitMMO && isAlign4 && isUL}, .OperandMapping: {{SgprB32}, {SgprP4}, WidenMMOToS32}}, STPred: !hasSMRDSmall) //8-bit and 16-bit any-extending load to 32-bit load
997 .Any(Init: {.Predicate: {{UniB32, P4}, is32BitMMO && isAlign4 && isUL}, .OperandMapping: {{SgprB32}, {SgprP4}}}) //32-bit load
998 .Any(Init: {.Predicate: {{UniB64, P4}, isAlign4 && isUL}, .OperandMapping: {{SgprB64}, {SgprP4}}})
999 .Any(Init: {.Predicate: {{UniB96, P4}, isAlign16 && isUL}, .OperandMapping: {{SgprB96}, {SgprP4}, WidenLoad}}, STPred: !hasSMRDx3)
1000 .Any(Init: {.Predicate: {{UniB96, P4}, isAlign4 && !isAlign16 && isUL}, .OperandMapping: {{SgprB96}, {SgprP4}, SplitLoad}}, STPred: !hasSMRDx3)
1001 .Any(Init: {.Predicate: {{UniB96, P4}, isAlign4 && isUL}, .OperandMapping: {{SgprB96}, {SgprP4}}}, STPred: hasSMRDx3)
1002 .Any(Init: {.Predicate: {{UniB128, P4}, isAlign4 && isUL}, .OperandMapping: {{SgprB128}, {SgprP4}}})
1003 .Any(Init: {.Predicate: {{UniB256, P4}, isAlign4 && isUL}, .OperandMapping: {{SgprB256}, {SgprP4}}})
1004 .Any(Init: {.Predicate: {{UniB512, P4}, isAlign4 && isUL}, .OperandMapping: {{SgprB512}, {SgprP4}}})
1005
1006 // uniform in vgpr - global_load or buffer_load
1007 .Any(Init: {.Predicate: {{UniS16, P4}, !isNaturalAligned || !isUL}, .OperandMapping: {{UniInVgprS16}, {SgprP4}}}, STPred: usesTrue16 && hasSMRDSmall) // s16 load
1008 .Any(Init: {.Predicate: {{UniS16, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprS16}, {SgprP4}}}, STPred: usesTrue16 && !hasSMRDSmall) // s16 load
1009 .Any(Init: {.Predicate: {{UniB32, P4}, !isNaturalAligned || !isUL}, .OperandMapping: {{UniInVgprB32}, {SgprP4}}}, STPred: hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
1010 .Any(Init: {.Predicate: {{UniB32, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB32}, {SgprP4}}}, STPred: !hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
1011 .Any(Init: {.Predicate: {{UniB64, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB64}, {SgprP4}}})
1012 .Any(Init: {.Predicate: {{UniB96, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB96}, {SgprP4}}})
1013 .Any(Init: {.Predicate: {{UniB128, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB128}, {SgprP4}}})
1014 .Any(Init: {.Predicate: {{UniB256, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB256}, {SgprP4}, SplitLoad}})
1015 .Any(Init: {.Predicate: {{UniB512, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB512}, {SgprP4}, SplitLoad}})
1016
1017 // private, addrspace(5), never uniform - scratch_load
1018 .Any(Init: {.Predicate: {DivS16, P5}, .OperandMapping: {{Vgpr16}, {VgprP5}}}, STPred: usesTrue16)
1019 .Any(Init: {.Predicate: {DivB32, P5}, .OperandMapping: {{VgprB32}, {VgprP5}}}) // 32-bit load, 8-bit and 16-bit any-extending load
1020 .Any(Init: {.Predicate: {DivB64, P5}, .OperandMapping: {{VgprB64}, {VgprP5}}})
1021 .Any(Init: {.Predicate: {DivB96, P5}, .OperandMapping: {{VgprB96}, {VgprP5}}})
1022 .Any(Init: {.Predicate: {DivB128, P5}, .OperandMapping: {{VgprB128}, {VgprP5}}})
1023
1024 .Any(Init: {.Predicate: {DivS32, Ptr128}, .OperandMapping: {{Vgpr32}, {VgprPtr128}}});
1025
1026
1027 addRulesForGOpcs(OpcList: {G_ZEXTLOAD, G_SEXTLOAD}) // i8 and i16 zeroextending loads
1028 .Any(Init: {.Predicate: {DivS32, P0}, .OperandMapping: {{Vgpr32}, {VgprP0}}})
1029
1030 .Any(Init: {.Predicate: {DivS32, P1}, .OperandMapping: {{Vgpr32}, {VgprP1}}})
1031 .Any(Init: {.Predicate: {{UniS32, P1}, isAlign4 && isUL}, .OperandMapping: {{Sgpr32}, {SgprP1}, WidenMMOToS32}}, STPred: !hasSMRDSmall)
1032 .Any(Init: {.Predicate: {{UniS32, P1}, isNaturalAligned && isUL}, .OperandMapping: {{Sgpr32}, {SgprP1}}}, STPred: hasSMRDSmall)
1033 .Any(Init: {.Predicate: {{UniS32, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprS32}, {SgprP1}}}, STPred: !hasSMRDSmall)
1034 .Any(Init: {.Predicate: {{UniS32, P1}, !isNaturalAligned || !isUL}, .OperandMapping: {{UniInVgprS32}, {SgprP1}}}, STPred: hasSMRDSmall)
1035
1036 .Any(Init: {.Predicate: {DivS32, P3}, .OperandMapping: {{Vgpr32}, {VgprP3}}})
1037 .Any(Init: {.Predicate: {UniS32, P3}, .OperandMapping: {{UniInVgprS32}, {VgprP3}}})
1038
1039 .Any(Init: {.Predicate: {DivS32, P4}, .OperandMapping: {{Vgpr32}, {VgprP4}}})
1040 .Any(Init: {.Predicate: {{UniS32, P4}, isAlign4 && isUL}, .OperandMapping: {{Sgpr32}, {SgprP4}, WidenMMOToS32}}, STPred: !hasSMRDSmall)
1041 .Any(Init: {.Predicate: {{UniS32, P4}, isNaturalAligned && isUL}, .OperandMapping: {{Sgpr32}, {SgprP4}}}, STPred: hasSMRDSmall)
1042 .Any(Init: {.Predicate: {{UniS32, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprS32}, {SgprP4}}}, STPred: !hasSMRDSmall)
1043 .Any(Init: {.Predicate: {{UniS32, P4}, !isNaturalAligned || !isUL}, .OperandMapping: {{UniInVgprS32}, {SgprP4}}}, STPred: hasSMRDSmall)
1044
1045 .Any(Init: {.Predicate: {DivS32, P5}, .OperandMapping: {{Vgpr32}, {VgprP5}}});
1046
1047 addRulesForGOpcs(OpcList: {G_STORE})
1048 // addrspace(0)
1049 .Any(Init: {.Predicate: {S16, P0}, .OperandMapping: {{}, {Vgpr16, VgprP0}}}, STPred: usesTrue16) // 16-bit store
1050 .Any(Init: {.Predicate: {B32, P0}, .OperandMapping: {{}, {VgprB32, VgprP0}}}) // 32-bit store, 8-bit and 16-bit truncating store
1051 .Any(Init: {.Predicate: {B64, P0}, .OperandMapping: {{}, {VgprB64, VgprP0}}})
1052 .Any(Init: {.Predicate: {B96, P0}, .OperandMapping: {{}, {VgprB96, VgprP0}}})
1053 .Any(Init: {.Predicate: {B128, P0}, .OperandMapping: {{}, {VgprB128, VgprP0}}})
1054
1055 // addrspace(1), there are no stores to addrspace(4)
1056 // For targets:
1057 // - with "+flat-for-global" - global_store
1058 // - without(-flat-for-global) - buffer_store addr64
1059 .Any(Init: {.Predicate: {S16, DivP1}, .OperandMapping: {{}, {Vgpr16, VgprP1}}}, STPred: usesTrue16) // 16-bit store
1060 .Any(Init: {.Predicate: {B32, DivP1}, .OperandMapping: {{}, {VgprB32, VgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
1061 .Any(Init: {.Predicate: {B64, DivP1}, .OperandMapping: {{}, {VgprB64, VgprP1}}})
1062 .Any(Init: {.Predicate: {B96, DivP1}, .OperandMapping: {{}, {VgprB96, VgprP1}}})
1063 .Any(Init: {.Predicate: {B128, DivP1}, .OperandMapping: {{}, {VgprB128, VgprP1}}})
1064
1065 // For UniP1, use sgpr ptr to match flat-for-global patterns. Targets:
1066 // - with "+flat-for-global" - global_store for both sgpr and vgpr ptr
1067 // - without(-flat-for-global) - need sgpr ptr to select buffer_store
1068 .Any(Init: {.Predicate: {S16, UniP1}, .OperandMapping: {{}, {Vgpr16, SgprP1}}}, STPred: usesTrue16) // 16-bit store
1069 .Any(Init: {.Predicate: {B32, UniP1}, .OperandMapping: {{}, {VgprB32, SgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
1070 .Any(Init: {.Predicate: {B64, UniP1}, .OperandMapping: {{}, {VgprB64, SgprP1}}})
1071 .Any(Init: {.Predicate: {B96, UniP1}, .OperandMapping: {{}, {VgprB96, SgprP1}}})
1072 .Any(Init: {.Predicate: {B128, UniP1}, .OperandMapping: {{}, {VgprB128, SgprP1}}})
1073
1074 // addrspace(3) and addrspace(5)
1075 .Any(Init: {.Predicate: {S16, Ptr32}, .OperandMapping: {{}, {Vgpr16, VgprPtr32}}}, STPred: usesTrue16) // 16-bit store
1076 .Any(Init: {.Predicate: {B32, Ptr32}, .OperandMapping: {{}, {VgprB32, VgprPtr32}}}) // 32-bit store, 8-bit and 16-bit truncating store
1077 .Any(Init: {.Predicate: {B64, Ptr32}, .OperandMapping: {{}, {VgprB64, VgprPtr32}}})
1078 .Any(Init: {.Predicate: {B96, Ptr32}, .OperandMapping: {{}, {VgprB96, VgprPtr32}}})
1079 .Any(Init: {.Predicate: {B128, Ptr32}, .OperandMapping: {{}, {VgprB128, VgprPtr32}}});
1080
1081 // clang-format on
1082
1083 addRulesForGOpcs(OpcList: {G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
1084 G_AMDGPU_TBUFFER_LOAD_FORMAT},
1085 FastTypes: StandardB)
1086 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1087 .Uni(Ty: B32, RuleApplyIDs: {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1088 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1089 .Uni(Ty: B64, RuleApplyIDs: {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1090 .Div(Ty: B96, RuleApplyIDs: {{VgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1091 .Uni(Ty: B96, RuleApplyIDs: {{UniInVgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1092 .Div(Ty: B128, RuleApplyIDs: {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1093 .Uni(Ty: B128, RuleApplyIDs: {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});
1094
1095 addRulesForGOpcs(OpcList: {G_AMDGPU_BUFFER_LOAD_USHORT, G_AMDGPU_BUFFER_LOAD_UBYTE,
1096 G_AMDGPU_BUFFER_LOAD_SSHORT, G_AMDGPU_BUFFER_LOAD_SBYTE},
1097 FastTypes: StandardB)
1098 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1099 .Uni(Ty: B32, RuleApplyIDs: {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});
1100
1101 addRulesForGOpcs(
1102 OpcList: {G_AMDGPU_BUFFER_LOAD_UBYTE_TFE, G_AMDGPU_BUFFER_LOAD_USHORT_TFE},
1103 FastTypes: StandardB)
1104 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1105 .Uni(Ty: B64, RuleApplyIDs: {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});
1106
1107 addRulesForGOpcs(OpcList: {G_AMDGPU_BUFFER_LOAD_TFE, G_AMDGPU_BUFFER_LOAD_FORMAT_TFE},
1108 FastTypes: StandardB)
1109 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1110 .Uni(Ty: B64, RuleApplyIDs: {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1111 .Div(Ty: B96, RuleApplyIDs: {{VgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1112 .Uni(Ty: B96, RuleApplyIDs: {{UniInVgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1113 .Div(Ty: B128, RuleApplyIDs: {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1114 .Uni(Ty: B128, RuleApplyIDs: {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1115 .Any(Init: {.Predicate: {DivB160}, .OperandMapping: {{VgprB160}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1116 .Any(Init: {.Predicate: {UniB160},
1117 .OperandMapping: {{UniInVgprB160}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});
1118
// D16 buffer/tbuffer format loads: result goes to VGPR (UniInVgpr* when the
// result is uniform); source operands are rsrc, vindex, voffset, soffset.
// The *_WF apply IDs on rsrc/soffset presumably insert a waterfall loop when
// the operand is not already uniform — confirm against the apply-ID
// definitions in AMDGPURegBankLegalizeHelper.
1119 addRulesForGOpcs(
1120 OpcList: {G_AMDGPU_BUFFER_LOAD_FORMAT_D16, G_AMDGPU_TBUFFER_LOAD_FORMAT_D16},
1121 FastTypes: StandardB)
1122 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1123 .Uni(Ty: B32, RuleApplyIDs: {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1124 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1125 .Uni(Ty: B64, RuleApplyIDs: {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1126 .Div(Ty: B128, RuleApplyIDs: {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1127 .Uni(Ty: B128, RuleApplyIDs: {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});
1128 
// Buffer/tbuffer stores: no result; vdata is always VGPR regardless of
// uniformity, followed by the same rsrc/vindex/voffset/soffset pattern as the
// loads above.
1129 addRulesForGOpcs(OpcList: {G_AMDGPU_BUFFER_STORE, G_AMDGPU_BUFFER_STORE_BYTE,
1130 G_AMDGPU_BUFFER_STORE_SHORT, G_AMDGPU_BUFFER_STORE_FORMAT,
1131 G_AMDGPU_BUFFER_STORE_FORMAT_D16,
1132 G_AMDGPU_TBUFFER_STORE_FORMAT,
1133 G_AMDGPU_TBUFFER_STORE_FORMAT_D16})
1134 .Any(Init: {.Predicate: {B32}, .OperandMapping: {{}, {VgprB32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1135 .Any(Init: {.Predicate: {B64}, .OperandMapping: {{}, {VgprB64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1136 .Any(Init: {.Predicate: {B96}, .OperandMapping: {{}, {VgprB96, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1137 .Any(Init: {.Predicate: {B128}, .OperandMapping: {{}, {VgprB128, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});
1138 
1139 // Buffer atomics: resource descriptor + scalar offset are SGPR, data and
1140 // address components are VGPR.
1141 //
1142 // Operand order (SIInstructions.td BufferAtomicGenericInstruction):
1143 // dst = op vdata, rsrc, vindex, voffset, soffset, offset_imm, cachepolicy,
1144 // idxen_imm
// S32, S64 and V2S16 payload variants; the trailing immediate operands
// (offset_imm, cachepolicy, idxen_imm) need no bank mapping.
1145 addRulesForGOpcs(OpcList: {G_AMDGPU_BUFFER_ATOMIC_FADD})
1146 .Any(Init: {.Predicate: {S32, S32, V4S32, S32, S32, S32},
1147 .OperandMapping: {{Vgpr32}, {Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1148 .Any(Init: {.Predicate: {S64, S64, V4S32, S32, S32, S32},
1149 .OperandMapping: {{Vgpr64}, {Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1150 .Any(Init: {.Predicate: {V2S16, V2S16, V4S32, S32, S32, S32},
1151 .OperandMapping: {{VgprV2S16},
1152 {VgprV2S16, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});
1153
// Pointer arithmetic and int<->ptr conversions: the bank simply follows the
// uniformity of the result (uniform -> SGPR, divergent -> VGPR); no special
// lowering is required for any of these.
1154 addRulesForGOpcs(OpcList: {G_PTR_ADD})
1155 .Any(Init: {.Predicate: {UniPtr32}, .OperandMapping: {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
1156 .Any(Init: {.Predicate: {DivPtr32}, .OperandMapping: {{VgprPtr32}, {VgprPtr32, Vgpr32}}})
1157 .Any(Init: {.Predicate: {UniPtr64}, .OperandMapping: {{SgprPtr64}, {SgprPtr64, Sgpr64}}})
1158 .Any(Init: {.Predicate: {DivPtr64}, .OperandMapping: {{VgprPtr64}, {VgprPtr64, Vgpr64}}});
1159 
// G_INTTOPTR/G_PTRTOINT additionally cover 128-bit (buffer-resource-sized)
// pointers.
1160 addRulesForGOpcs(OpcList: {G_INTTOPTR})
1161 .Any(Init: {.Predicate: {UniPtr32}, .OperandMapping: {{SgprPtr32}, {Sgpr32}}})
1162 .Any(Init: {.Predicate: {DivPtr32}, .OperandMapping: {{VgprPtr32}, {Vgpr32}}})
1163 .Any(Init: {.Predicate: {UniPtr64}, .OperandMapping: {{SgprPtr64}, {Sgpr64}}})
1164 .Any(Init: {.Predicate: {DivPtr64}, .OperandMapping: {{VgprPtr64}, {Vgpr64}}})
1165 .Any(Init: {.Predicate: {UniPtr128}, .OperandMapping: {{SgprPtr128}, {Sgpr128}}})
1166 .Any(Init: {.Predicate: {DivPtr128}, .OperandMapping: {{VgprPtr128}, {Vgpr128}}});
1167 
1168 addRulesForGOpcs(OpcList: {G_PTRTOINT})
1169 .Any(Init: {.Predicate: {UniS32}, .OperandMapping: {{Sgpr32}, {SgprPtr32}}})
1170 .Any(Init: {.Predicate: {DivS32}, .OperandMapping: {{Vgpr32}, {VgprPtr32}}})
1171 .Any(Init: {.Predicate: {UniS64}, .OperandMapping: {{Sgpr64}, {SgprPtr64}}})
1172 .Any(Init: {.Predicate: {DivS64}, .OperandMapping: {{Vgpr64}, {VgprPtr64}}})
1173 .Any(Init: {.Predicate: {UniS128}, .OperandMapping: {{Sgpr128}, {SgprPtr128}}})
1174 .Any(Init: {.Predicate: {DivS128}, .OperandMapping: {{Vgpr128}, {VgprPtr128}}});
1175 
1176 // FIXME: Update llvm/test/CodeGen/AMDGPU/ptrmask.ll to use GlobalISel.
1177 // Currently crashes on P8 (buffer resource) tests due to legalizer issue.
// Mask width matches the pointer width: 64-bit mask for P1 (global), 32-bit
// for P3 (LDS).
1178 addRulesForGOpcs(OpcList: {G_PTRMASK})
1179 .Any(Init: {.Predicate: {UniP1}, .OperandMapping: {{SgprP1}, {SgprP1, Sgpr64}}})
1180 .Any(Init: {.Predicate: {DivP1}, .OperandMapping: {{VgprP1}, {VgprP1, Vgpr64}}})
1181 .Any(Init: {.Predicate: {UniP3}, .OperandMapping: {{SgprP3}, {SgprP3, Sgpr32}}})
1182 .Any(Init: {.Predicate: {DivP3}, .OperandMapping: {{VgprP3}, {VgprP3, Vgpr32}}});
1183
// Uniform 16-bit abs: sign-extend the source into a 32-bit SGPR op and
// truncate the result back (Sgpr32SExt / Sgpr32Trunc apply IDs).
1184 addRulesForGOpcs(OpcList: {G_ABS}, FastTypes: Standard).Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32SExt}});
1185 
1186 addRulesForGOpcs(OpcList: {G_BITREVERSE}, FastTypes: Standard)
1187 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32}})
1188 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32}})
1189 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64}})
1190 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64}});
1191 
// Bit-count style ops always produce S32. The divergent 64-bit source uses
// the SplitBitCount64To32 lowering — presumably processing the value as two
// 32-bit halves, since VALU count instructions are 32-bit; confirm in the
// lowering implementation.
1192 addRulesForGOpcs(OpcList: {G_AMDGPU_FFBH_U32, G_AMDGPU_FFBL_B32, G_CTLZ_ZERO_UNDEF,
1193 G_CTTZ_ZERO_UNDEF})
1194 .Any(Init: {.Predicate: {UniS32, S32}, .OperandMapping: {{Sgpr32}, {Sgpr32}}})
1195 .Any(Init: {.Predicate: {DivS32, S32}, .OperandMapping: {{Vgpr32}, {Vgpr32}}})
1196 .Any(Init: {.Predicate: {UniS32, S64}, .OperandMapping: {{Sgpr32}, {Sgpr64}}})
1197 .Any(Init: {.Predicate: {DivS32, S64}, .OperandMapping: {{Vgpr32}, {Vgpr64}, SplitBitCount64To32}});
1198 
// G_FENCE has no register operands; nothing to map.
1199 addRulesForGOpcs(OpcList: {G_FENCE}).Any(Init: {.Predicate: {{}}, .OperandMapping: {{}, {}}});
1200 
1201 addRulesForGOpcs(OpcList: {G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, FastTypes: Standard)
1202 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {}});
1203 
// Address-producing opcodes: addresses are always uniform, result in SGPR.
1204 addRulesForGOpcs(OpcList: {G_BLOCK_ADDR}).Any(Init: {.Predicate: {UniP0}, .OperandMapping: {{SgprP0}, {}}});
1205 
1206 addRulesForGOpcs(OpcList: {G_GLOBAL_VALUE})
1207 .Any(Init: {.Predicate: {UniP0}, .OperandMapping: {{SgprP0}, {}}})
1208 .Any(Init: {.Predicate: {UniP1}, .OperandMapping: {{SgprP1}, {}}})
1209 .Any(Init: {.Predicate: {UniP3}, .OperandMapping: {{SgprP3}, {}}})
1210 .Any(Init: {.Predicate: {UniP4}, .OperandMapping: {{SgprP4}, {}}})
1211 .Any(Init: {.Predicate: {UniP8}, .OperandMapping: {{SgprP8}, {}}});
1212 
1213 addRulesForGOpcs(OpcList: {G_AMDGPU_WAVE_ADDRESS}).Any(Init: {.Predicate: {UniP5}, .OperandMapping: {{SgprP5}, {}}});
1214 
// Call target must end up in SGPR. Divergent targets use SgprP0Call_WF /
// SgprP4Call_WF — presumably a waterfall over the divergent target; confirm
// against the apply-ID implementation.
1215 addRulesForGOpcs(OpcList: {G_SI_CALL})
1216 .Any(Init: {.Predicate: {_, UniP0}, .OperandMapping: {{None}, {SgprP0}}})
1217 .Any(Init: {.Predicate: {_, DivP0}, .OperandMapping: {{None}, {SgprP0Call_WF}}})
1218 .Any(Init: {.Predicate: {_, UniP4}, .OperandMapping: {{None}, {SgprP4}}})
1219 .Any(Init: {.Predicate: {_, DivP4}, .OperandMapping: {{None}, {SgprP4Call_WF}}});
1220
// Floating-point arithmetic. On targets with SALU float instructions,
// uniform f16/f32 (and packed v2f16 via the ScalarizeToS16 lowering) stay on
// SGPRs; otherwise uniform operations are executed in VGPRs with a
// UniInVgpr* result. f64 always executes on the VALU. Rules gated on the
// subtarget predicate are emitted in mutually exclusive pairs
// (STPred: hasSALUFloat / !hasSALUFloat).
1221 bool hasSALUFloat = ST->hasSALUFloatInsts();
1222 
1223 addRulesForGOpcs(OpcList: {G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL}, FastTypes: Standard)
1224 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16}}, STPred: !hasSALUFloat)
1225 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {Sgpr16, Sgpr16}}, STPred: hasSALUFloat)
1226 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
1227 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}}, STPred: hasSALUFloat)
1228 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}}, STPred: !hasSALUFloat)
1229 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
1230 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64, Vgpr64}})
1231 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr64}})
1232 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}}, STPred: !hasSALUFloat)
1233 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16}, ScalarizeToS16},
1234 STPred: hasSALUFloat)
1235 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
1236 
1237 addRulesForGOpcs(OpcList: {G_FSUB, G_STRICT_FSUB}, FastTypes: Standard)
1238 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
1239 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
1240 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {Sgpr16, Sgpr16}}, STPred: hasSALUFloat)
1241 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16}}, STPred: !hasSALUFloat)
1242 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}}, STPred: hasSALUFloat)
1243 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}}, STPred: !hasSALUFloat);
1244 
// G_FMAD is VALU-only: uniform results use UniInVgpr* unconditionally.
1245 addRulesForGOpcs(OpcList: {G_FMAD}, FastTypes: Standard)
1246 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
1247 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
1248 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
1249 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
1250 
// Note the mixed operand widths for f64 ldexp: 64-bit mantissa, 32-bit
// integer exponent.
1251 addRulesForGOpcs(OpcList: {G_FLDEXP, G_STRICT_FLDEXP}, FastTypes: Standard)
1252 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}})
1253 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
1254 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16}})
1255 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
1256 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64, Vgpr32}})
1257 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr32}});
1258 
1259 addRulesForGOpcs(OpcList: {G_FMA, G_STRICT_FMA}, FastTypes: Standard)
1260 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
1261 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
1262 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64, Vgpr64, Vgpr64}})
1263 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr64}})
1264 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}})
1265 .Any(Init: {.Predicate: {UniV2S32}, .OperandMapping: {{UniInVgprV2S32}, {VgprV2S32, VgprV2S32, VgprV2S32}}})
1266 .Any(Init: {.Predicate: {DivV2S32}, .OperandMapping: {{VgprV2S32}, {VgprV2S32, VgprV2S32, VgprV2S32}}})
1267 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {Sgpr16, Sgpr16, Sgpr16}}, STPred: hasSALUFloat)
1268 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}}, STPred: !hasSALUFloat)
1269 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}}, STPred: hasSALUFloat)
1270 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}}, STPred: !hasSALUFloat)
1271 .Uni(Ty: V2S16,
1272 RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16, SgprV2S16}, ScalarizeToS16},
1273 STPred: hasSALUFloat)
1274 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}},
1275 STPred: !hasSALUFloat);
1276 
1277 addRulesForGOpcs(OpcList: {G_AMDGPU_FMED3}, FastTypes: Standard)
1278 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
1279 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
1280 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
1281 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
1282 
1283 // TODO: This opcode is generated from the i64->i16 signed clamped pattern in
1284 // the PreLegalizerCombiner. Move the combine to RegBankCombiner to keep more
1285 // instructions on SALU.
1286 addRulesForGOpcs(OpcList: {G_AMDGPU_SMED3}, FastTypes: Standard)
1287 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
1288 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
1289 
1290 // FNEG and FABS are either folded as source modifiers or can be selected as
1291 // bitwise XOR and AND with Mask. XOR and AND are available on SALU but for
1292 // targets without SALU float we still select them as VGPR since there would
1293 // be no real sgpr use.
1294 addRulesForGOpcs(OpcList: {G_FNEG, G_FABS}, FastTypes: Standard)
1295 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16}}, STPred: !hasSALUFloat)
1296 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {Sgpr16}}, STPred: hasSALUFloat)
1297 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16}})
1298 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32}}, STPred: !hasSALUFloat)
1299 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32}}, STPred: hasSALUFloat)
1300 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32}})
1301 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64}})
1302 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64}})
1303 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16}}, STPred: !hasSALUFloat)
1304 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16}, ScalarizeToS16}, STPred: hasSALUFloat)
1305 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16}})
1306 .Any(Init: {.Predicate: {UniV2S32}, .OperandMapping: {{UniInVgprV2S32}, {VgprV2S32}}})
1307 .Any(Init: {.Predicate: {DivV2S32}, .OperandMapping: {{VgprV2S32}, {VgprV2S32}}});
1308 
1309 addRulesForGOpcs(OpcList: {G_FCANONICALIZE}, FastTypes: Standard)
1310 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32}})
1311 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32}})
1312 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16}})
1313 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16}})
1314 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64}})
1315 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64}})
1316 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16}})
1317 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16}})
1318 .Any(Init: {.Predicate: {UniV2S32}, .OperandMapping: {{UniInVgprV2S32}, {VgprV2S32}}})
1319 .Any(Init: {.Predicate: {DivV2S32}, .OperandMapping: {{VgprV2S32}, {VgprV2S32}}});
1320 
// Pseudo-scalar transcendental instructions allow uniform f16 sqrt to stay
// on SGPRs.
1321 bool hasPST = ST->hasPseudoScalarTrans();
1322 addRulesForGOpcs(OpcList: {G_FSQRT}, FastTypes: Standard)
1323 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16}})
1324 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {Sgpr16}}, STPred: hasPST)
1325 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16}}, STPred: !hasPST);
1326
// FP<->int conversions: predicates list {result type, source type}. Uniform
// f32<->i32 pairs stay on SALU only when the subtarget has SALU float
// instructions; f16- and f64-involving conversions execute on the VALU
// (uniform results via UniInVgpr*).
1327 addRulesForGOpcs(OpcList: {G_FPTOUI, G_FPTOSI})
1328 .Any(Init: {.Predicate: {UniS16, S16}, .OperandMapping: {{UniInVgprS16}, {Vgpr16}}})
1329 .Any(Init: {.Predicate: {DivS16, S16}, .OperandMapping: {{Vgpr16}, {Vgpr16}}})
1330 .Any(Init: {.Predicate: {UniS32, S16}, .OperandMapping: {{Sgpr32}, {Sgpr16}}}, STPred: hasSALUFloat)
1331 .Any(Init: {.Predicate: {UniS32, S16}, .OperandMapping: {{UniInVgprS32}, {Vgpr16}}}, STPred: !hasSALUFloat)
1332 .Any(Init: {.Predicate: {DivS32, S16}, .OperandMapping: {{Vgpr32}, {Vgpr16}}})
1333 .Any(Init: {.Predicate: {UniS32, S32}, .OperandMapping: {{Sgpr32}, {Sgpr32}}}, STPred: hasSALUFloat)
1334 .Any(Init: {.Predicate: {UniS32, S32}, .OperandMapping: {{UniInVgprS32}, {Vgpr32}}}, STPred: !hasSALUFloat)
1335 .Any(Init: {.Predicate: {DivS32, S32}, .OperandMapping: {{Vgpr32}, {Vgpr32}}})
1336 .Any(Init: {.Predicate: {UniS32, S64}, .OperandMapping: {{UniInVgprS32}, {Vgpr64}}})
1337 .Any(Init: {.Predicate: {DivS32, S64}, .OperandMapping: {{Vgpr32}, {Vgpr64}}})
;
1338 
1339 addRulesForGOpcs(OpcList: {G_UITOFP, G_SITOFP})
1340 .Any(Init: {.Predicate: {UniS16, S16}, .OperandMapping: {{UniInVgprS16}, {Vgpr16}}})
1341 .Any(Init: {.Predicate: {DivS16, S16}, .OperandMapping: {{Vgpr16}, {Vgpr16}}})
1342 .Any(Init: {.Predicate: {UniS16, S32}, .OperandMapping: {{Sgpr16}, {Sgpr32}}}, STPred: hasSALUFloat)
1343 .Any(Init: {.Predicate: {UniS16, S32}, .OperandMapping: {{UniInVgprS16}, {Vgpr32}}}, STPred: !hasSALUFloat)
1344 .Any(Init: {.Predicate: {DivS16, S32}, .OperandMapping: {{Vgpr16}, {Vgpr32}}})
1345 .Any(Init: {.Predicate: {UniS32, S32}, .OperandMapping: {{Sgpr32}, {Sgpr32}}}, STPred: hasSALUFloat)
1346 .Any(Init: {.Predicate: {UniS32, S32}, .OperandMapping: {{UniInVgprS32}, {Vgpr32}}}, STPred: !hasSALUFloat)
1347 .Any(Init: {.Predicate: {DivS32, S32}, .OperandMapping: {{Vgpr32}, {Vgpr32}}})
1348 .Any(Init: {.Predicate: {UniS64, S32}, .OperandMapping: {{UniInVgprS64}, {Vgpr32}}})
1349 .Any(Init: {.Predicate: {DivS64, S32}, .OperandMapping: {{Vgpr64}, {Vgpr32}}});
1350 
1351 addRulesForGOpcs(OpcList: {G_FPEXT})
1352 .Any(Init: {.Predicate: {DivS32, S16}, .OperandMapping: {{Vgpr32}, {Vgpr16}}})
1353 .Any(Init: {.Predicate: {UniS64, S32}, .OperandMapping: {{UniInVgprS64}, {Vgpr32}}})
1354 .Any(Init: {.Predicate: {DivS64, S32}, .OperandMapping: {{Vgpr64}, {Vgpr32}}})
1355 .Any(Init: {.Predicate: {UniS32, S16}, .OperandMapping: {{Sgpr32}, {Sgpr16}}}, STPred: hasSALUFloat)
1356 .Any(Init: {.Predicate: {UniS32, S16}, .OperandMapping: {{UniInVgprS32}, {Vgpr16}}}, STPred: !hasSALUFloat);
1357 
1358 addRulesForGOpcs(OpcList: {G_AMDGPU_CVT_PK_I16_I32}, FastTypes: Standard)
1359 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {Vgpr32, Vgpr32}})
1360 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {Vgpr32, Vgpr32}});
1361 
1362 addRulesForGOpcs(OpcList: {G_AMDGPU_FMIN_LEGACY, G_AMDGPU_FMAX_LEGACY}, FastTypes: Standard)
1363 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}})
1364 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}});
1365 
// IEEE-2019 minimum/maximum have their own subtarget predicate for SALU
// support, independent of hasSALUFloat.
1366 bool hasSALUMinimumMaximumInsts = ST->hasSALUMinimumMaximumInsts();
1367 
1368 addRulesForGOpcs(OpcList: {G_FMINIMUM, G_FMAXIMUM}, FastTypes: Standard)
1369 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {Sgpr16, Sgpr16}}, STPred: hasSALUMinimumMaximumInsts)
1370 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16}}, STPred: !hasSALUMinimumMaximumInsts)
1371 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
1372 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}}, STPred: hasSALUMinimumMaximumInsts)
1373 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}}, STPred: !hasSALUMinimumMaximumInsts)
1374 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
1375 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64, Vgpr64}})
1376 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr64}})
1377 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
1378 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
1379 
1380 addRulesForGOpcs(OpcList: {G_FMINNUM_IEEE, G_FMAXNUM_IEEE, G_FMINNUM, G_FMAXNUM},
1381 FastTypes: Standard)
1382 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
1383 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
1384 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64, Vgpr64}})
1385 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr64}})
1386 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
1387 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
1388 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {Sgpr16, Sgpr16}}, STPred: hasSALUFloat)
1389 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16}}, STPred: !hasSALUFloat)
1390 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}}, STPred: hasSALUFloat)
1391 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}}, STPred: !hasSALUFloat);
1392 
1393 addRulesForGOpcs(OpcList: {G_FPTRUNC})
1394 .Any(Init: {.Predicate: {DivS16, S32}, .OperandMapping: {{Vgpr16}, {Vgpr32}}})
1395 .Any(Init: {.Predicate: {UniS32, S64}, .OperandMapping: {{UniInVgprS32}, {Vgpr64}}})
1396 .Any(Init: {.Predicate: {DivS32, S64}, .OperandMapping: {{Vgpr32}, {Vgpr64}}})
1397 .Any(Init: {.Predicate: {UniV2S16, V2S32}, .OperandMapping: {{UniInVgprV2S16}, {VgprV2S32}}})
1398 .Any(Init: {.Predicate: {DivV2S16, V2S32}, .OperandMapping: {{VgprV2S16}, {VgprV2S32}}})
1399 .Any(Init: {.Predicate: {UniS16, S32}, .OperandMapping: {{Sgpr16}, {Sgpr32}}}, STPred: hasSALUFloat)
1400 .Any(Init: {.Predicate: {UniS16, S32}, .OperandMapping: {{UniInVgprS16}, {Vgpr32}}}, STPred: !hasSALUFloat);
1401 
// S1 results: divergent ones live in VCC, uniform ones use the UniInVcc
// apply ID.
1402 addRulesForGOpcs(OpcList: {G_IS_FPCLASS})
1403 .Any(Init: {.Predicate: {DivS1, S16}, .OperandMapping: {{Vcc}, {Vgpr16}}})
1404 .Any(Init: {.Predicate: {UniS1, S16}, .OperandMapping: {{UniInVcc}, {Vgpr16}}})
1405 .Any(Init: {.Predicate: {DivS1, S32}, .OperandMapping: {{Vcc}, {Vgpr32}}})
1406 .Any(Init: {.Predicate: {UniS1, S32}, .OperandMapping: {{UniInVcc}, {Vgpr32}}})
1407 .Any(Init: {.Predicate: {DivS1, S64}, .OperandMapping: {{Vcc}, {Vgpr64}}})
1408 .Any(Init: {.Predicate: {UniS1, S64}, .OperandMapping: {{UniInVcc}, {Vgpr64}}});
1409 
// G_FCMP: operand 1 is the predicate immediate (None); uniform f16/f32
// compares stay on SALU (Sgpr32Trunc result) only with hasSALUFloat.
1410 addRulesForGOpcs(OpcList: {G_FCMP}, FastTypes: Standard)
1411 .Any(Init: {.Predicate: {UniS1, _, S16}, .OperandMapping: {{Sgpr32Trunc}, {None, Sgpr16, Sgpr16}}},
1412 STPred: hasSALUFloat)
1413 .Any(Init: {.Predicate: {UniS1, _, S16}, .OperandMapping: {{UniInVcc}, {None, Vgpr16, Vgpr16}}},
1414 STPred: !hasSALUFloat)
1415 .Any(Init: {.Predicate: {DivS1, _, S16}, .OperandMapping: {{Vcc}, {None, Vgpr16, Vgpr16}}})
1416 .Any(Init: {.Predicate: {UniS1, _, S32}, .OperandMapping: {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}},
1417 STPred: hasSALUFloat)
1418 .Any(Init: {.Predicate: {UniS1, _, S32}, .OperandMapping: {{UniInVcc}, {None, Vgpr32, Vgpr32}}},
1419 STPred: !hasSALUFloat)
1420 .Any(Init: {.Predicate: {DivS1, _, S32}, .OperandMapping: {{Vcc}, {None, Vgpr32, Vgpr32}}})
1421 .Any(Init: {.Predicate: {UniS1, _, S64}, .OperandMapping: {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
1422 .Any(Init: {.Predicate: {DivS1, _, S64}, .OperandMapping: {{Vcc}, {None, Vgpr64, Vgpr64}}});
1423 
// Unary FP ops with VALU-only implementations: uniform results always go
// through UniInVgpr*.
1424 addRulesForGOpcs(OpcList: {G_INTRINSIC_TRUNC, G_INTRINSIC_ROUNDEVEN, G_FFLOOR, G_FCEIL,
1425 G_FEXP2, G_FLOG2},
1426 FastTypes: Standard)
1427 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16}})
1428 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16}})
1429 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32}})
1430 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32}})
1431 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64}})
1432 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64}});
1433
// Intrinsic rules. The first source operand of G_INTRINSIC* is the intrinsic
// ID itself (mapped as IntrId); Imm marks immediate operands that need no
// register bank.
1434 using namespace Intrinsic;
1435 
1436 addRulesForIOpcs(OpcList: {amdgcn_s_getpc}).Any(Init: {.Predicate: {UniS64, _}, .OperandMapping: {{Sgpr64}, {None}}});
1437 
1438 addRulesForIOpcs(OpcList: {amdgcn_s_getreg}).Any(Init: {.Predicate: {}, .OperandMapping: {{Sgpr32}, {IntrId, Imm}}});
1439 
1440 addRulesForIOpcs(OpcList: {amdgcn_groupstaticsize}).Any(Init: {.Predicate: {S32}, .OperandMapping: {{Sgpr32}, {IntrId}}});
1441 
1442 // Intrinsics with no register operands.
1443 addRulesForIOpcs(OpcList: {amdgcn_endpgm,
1444 amdgcn_s_barrier,
1445 amdgcn_s_barrier_signal,
1446 amdgcn_s_barrier_wait,
1447 amdgcn_s_nop,
1448 amdgcn_s_sethalt,
1449 amdgcn_s_setprio,
1450 amdgcn_s_sleep,
1451 amdgcn_s_wait_asynccnt,
1452 amdgcn_s_wait_bvhcnt,
1453 amdgcn_s_wait_dscnt,
1454 amdgcn_s_wait_event,
1455 amdgcn_s_wait_event_export_ready,
1456 amdgcn_s_wait_expcnt,
1457 amdgcn_s_wait_kmcnt,
1458 amdgcn_s_wait_loadcnt,
1459 amdgcn_s_wait_samplecnt,
1460 amdgcn_s_wait_storecnt,
1461 amdgcn_s_wait_tensorcnt,
1462 amdgcn_s_waitcnt,
1463 amdgcn_wave_barrier})
1464 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {}}});
1465 
1466 // This is "intrinsic lane mask" it was set to i32/i64 in llvm-ir.
1467 addRulesForIOpcs(OpcList: {amdgcn_end_cf})
1468 .Any(Init: {.Predicate: {_, UniS32}, .OperandMapping: {{}, {IntrId, Sgpr32}}})
1469 .Any(Init: {.Predicate: {_, UniS64}, .OperandMapping: {{}, {IntrId, Sgpr64}}});
1470 
1471 addRulesForIOpcs(OpcList: {amdgcn_if_break}, FastTypes: Standard)
1472 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {IntrId, Vcc, Sgpr64}})
1473 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {IntrId, Vcc, Sgpr32}});
1474 
// Export intrinsics: target/enable flags are immediates, payload channels
// are VGPRs. exp_row additionally takes the row index through M0.
1475 addRulesForIOpcs(OpcList: {amdgcn_exp})
1476 .Any(Init: {.Predicate: {_, _, _, S32, S32, S32, S32},
1477 .OperandMapping: {{}, {IntrId, Imm, Imm, Vgpr32, Vgpr32, Vgpr32, Vgpr32}}});
1478 
1479 addRulesForIOpcs(OpcList: {amdgcn_exp_row})
1480 .Any(Init: {.Predicate: {_, _, _, S32, S32, S32, S32, _, S32},
1481 .OperandMapping: {{},
1482 {IntrId, Imm, Imm, Vgpr32, Vgpr32, Vgpr32, Vgpr32, Imm,
1483 SgprB32_M0}}});
1484 
1485 addRulesForIOpcs(OpcList: {amdgcn_lds_param_load}, FastTypes: Standard)
1486 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Imm, Imm, SgprB32_M0}});
1487 
1488 addRulesForIOpcs(OpcList: {amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, FastTypes: Standard)
1489 .Div(Ty: S32, RuleApplyIDs: {{}, {Vgpr32, None, Vgpr32, Vgpr32}});
1490 
1491 addRulesForIOpcs(OpcList: {amdgcn_readfirstlane})
1492 .Any(Init: {.Predicate: {UniS32, _, DivS32}, .OperandMapping: {{}, {Sgpr32, None, Vgpr32}}})
1493 // this should not exist in the first place, it is from call lowering
1494 // readfirstlaning just in case register is not in sgpr.
1495 .Any(Init: {.Predicate: {UniS32, _, UniS32}, .OperandMapping: {{}, {Sgpr32, None, Vgpr32}}});
1496
// ALU-style intrinsics: mostly VALU-only, so uniform results use the
// UniInVgpr* apply IDs; sffbh and ubfe/sbfe have SALU forms for uniform
// inputs.
1497 addRulesForIOpcs(OpcList: {amdgcn_bitop3}, FastTypes: Standard)
1498 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
1499 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
1500 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1501 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1502 
// mul24: sources are always 32-bit even for the 64-bit-result variants.
1503 addRulesForIOpcs(OpcList: {amdgcn_mul_u24, amdgcn_mul_i24}, FastTypes: Standard)
1504 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
1505 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}})
1506 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {IntrId, Vgpr32, Vgpr32}})
1507 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {IntrId, Vgpr32, Vgpr32}});
1508 
1509 addRulesForIOpcs(OpcList: {amdgcn_mulhi_u24, amdgcn_mulhi_i24, amdgcn_fmul_legacy},
1510 FastTypes: Standard)
1511 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
1512 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}});
1513 
1514 addRulesForIOpcs(OpcList: {amdgcn_fma_legacy}, FastTypes: Standard)
1515 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1516 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1517 
1518 addRulesForIOpcs(OpcList: {amdgcn_frexp_mant, amdgcn_fract}, FastTypes: Standard)
1519 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {IntrId, Vgpr16}})
1520 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {IntrId, Vgpr16}})
1521 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32}})
1522 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32}})
1523 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {IntrId, Vgpr64}})
1524 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {IntrId, Vgpr64}});
1525 
1526 addRulesForIOpcs(OpcList: {amdgcn_prng_b32})
1527 .Any(Init: {.Predicate: {UniS32}, .OperandMapping: {{UniInVgprS32}, {IntrId, Vgpr32}}})
1528 .Any(Init: {.Predicate: {DivS32}, .OperandMapping: {{Vgpr32}, {IntrId, Vgpr32}}});
1529 
1530 addRulesForIOpcs(OpcList: {amdgcn_sffbh}, FastTypes: Standard)
1531 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {IntrId, Sgpr32}})
1532 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32}});
1533 
// Bitfield extract: uniform cases use the S_BFE lowering (scalar BFE takes a
// packed offset/width operand); divergent 64-bit uses the V_BFE lowering.
1534 addRulesForIOpcs(OpcList: {amdgcn_ubfe, amdgcn_sbfe}, FastTypes: Standard)
1535 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1536 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {IntrId, Sgpr32, Sgpr32, Sgpr32}, S_BFE})
1537 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {IntrId, Sgpr64, Sgpr32, Sgpr32}, S_BFE})
1538 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {IntrId, Vgpr64, Vgpr32, Vgpr32}, V_BFE});
1539 
1540 addRulesForIOpcs(OpcList: {amdgcn_cvt_pk_u16, amdgcn_cvt_pk_i16, amdgcn_cvt_pkrtz},
1541 FastTypes: Standard)
1542 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {IntrId, Vgpr32, Vgpr32}})
1543 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {IntrId, Vgpr32, Vgpr32}});
1544
// Memory / LDS / wave intrinsics. Results of the divergent load-transpose
// and atomic intrinsics are always VGPR; addresses are SGPR pointers for the
// global_load_tr variants and VGPR pointers for atomics and ds_read_tr.
1545 addRulesForIOpcs(OpcList: {amdgcn_global_load_tr_b64})
1546 .Any(Init: {.Predicate: {DivB64}, .OperandMapping: {{VgprB64}, {IntrId, SgprP1}}})
1547 .Any(Init: {.Predicate: {DivB32}, .OperandMapping: {{VgprB32}, {IntrId, SgprP1}}});
1548 
1549 addRulesForIOpcs(OpcList: {amdgcn_global_load_tr_b128})
1550 .Any(Init: {.Predicate: {DivB64}, .OperandMapping: {{VgprB64}, {IntrId, SgprP1}}})
1551 .Any(Init: {.Predicate: {DivB128}, .OperandMapping: {{VgprB128}, {IntrId, SgprP1}}});
1552 
1553 addRulesForIOpcs(OpcList: {amdgcn_global_atomic_ordered_add_b64})
1554 .Any(Init: {.Predicate: {DivS64}, .OperandMapping: {{Vgpr64}, {IntrId, VgprP1, Vgpr64}}});
1555 
1556 addRulesForIOpcs(
1557 OpcList: {amdgcn_global_atomic_fmin_num, amdgcn_global_atomic_fmax_num}, FastTypes: Standard)
1558 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, VgprP1, Vgpr32}});
1559 
1560 addRulesForIOpcs(OpcList: {amdgcn_flat_atomic_fmin_num, amdgcn_flat_atomic_fmax_num},
1561 FastTypes: Standard)
1562 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, VgprP0, Vgpr32}});
1563 
// Buffer-to-LDS loads: no results; rsrc (V4S32 or P8) and the LDS base (P3)
// are SGPR, the voffset operands VGPR, soffset SGPR.
1564 addRulesForIOpcs(OpcList: {amdgcn_raw_buffer_load_lds})
1565 .Any(Init: {.Predicate: {_}, .OperandMapping: {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Sgpr32}}});
1566 
1567 addRulesForIOpcs(OpcList: {amdgcn_struct_buffer_load_lds})
1568 .Any(Init: {.Predicate: {_},
1569 .OperandMapping: {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Vgpr32, Sgpr32}}});
1570 
1571 addRulesForIOpcs(OpcList: {amdgcn_raw_ptr_buffer_load_lds})
1572 .Any(Init: {.Predicate: {_}, .OperandMapping: {{}, {IntrId, SgprP8, SgprP3, Imm, Vgpr32, Sgpr32}}});
1573 
1574 addRulesForIOpcs(OpcList: {amdgcn_struct_ptr_buffer_load_lds})
1575 .Any(Init: {.Predicate: {_}, .OperandMapping: {{}, {IntrId, SgprP8, SgprP3, Imm, Vgpr32, Vgpr32, Sgpr32}}});
1576 
// WWM/WQM-style intrinsics are bank-transparent: the result takes the same
// bank as the source, for every supported size.
1577 addRulesForIOpcs(OpcList: {amdgcn_wwm, amdgcn_strict_wwm, amdgcn_wqm, amdgcn_softwqm,
1578 amdgcn_strict_wqm},
1579 FastTypes: StandardB)
1580 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {IntrId, VgprB32}})
1581 .Uni(Ty: B32, RuleApplyIDs: {{SgprB32}, {IntrId, SgprB32}})
1582 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {IntrId, VgprB64}})
1583 .Uni(Ty: B64, RuleApplyIDs: {{SgprB64}, {IntrId, SgprB64}})
1584 .Div(Ty: B96, RuleApplyIDs: {{VgprB96}, {IntrId, VgprB96}})
1585 .Uni(Ty: B96, RuleApplyIDs: {{SgprB96}, {IntrId, SgprB96}})
1586 .Div(Ty: B128, RuleApplyIDs: {{VgprB128}, {IntrId, VgprB128}})
1587 .Uni(Ty: B128, RuleApplyIDs: {{SgprB128}, {IntrId, SgprB128}})
1588 .Any(Init: {.Predicate: {UniB256}, .OperandMapping: {{SgprB256}, {IntrId, SgprB256}}})
1589 .Any(Init: {.Predicate: {DivB256}, .OperandMapping: {{VgprB256}, {IntrId, VgprB256}}})
1590 .Any(Init: {.Predicate: {UniB512}, .OperandMapping: {{SgprB512}, {IntrId, SgprB512}}})
1591 .Any(Init: {.Predicate: {DivB512}, .OperandMapping: {{VgprB512}, {IntrId, VgprB512}}});
1592 
1593 addRulesForIOpcs(OpcList: {amdgcn_sin, amdgcn_cos}, FastTypes: Standard)
1594 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {IntrId, Vgpr16}})
1595 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {IntrId, Vgpr16}})
1596 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32}})
1597 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32}});
1598 
// BVH stack intrinsics return two values (popped data + new stack address);
// everything is VGPR.
1599 addRulesForIOpcs(
1600 OpcList: {amdgcn_ds_bvh_stack_rtn, amdgcn_ds_bvh_stack_push4_pop1_rtn}, FastTypes: Standard)
1601 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV4S32}});
1602 
1603 addRulesForIOpcs(OpcList: {amdgcn_ds_bvh_stack_push8_pop1_rtn}, FastTypes: Standard)
1604 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV8S32}});
1605 
1606 addRulesForIOpcs(OpcList: {amdgcn_ds_bvh_stack_push8_pop2_rtn}, FastTypes: Standard)
1607 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV8S32}});
1608 
1609 addRulesForIOpcs(OpcList: {amdgcn_ds_swizzle}, FastTypes: Standard)
1610 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32}})
1611 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32}});
1612 
1613 addRulesForIOpcs(OpcList: {amdgcn_ds_read_tr4_b64, amdgcn_ds_read_tr8_b64})
1614 .Any(Init: {.Predicate: {DivV2S32}, .OperandMapping: {{VgprV2S32}, {IntrId, VgprP3}}});
1615 
1616 addRulesForIOpcs(OpcList: {amdgcn_ds_read_tr6_b96})
1617 .Any(Init: {.Predicate: {DivV3S32}, .OperandMapping: {{VgprV3S32}, {IntrId, VgprP3}}});
1618 
1619 addRulesForIOpcs(OpcList: {amdgcn_ds_read_tr16_b64})
1620 .Any(Init: {.Predicate: {DivV4S16}, .OperandMapping: {{VgprV4S16}, {IntrId, VgprP3}}});
1621
1622} // end initialize rules
1623