//===-- AMDGPURegBankLegalizeRules.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Definitions of the RegBankLegalize rules for all opcodes.
/// Implementation of the container for all the rules and of the rule search.
/// Fast search for the most common case, where Rule.Predicate checks the LLT
/// and uniformity of the register in operand 0.
//
//===----------------------------------------------------------------------===//

#include "AMDGPURegBankLegalizeRules.h"
#include "AMDGPUInstrInfo.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/AMDGPUAddrSpace.h"

#define DEBUG_TYPE "amdgpu-regbanklegalize"

using namespace llvm;
using namespace AMDGPU;
bool AMDGPU::isAnyPtr(LLT Ty, unsigned Width) {
  return Ty.isPointer() && Ty.getSizeInBits() == Width;
}

RegBankLLTMapping::RegBankLLTMapping(
    std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
    std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
    LoweringMethodID LoweringMethod)
    : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
      LoweringMethod(LoweringMethod) {}

PredicateMapping::PredicateMapping(
    std::initializer_list<UniformityLLTOpPredicateID> OpList,
    std::function<bool(const MachineInstr &)> TestFunc)
    : OpUniformityAndTypes(OpList), TestFunc(TestFunc) {}

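// Match a register against one UniformityLLTOpPredicateID. The IDs fall into
// a few families: S*/P*/V2S* check the exact LLT, B* check only the total
// size in bits, Ptr* accept any pointer of the given width, and the Uni*/Div*
// variants additionally require the register to be uniform or divergent.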
bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
                           const MachineUniformityInfo &MUI,
                           const MachineRegisterInfo &MRI) {
  switch (UniID) {
  case S1:
    return MRI.getType(Reg) == LLT::scalar(1);
  case S16:
    return MRI.getType(Reg) == LLT::scalar(16);
  case S32:
    return MRI.getType(Reg) == LLT::scalar(32);
  case S64:
    return MRI.getType(Reg) == LLT::scalar(64);
  case S128:
    return MRI.getType(Reg) == LLT::scalar(128);
  case P0:
    return MRI.getType(Reg) == LLT::pointer(0, 64);
  case P1:
    return MRI.getType(Reg) == LLT::pointer(1, 64);
  case P2:
    return MRI.getType(Reg) == LLT::pointer(2, 32);
  case P3:
    return MRI.getType(Reg) == LLT::pointer(3, 32);
  case P4:
    return MRI.getType(Reg) == LLT::pointer(4, 64);
  case P5:
    return MRI.getType(Reg) == LLT::pointer(5, 32);
  case P8:
    return MRI.getType(Reg) == LLT::pointer(8, 128);
  case Ptr32:
    return isAnyPtr(MRI.getType(Reg), 32);
  case Ptr64:
    return isAnyPtr(MRI.getType(Reg), 64);
  case Ptr128:
    return isAnyPtr(MRI.getType(Reg), 128);
  case V2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16);
  case V2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
  case V3S32:
    return MRI.getType(Reg) == LLT::fixed_vector(3, 32);
  case V4S32:
    return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
  case B32:
    return MRI.getType(Reg).getSizeInBits() == 32;
  case B64:
    return MRI.getType(Reg).getSizeInBits() == 64;
  case B96:
    return MRI.getType(Reg).getSizeInBits() == 96;
  case B128:
    return MRI.getType(Reg).getSizeInBits() == 128;
  case B160:
    return MRI.getType(Reg).getSizeInBits() == 160;
  case B256:
    return MRI.getType(Reg).getSizeInBits() == 256;
  case B512:
    return MRI.getType(Reg).getSizeInBits() == 512;
  case UniS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isUniform(Reg);
  case UniS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isUniform(Reg);
  case UniS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniform(Reg);
  case UniS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniform(Reg);
  case UniS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isUniform(Reg);
  case UniP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isUniform(Reg);
  case UniP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniform(Reg);
  case UniP2:
    return MRI.getType(Reg) == LLT::pointer(2, 32) && MUI.isUniform(Reg);
  case UniP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isUniform(Reg);
  case UniP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
  case UniP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
  case UniP8:
    return MRI.getType(Reg) == LLT::pointer(8, 128) && MUI.isUniform(Reg);
  case UniPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniform(Reg);
  case UniPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isUniform(Reg);
  case UniPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniform(Reg);
  case UniV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg);
  case UniV2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isUniform(Reg);
  case UniB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
  case UniB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniform(Reg);
  case UniB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniform(Reg);
  case UniB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniform(Reg);
  case UniB160:
    return MRI.getType(Reg).getSizeInBits() == 160 && MUI.isUniform(Reg);
  case UniB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniform(Reg);
  case UniB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg);
  case UniBRC: {
    if (!MUI.isUniform(Reg))
      return false;
    // Check if there is an SGPR register class of the same size as the LLT.
    const SIRegisterInfo *TRI =
        static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    // There is no 16-bit SGPR register class. The extra size check is
    // required since getSGPRClassForBitWidth returns SReg_32RegClass for
    // size 16.
    unsigned LLTSize = MRI.getType(Reg).getSizeInBits();
    return LLTSize >= 32 && TRI->getSGPRClassForBitWidth(LLTSize);
  }
  case DivS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg);
  case DivS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isDivergent(Reg);
  case DivS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg);
  case DivS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergent(Reg);
  case DivS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isDivergent(Reg);
  case DivP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isDivergent(Reg);
  case DivP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergent(Reg);
  case DivP2:
    return MRI.getType(Reg) == LLT::pointer(2, 32) && MUI.isDivergent(Reg);
  case DivP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isDivergent(Reg);
  case DivP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergent(Reg);
  case DivP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergent(Reg);
  case DivPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isDivergent(Reg);
  case DivPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isDivergent(Reg);
  case DivPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergent(Reg);
  case DivV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isDivergent(Reg);
  case DivV2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isDivergent(Reg);
  case DivB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
  case DivB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergent(Reg);
  case DivB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergent(Reg);
  case DivB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergent(Reg);
  case DivB160:
    return MRI.getType(Reg).getSizeInBits() == 160 && MUI.isDivergent(Reg);
  case DivB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergent(Reg);
  case DivB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(Reg);
  case DivBRC: {
    if (!MUI.isDivergent(Reg))
      return false;
    // Check if there is a VGPR register class of the same size as the LLT.
    const SIRegisterInfo *TRI =
        static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    return TRI->getVGPRClassForBitWidth(MRI.getType(Reg).getSizeInBits());
  }
  case _:
    return true;
  default:
    llvm_unreachable("missing matchUniformityAndLLT");
  }
}

bool PredicateMapping::match(const MachineInstr &MI,
                             const MachineUniformityInfo &MUI,
                             const MachineRegisterInfo &MRI) const {
  // Check LLT signature.
  for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (OpUniformityAndTypes[i] == _) {
      assert((!MI.getOperand(i).isReg() ||
              !MI.getOperand(i).getReg().isVirtual()) &&
             "_ is for non-register and physical register operands only");
      continue;
    }

    // Remaining IDs check registers.
    if (!MO.isReg())
      return false;

    if (!matchUniformityAndLLT(MO.getReg(), OpUniformityAndTypes[i], MUI, MRI))
      return false;
  }

  // More complex check.
  if (TestFunc)
    return TestFunc(MI);

  return true;
}
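
// For example, the G_ICMP rule predicate {{UniS1, _, S16}, isEqualityICmp}
// used below matches a compare whose result (operand 0) is a uniform s1,
// whose predicate operand (operand 1) is ignored, and whose LHS (operand 2)
// is s16, but only when the extra isEqualityICmp test also accepts the
// instruction.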

SetOfRulesForOpcode::SetOfRulesForOpcode() = default;

SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
    : FastTypes(FastTypes) {}

UniformityLLTOpPredicateID LLTToId(LLT Ty) {
  if (Ty == LLT::scalar(16))
    return S16;
  if (Ty == LLT::scalar(32))
    return S32;
  if (Ty == LLT::scalar(64))
    return S64;
  if (Ty == LLT::fixed_vector(2, 16))
    return V2S16;
  if (Ty == LLT::fixed_vector(2, 32))
    return V2S32;
  if (Ty == LLT::fixed_vector(3, 32))
    return V3S32;
  if (Ty == LLT::fixed_vector(4, 32))
    return V4S32;
  return _;
}

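// Map an LLT to the size-only B32/B64/B96/B128 IDs used by the StandardB
// fast rules: types with the same total width (e.g. s64, v2s32, v4s16 and
// 64-bit pointers) share one fast-rule slot.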
UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
  if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
      isAnyPtr(Ty, 32))
    return B32;
  if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
      Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64))
    return B64;
  if (Ty == LLT::fixed_vector(3, 32))
    return B96;
  if (Ty == LLT::fixed_vector(4, 32) || Ty == LLT::fixed_vector(2, 64) ||
      Ty == LLT::fixed_vector(8, 16) || isAnyPtr(Ty, 128))
    return B128;
  return _;
}

const RegBankLLTMapping *
SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI,
                                      const MachineRegisterInfo &MRI,
                                      const MachineUniformityInfo &MUI) const {
  // Search in "Fast Rules".
  // Note: if fast rules are enabled, a RegBankLLTMapping must be added in
  // each slot that could match the fast predicate. Otherwise InvalidMapping
  // is returned, which results in failure; the "Slow Rules" are not searched.
  if (FastTypes != NoFastRules) {
    Register Reg = MI.getOperand(0).getReg();
    int Slot;
    if (FastTypes == StandardB)
      Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg)));
    else
      Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));

    if (Slot != -1)
      return MUI.isUniform(Reg) ? &Uni[Slot] : &Div[Slot];
  }

  // Slow search for more complex rules.
  for (const RegBankLegalizeRule &Rule : Rules) {
    if (Rule.Predicate.match(MI, MUI, MRI))
      return &Rule.OperandMapping;
  }

  return nullptr;
}

void SetOfRulesForOpcode::addRule(RegBankLegalizeRule Rule) {
  Rules.push_back(Rule);
}

void SetOfRulesForOpcode::addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
                                               RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Div[Slot] = std::move(RuleApplyIDs);
}

void SetOfRulesForOpcode::addFastRuleUniform(UniformityLLTOpPredicateID Ty,
                                             RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Uni[Slot] = std::move(RuleApplyIDs);
}

int SetOfRulesForOpcode::getFastPredicateSlot(
    UniformityLLTOpPredicateID Ty) const {
  switch (FastTypes) {
  case Standard: {
    switch (Ty) {
    case S32:
      return 0;
    case S16:
      return 1;
    case S64:
      return 2;
    case V2S16:
      return 3;
    default:
      return -1;
    }
  }
  case StandardB: {
    switch (Ty) {
    case B32:
      return 0;
    case B64:
      return 1;
    case B96:
      return 2;
    case B128:
      return 3;
    default:
      return -1;
    }
  }
  case Vector: {
    switch (Ty) {
    case S32:
      return 0;
    case V2S32:
      return 1;
    case V3S32:
      return 2;
    case V4S32:
      return 3;
    default:
      return -1;
    }
  }
  default:
    return -1;
  }
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
}

const SetOfRulesForOpcode *
RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
      Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
      Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
    unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
    auto IRAIt = IRulesAlias.find(IntrID);
    if (IRAIt == IRulesAlias.end())
      return nullptr;
    return &IRules.at(IRAIt->second);
  }

  auto GRAIt = GRulesAlias.find(Opc);
  if (GRAIt == GRulesAlias.end())
    return nullptr;
  return &GRules.at(GRAIt->second);
}

// Syntactic sugar wrapper for a predicate lambda that enables '&&', '||' and
// '!'.
class Predicate {
private:
  struct Elt {
    // Save a formula composed of Pred, '&&', '||' and '!' as a jump table.
    // Sink '!' to Pred, for example !((A && !B) || C) -> (!A || B) && !C.
    // Sequences of && and || are represented by jumps, for example:
    // (A && B && ... X) or (A && B && ... X) || Y
    //   A == true: jump to B
    //   A == false: jump to end or Y; the result is A (false) or Y
    // (A || B || ... X) or (A || B || ... X) && Y
    //   A == true: jump to end or Y; the result is A (true) or Y
    //   A == false: jump to B
    // Notice that when negating an expression, we simply flip Neg on each
    // Pred and swap TJumpOffset and FJumpOffset ('&&' becomes '||' and '||'
    // becomes '&&').
    std::function<bool(const MachineInstr &)> Pred;
    bool Neg; // Neg of Pred is calculated before the jump.
    unsigned TJumpOffset;
    unsigned FJumpOffset;
  };

  SmallVector<Elt, 8> Expression;

  Predicate(SmallVectorImpl<Elt> &&Expr) { Expression.swap(Expr); }

public:
  Predicate(std::function<bool(const MachineInstr &)> Pred) {
    Expression.push_back({Pred, false, 1, 1});
  }

  bool operator()(const MachineInstr &MI) const {
    unsigned Idx = 0;
    unsigned ResultIdx = Expression.size();
    bool Result;
    do {
      Result = Expression[Idx].Pred(MI);
      Result = Expression[Idx].Neg ? !Result : Result;
      if (Result) {
        Idx += Expression[Idx].TJumpOffset;
      } else {
        Idx += Expression[Idx].FJumpOffset;
      }
    } while (Idx != ResultIdx);

    return Result;
  }

  Predicate operator!() const {
    SmallVector<Elt, 8> NegExpression;
    for (const Elt &ExprElt : Expression) {
      NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset,
                               ExprElt.TJumpOffset});
    }
    return Predicate(std::move(NegExpression));
  }

  Predicate operator&&(const Predicate &RHS) const {
    SmallVector<Elt, 8> AndExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in false, whole expression results in false.
      if (i + AndExpression[i].FJumpOffset == ResultIdx)
        AndExpression[i].FJumpOffset += RHSSize;
    }

    AndExpression.append(RHS.Expression);

    return Predicate(std::move(AndExpression));
  }

  Predicate operator||(const Predicate &RHS) const {
    SmallVector<Elt, 8> OrExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in true, whole expression results in true.
      if (i + OrExpression[i].TJumpOffset == ResultIdx)
        OrExpression[i].TJumpOffset += RHSSize;
    }

    OrExpression.append(RHS.Expression);

    return Predicate(std::move(OrExpression));
  }
};
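
// A minimal sketch of how the jump-table encoding behaves (illustration
// only): composing two predicates A and B with operator|| yields
//   Expression = [{A, Neg=false, TJump=2, FJump=1},
//                 {B, Neg=false, TJump=1, FJump=1}]
// If A evaluates to true, the walk jumps past B (offset 2 == end) and the
// result is true; if A is false, it falls through to B (offset 1), whose
// result becomes the result of the whole expression. operator! only flips
// each Neg flag and swaps the two offsets, so no restructuring is needed.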

// Initialize rules.
RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
                                           MachineRegisterInfo &_MRI)
    : ST(&_ST), MRI(&_MRI) {

  addRulesForGOpcs({G_ADD, G_SUB}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackAExt})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}});

  addRulesForGOpcs({G_UADDO, G_USUBO}, Standard)
      .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_UADDE, G_USUBE, G_SADDE, G_SSUBE}, Standard)
      .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32, Sgpr32AExtBoolInReg}})
      .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32, Vcc}});

  addRulesForGOpcs({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  bool HasVecMulU64 = ST->hasVectorMulU64();
  addRulesForGOpcs({G_MUL}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{SgprB64}, {SgprB64, SgprB64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
      .Div(S64, {{VgprB64}, {VgprB64, VgprB64}}, HasVecMulU64)
      .Div(S64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32Mul}, !HasVecMulU64);

  bool hasMulHi = ST->hasScalarMulHiInsts();
  addRulesForGOpcs({G_UMULH, G_SMULH}, Standard)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasMulHi)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasMulHi);

  addRulesForGOpcs({G_AMDGPU_MAD_U64_U32}, Standard)
      .Div(S64, {{Vgpr64, Vcc}, {Vgpr32, Vgpr32, Vgpr64}})
      .Uni(S64, {{Sgpr64, Sgpr32Trunc}, {Sgpr32, Sgpr32, Sgpr64}, UniMAD64});

  bool HasScalarSMulU64 = ST->hasScalarSMulU64();
  addRulesForGOpcs({G_AMDGPU_S_MUL_U64_U32, G_AMDGPU_S_MUL_I64_I32}, Standard)
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}, UniMul64}, HasScalarSMulU64)
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}, DivSMulToMAD});

  addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
      .Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}})
      .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr16, Sgpr16}}})
      .Any({{DivS16}, {{Vgpr16}, {Vgpr16, Vgpr16}}})
      .Uni(B32, {{SgprB32}, {SgprB32, SgprB32}})
      .Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
      .Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
      .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});

  addRulesForGOpcs({G_SHL}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_LSHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_ASHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_FSHR}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});

  addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});

  addRulesForGOpcs({G_SMIN, G_SMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32SExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackMinMax})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_UMIN, G_UMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackMinMax})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  // Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT and
  // G_FCONSTANT here; the rest is trivially regbankselected earlier.
  addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
  addRulesForGOpcs({G_CONSTANT})
      .Any({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}});

  addRulesForGOpcs({G_FREEZE})
      .Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt}}})
      .Any({{DivS1}, {{Vcc}, {Vcc}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr16}}})
      .Any({{UniBRC}, {{SgprBRC}, {SgprBRC}}})
      .Any({{DivBRC}, {{VgprBRC}, {VgprBRC}}});

  addRulesForGOpcs({G_UNMERGE_VALUES})
      .Any({{UniS16}, {{}, {}, UnmergeToShiftTrunc}})
      .Any({{UniBRC}, {{}, {}, VerifyAllSgpr}})
      .Any({{DivBRC}, {{}, {}, ApplyAllVgpr}});

  // LOAD {Div}, {{VgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  // LOAD {Uni}, {{UniInVgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  // LOAD_NORET {}, {{}, {Imm, VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  // STORE {}, {{}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  addRulesForGOpcs({G_AMDGPU_INTRIN_IMAGE_LOAD, G_AMDGPU_INTRIN_IMAGE_LOAD_D16,
                    G_AMDGPU_INTRIN_IMAGE_LOAD_NORET,
                    G_AMDGPU_INTRIN_IMAGE_STORE,
                    G_AMDGPU_INTRIN_IMAGE_STORE_D16})
      .Any({{}, {{}, {}, ApplyINTRIN_IMAGE}});

  Predicate isSignedICmp([](const MachineInstr &MI) -> bool {
    auto Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
    return CmpInst::isSigned(Pred);
  });

  Predicate isEqualityICmp([](const MachineInstr &MI) -> bool {
    auto Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
    return ICmpInst::isEquality(Pred);
  });

  bool HasScalarCompareEq64 = ST->hasScalarCompareEq64();
  // clang-format off
  addRulesForGOpcs({G_ICMP})
      .Any({{{UniS1, _, S16}, isEqualityICmp}, {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
      .Any({{{UniS1, _, S16}, !isEqualityICmp && isSignedICmp}, {{Sgpr32Trunc}, {None, Sgpr32SExt, Sgpr32SExt}}})
      .Any({{{UniS1, _, S16}, !isEqualityICmp && !isSignedICmp}, {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
      .Any({{{DivS1, _, S16}}, {{Vcc}, {None, Vgpr16, Vgpr16}}})
      .Any({{{UniS1, _, S32}}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
      .Any({{{DivS1, _, S32}}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{{UniS1, _, S64}, isEqualityICmp}, {{Sgpr32Trunc}, {None, Sgpr64, Sgpr64}}}, HasScalarCompareEq64)
      .Any({{{UniS1, _, S64}, isEqualityICmp}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}}, !HasScalarCompareEq64)
      .Any({{{UniS1, _, S64}, !isEqualityICmp}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
      .Any({{{DivS1, _, S64}}, {{Vcc}, {None, Vgpr64, Vgpr64}}})
      .Any({{{UniS1, _, Ptr32}}, {{Sgpr32Trunc}, {None, SgprPtr32, SgprPtr32}}})
      .Any({{{DivS1, _, Ptr32}}, {{Vcc}, {None, VgprPtr32, VgprPtr32}}})
      .Any({{{UniS1, _, Ptr64}, isEqualityICmp}, {{Sgpr32Trunc}, {None, SgprPtr64, SgprPtr64}}}, HasScalarCompareEq64)
      .Any({{{UniS1, _, Ptr64}, isEqualityICmp}, {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}}, !HasScalarCompareEq64)
      .Any({{{UniS1, _, Ptr64}, !isEqualityICmp}, {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}})
      .Any({{{DivS1, _, Ptr64}}, {{Vcc}, {None, VgprPtr64, VgprPtr64}}});
  // clang-format on

  addRulesForGOpcs({G_BRCOND})
      .Any({{UniS1}, {{}, {Sgpr32AExtBoolInReg}}})
      .Any({{DivS1}, {{}, {Vcc}}});

  addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});

  addRulesForGOpcs({G_SELECT}, StandardB)
      .Any({{DivS16}, {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr32AExtBoolInReg, Sgpr16, Sgpr16}}})
      .Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}})
      .Uni(B32, {{SgprB32}, {Sgpr32AExtBoolInReg, SgprB32, SgprB32}})
      .Div(B64, {{VgprB64}, {Vcc, VgprB64, VgprB64}, SplitTo32Select})
      .Uni(B64, {{SgprB64}, {Sgpr32AExtBoolInReg, SgprB64, SgprB64}});

  addRulesForGOpcs({G_ANYEXT})
      .Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS64, S1}, {{None}, {None}}}) // should be combined away
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  bool Has16bitCmp = ST->has16BitInsts();

  // In GlobalISel, an in-register G_TRUNC is treated as a no-op and is
  // instruction-selected into a COPY. It is up to the user to deal with the
  // truncated bits.
  addRulesForGOpcs({G_TRUNC})
      .Any({{UniS1, UniS16}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS32}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS64}, {{None}, {None}}}) // should be combined away
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}})
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
      .Any({{UniV2S16, V2S32}, {{SgprV2S16}, {SgprV2S32}}})
      .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
      // This is non-trivial. VgprToVccCopy is done using a compare
      // instruction.
      .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}}, Has16bitCmp)
      .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr32AExt}, VgprToVccCopy}},
           !Has16bitCmp)
      .Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}})
      .Any({{DivS1, DivS64}, {{Vcc}, {Vgpr64}, VgprToVccCopy}});

  addRulesForGOpcs({G_ZEXT})
      .Any({{UniS16, S1}, {{Sgpr32Trunc}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS32, S1}, {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS64, S1}, {{Sgpr64}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // Not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32ZExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32ZExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT})
      .Any({{UniS16, S1}, {{Sgpr32Trunc}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS32, S1}, {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS64, S1}, {{Sgpr64}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // Not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32SExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32SExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT_INREG})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}})
      .Any({{DivS64, S64}, {{Vgpr64}, {Vgpr64}, SplitTo32SExtInReg}});

  addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Imm}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Imm}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Imm}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Imm}});

  addRulesForGOpcs({G_ASSERT_ALIGN}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}})
      .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64}}});

  // Atomic read-modify-write operations: result and value are always VGPR,
  // pointer varies by address space.
  addRulesForGOpcs({G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG,
                    G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
                    G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN,
                    G_ATOMICRMW_UMAX, G_ATOMICRMW_UINC_WRAP,
                    G_ATOMICRMW_UDEC_WRAP})
      .Any({{DivS32, P0, S32}, {{Vgpr32}, {VgprP0, Vgpr32}}})
      .Any({{DivS64, P0, S64}, {{Vgpr64}, {VgprP0, Vgpr64}}})
      .Any({{DivS32, P1, S32}, {{Vgpr32}, {VgprP1, Vgpr32}}})
      .Any({{DivS64, P1, S64}, {{Vgpr64}, {VgprP1, Vgpr64}}})
      .Any({{DivS32, P3, S32}, {{Vgpr32}, {VgprP3, Vgpr32}}})
      .Any({{DivS64, P3, S64}, {{Vgpr64}, {VgprP3, Vgpr64}}});

  bool HasAtomicFlatPkAdd16Insts = ST->hasAtomicFlatPkAdd16Insts();
  bool HasAtomicBufferGlobalPkAddF16Insts =
      ST->hasAtomicBufferGlobalPkAddF16NoRtnInsts() ||
      ST->hasAtomicBufferGlobalPkAddF16Insts();
  bool HasAtomicDsPkAdd16Insts = ST->hasAtomicDsPkAdd16Insts();
  addRulesForGOpcs({G_ATOMICRMW_FADD})
      .Any({{DivS32, P0, S32}, {{Vgpr32}, {VgprP0, Vgpr32}}})
      .Any({{DivS64, P0, S64}, {{Vgpr64}, {VgprP0, Vgpr64}}})
      .Any({{DivS32, P1, S32}, {{Vgpr32}, {VgprP1, Vgpr32}}})
      .Any({{DivS64, P1, S64}, {{Vgpr64}, {VgprP1, Vgpr64}}})
      .Any({{DivS32, P3, S32}, {{Vgpr32}, {VgprP3, Vgpr32}}})
      .Any({{DivS64, P3, S64}, {{Vgpr64}, {VgprP3, Vgpr64}}})
      .Any({{DivV2S16, P0, V2S16}, {{VgprV2S16}, {VgprP0, VgprV2S16}}},
           HasAtomicFlatPkAdd16Insts)
      .Any({{DivV2S16, P1, V2S16}, {{VgprV2S16}, {VgprP1, VgprV2S16}}},
           HasAtomicBufferGlobalPkAddF16Insts)
      .Any({{DivV2S16, P3, V2S16}, {{VgprV2S16}, {VgprP3, VgprV2S16}}},
           HasAtomicDsPkAdd16Insts);

  addRulesForGOpcs({G_ATOMIC_CMPXCHG})
      .Any({{DivS32, P2}, {{Vgpr32}, {VgprP2, Vgpr32, Vgpr32}}})
      .Any({{DivS64, P2}, {{Vgpr64}, {VgprP2, Vgpr64, Vgpr64}}})
      .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3, Vgpr32, Vgpr32}}})
      .Any({{DivS64, P3}, {{Vgpr64}, {VgprP3, Vgpr64, Vgpr64}}});

  addRulesForGOpcs({G_AMDGPU_ATOMIC_CMPXCHG})
      .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0, VgprV2S32}}})
      .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1, VgprV2S32}}})
      .Any({{DivS64, P0}, {{Vgpr64}, {VgprP0, VgprV2S64}}})
      .Any({{DivS64, P1}, {{Vgpr64}, {VgprP1, VgprV2S64}}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_CMPSWAP}, Standard)
      .Div(S32, {{Vgpr32},
                 {Vgpr32, Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(S64, {{Vgpr64},
                 {Vgpr64, Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_SWAP, G_AMDGPU_BUFFER_ATOMIC_UMAX,
                    G_AMDGPU_BUFFER_ATOMIC_UMIN, G_AMDGPU_BUFFER_ATOMIC_SMAX,
                    G_AMDGPU_BUFFER_ATOMIC_SMIN},
                   Standard)
      .Div(S32, {{Vgpr32}, {Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(S64, {{Vgpr64}, {Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
  bool hasSMRDSmall = ST->hasScalarSubwordLoads();
  bool usesTrue16 = ST->useRealTrue16Insts();

  Predicate isAlign16([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(16);
  });

  Predicate isAlign4([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(4);
  });

  Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isAtomic();
  });

  Predicate isUniMMO([](const MachineInstr &MI) -> bool {
    return AMDGPU::isUniformMMO(*MI.memoperands_begin());
  });

  Predicate isConst([](const MachineInstr &MI) -> bool {
    // The address space in the MMO may differ from the address space of the
    // pointer.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned AS = MMO->getAddrSpace();
    return AS == AMDGPUAS::CONSTANT_ADDRESS ||
           AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
  });

  Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isVolatile();
  });

  Predicate isInvMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isInvariant();
  });

  Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getFlags() & MONoClobber;
  });

  Predicate isNaturalAligned([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return MMO->getAlign() >= Align(MMO->getSize().getValue());
  });

  Predicate is8Or16BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned MemSize = 8 * MMO->getSize().getValue();
    return MemSize == 16 || MemSize == 8;
  });

  Predicate is32BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return 8 * MMO->getSize().getValue() == 32;
  });

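  // "UL" (uniform load): not atomic, with a uniform MMO, and with memory
  // known not to change under the load - constant address space, or a
  // non-volatile MMO that is invariant or marked no-clobber. Such loads can
  // take the scalar (s_load) path.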
  auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
              (isConst || isInvMMO || isNoClobberMMO);

  // clang-format off
  // TODO: S32Dst, 16-bit any-extending load should not appear on True16 targets
  addRulesForGOpcs({G_LOAD})
      // flat, addrspace(0), never uniform - flat_load
      .Any({{DivS16, P0}, {{Vgpr16}, {VgprP0}}}, usesTrue16)
      .Any({{DivB32, P0}, {{VgprB32}, {VgprP0}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P0}, {{VgprB64}, {VgprP0}}})
      .Any({{DivB96, P0}, {{VgprB96}, {VgprP0}}})
      .Any({{DivB128, P0}, {{VgprB128}, {VgprP0}}})

      // global, addrspace(1)
      // divergent - global_load
      .Any({{DivS16, P1}, {{Vgpr16}, {VgprP1}}}, usesTrue16)
      .Any({{DivB32, P1}, {{VgprB32}, {VgprP1}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P1}, {{VgprB64}, {VgprP1}}})
      .Any({{DivB96, P1}, {{VgprB96}, {VgprP1}}})
      .Any({{DivB128, P1}, {{VgprB128}, {VgprP1}}})
      .Any({{DivB256, P1}, {{VgprB256}, {VgprP1}, SplitLoad}})
      .Any({{DivB512, P1}, {{VgprB512}, {VgprP1}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P1}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP1}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P1}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      // TODO: SplitLoad when !isNaturalAligned && isUL and target hasSMRDSmall
      .Any({{{UniB32, P1}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P1}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}}}) // 32-bit load
      .Any({{{UniB64, P1}, isAlign4 && isUL}, {{SgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, isAlign16 && isUL}, {{SgprB96}, {SgprP1}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP1}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && isUL}, {{SgprB96}, {SgprP1}}}, hasSMRDx3)
      .Any({{{UniB128, P1}, isAlign4 && isUL}, {{SgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
      .Any({{{UniB512, P1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})

      // Uniform via global or buffer load, for example a volatile or
      // non-aligned uniform load. Not using the standard
      // {{UniInVgprTy}, {VgprP1}} mapping, since that selects as global_load;
      // use SgprP1 for the pointer instead to also match patterns without
      // flat-for-global, the default for GFX7 and older:
      // -> +flat-for-global + {{UniInVgprTy}, {SgprP1}} - global_load
      // -> -flat-for-global + {{UniInVgprTy}, {SgprP1}} - buffer_load
      .Any({{{UniS16, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P1}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP1}}})
      .Any({{{UniB128, P1}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP1}, SplitLoad}})
      .Any({{{UniB512, P1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP1}, SplitLoad}})

      // local, addrspace(3) - ds_load
      .Any({{DivS16, P3}, {{Vgpr16}, {VgprP3}}}, usesTrue16)
      .Any({{DivB32, P3}, {{VgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P3}, {{VgprB64}, {VgprP3}}})
      .Any({{DivB96, P3}, {{VgprB96}, {VgprP3}}})
      .Any({{DivB128, P3}, {{VgprB128}, {VgprP3}}})

      .Any({{UniS16, P3}, {{UniInVgprS16}, {SgprP3}}}, usesTrue16) // 16-bit load
      .Any({{UniB32, P3}, {{UniInVgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{UniB64, P3}, {{UniInVgprB64}, {VgprP3}}})
      .Any({{UniB96, P3}, {{UniInVgprB96}, {VgprP3}}})
      .Any({{UniB128, P3}, {{UniInVgprB128}, {VgprP3}}})

      // constant, addrspace(4)
      // divergent - global_load
      .Any({{DivS16, P4}, {{Vgpr16}, {VgprP4}}}, usesTrue16)
      .Any({{DivB32, P4}, {{VgprB32}, {VgprP4}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P4}, {{VgprB64}, {VgprP4}}})
      .Any({{DivB96, P4}, {{VgprB96}, {VgprP4}}})
      .Any({{DivB128, P4}, {{VgprB128}, {VgprP4}}})
      .Any({{DivB256, P4}, {{VgprB256}, {VgprP4}, SplitLoad}})
      .Any({{DivB512, P4}, {{VgprB512}, {VgprP4}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P4}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP4}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P4}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P4}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}}}) // 32-bit load
      .Any({{{UniB64, P4}, isAlign4 && isUL}, {{SgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, isAlign16 && isUL}, {{SgprB96}, {SgprP4}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP4}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && isUL}, {{SgprB96}, {SgprP4}}}, hasSMRDx3)
      .Any({{{UniB128, P4}, isAlign4 && isUL}, {{SgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, isAlign4 && isUL}, {{SgprB256}, {SgprP4}}})
      .Any({{{UniB512, P4}, isAlign4 && isUL}, {{SgprB512}, {SgprP4}}})

      // uniform in vgpr - global_load or buffer_load
      .Any({{{UniS16, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP4}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P4}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP4}}})
      .Any({{{UniB128, P4}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP4}, SplitLoad}})
      .Any({{{UniB512, P4}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP4}, SplitLoad}})

      // private, addrspace(5), never uniform - scratch_load
      .Any({{DivS16, P5}, {{Vgpr16}, {VgprP5}}}, usesTrue16)
      .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P5}, {{VgprB64}, {VgprP5}}})
      .Any({{DivB96, P5}, {{VgprB96}, {VgprP5}}})
      .Any({{DivB128, P5}, {{VgprB128}, {VgprP5}}})

      .Any({{DivS32, Ptr128}, {{Vgpr32}, {VgprPtr128}}});

  addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD}) // 8-bit and 16-bit extending loads
      .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0}}})

      .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1}}})
      .Any({{{UniS32, P1}, isAlign4 && isUL}, {{Sgpr32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP1}}}, hasSMRDSmall)
      .Any({{{UniS32, P1}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP1}}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP1}}}, hasSMRDSmall)

      .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3}}})
      .Any({{UniS32, P3}, {{UniInVgprS32}, {VgprP3}}})

      .Any({{DivS32, P4}, {{Vgpr32}, {VgprP4}}})
      .Any({{{UniS32, P4}, isAlign4 && isUL}, {{Sgpr32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP4}}}, hasSMRDSmall)
      .Any({{{UniS32, P4}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP4}}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP4}}}, hasSMRDSmall)

      .Any({{DivS32, P5}, {{Vgpr32}, {VgprP5}}});

1036 addRulesForGOpcs(OpcList: {G_STORE})
1037 // addrspace(0)
1038 .Any(Init: {.Predicate: {S16, P0}, .OperandMapping: {{}, {Vgpr16, VgprP0}}}, STPred: usesTrue16) // 16-bit store
1039 .Any(Init: {.Predicate: {B32, P0}, .OperandMapping: {{}, {VgprB32, VgprP0}}}) // 32-bit store, 8-bit and 16-bit truncating store
1040 .Any(Init: {.Predicate: {B64, P0}, .OperandMapping: {{}, {VgprB64, VgprP0}}})
1041 .Any(Init: {.Predicate: {B96, P0}, .OperandMapping: {{}, {VgprB96, VgprP0}}})
1042 .Any(Init: {.Predicate: {B128, P0}, .OperandMapping: {{}, {VgprB128, VgprP0}}})
1043
1044 // addrspace(1), there are no stores to addrspace(4)
1045 // For targets:
1046 // - with "+flat-for-global" - global_store
1047 // - without(-flat-for-global) - buffer_store addr64
1048 .Any(Init: {.Predicate: {S16, DivP1}, .OperandMapping: {{}, {Vgpr16, VgprP1}}}, STPred: usesTrue16) // 16-bit store
1049 .Any(Init: {.Predicate: {B32, DivP1}, .OperandMapping: {{}, {VgprB32, VgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
1050 .Any(Init: {.Predicate: {B64, DivP1}, .OperandMapping: {{}, {VgprB64, VgprP1}}})
1051 .Any(Init: {.Predicate: {B96, DivP1}, .OperandMapping: {{}, {VgprB96, VgprP1}}})
1052 .Any(Init: {.Predicate: {B128, DivP1}, .OperandMapping: {{}, {VgprB128, VgprP1}}})
1053
1054 // For UniP1, use sgpr ptr to match flat-for-global patterns. Targets:
1055 // - with "+flat-for-global" - global_store for both sgpr and vgpr ptr
1056 // - without(-flat-for-global) - need sgpr ptr to select buffer_store
1057 .Any(Init: {.Predicate: {S16, UniP1}, .OperandMapping: {{}, {Vgpr16, SgprP1}}}, STPred: usesTrue16) // 16-bit store
1058 .Any(Init: {.Predicate: {B32, UniP1}, .OperandMapping: {{}, {VgprB32, SgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
1059 .Any(Init: {.Predicate: {B64, UniP1}, .OperandMapping: {{}, {VgprB64, SgprP1}}})
1060 .Any(Init: {.Predicate: {B96, UniP1}, .OperandMapping: {{}, {VgprB96, SgprP1}}})
1061 .Any(Init: {.Predicate: {B128, UniP1}, .OperandMapping: {{}, {VgprB128, SgprP1}}})
1062
1063 // addrspace(3) and addrspace(5)
1064 .Any(Init: {.Predicate: {S16, Ptr32}, .OperandMapping: {{}, {Vgpr16, VgprPtr32}}}, STPred: usesTrue16) // 16-bit store
1065 .Any(Init: {.Predicate: {B32, Ptr32}, .OperandMapping: {{}, {VgprB32, VgprPtr32}}}) // 32-bit store, 8-bit and 16-bit truncating store
1066 .Any(Init: {.Predicate: {B64, Ptr32}, .OperandMapping: {{}, {VgprB64, VgprPtr32}}})
1067 .Any(Init: {.Predicate: {B96, Ptr32}, .OperandMapping: {{}, {VgprB96, VgprPtr32}}})
1068 .Any(Init: {.Predicate: {B128, Ptr32}, .OperandMapping: {{}, {VgprB128, VgprPtr32}}});
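
  // Note: in all of the store rules above the data operand is mapped to
  // vgpr even when the value is uniform, since the selected flat, global,
  // scratch and buffer store instructions only take vgpr data; only the
  // uniform addrspace(1) pointer stays in sgpr.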

  // clang-format on

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
                    G_AMDGPU_TBUFFER_LOAD_FORMAT},
                   StandardB)
      .Div(B32, {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B32, {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B64, {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B96, {{VgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B96, {{UniInVgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B128, {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B128, {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_USHORT, G_AMDGPU_BUFFER_LOAD_UBYTE,
                    G_AMDGPU_BUFFER_LOAD_SSHORT, G_AMDGPU_BUFFER_LOAD_SBYTE},
                   StandardB)
      .Div(B32, {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B32, {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs(
      {G_AMDGPU_BUFFER_LOAD_UBYTE_TFE, G_AMDGPU_BUFFER_LOAD_USHORT_TFE},
      StandardB)
      .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B64, {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_TFE, G_AMDGPU_BUFFER_LOAD_FORMAT_TFE},
                   StandardB)
      .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B64, {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B96, {{VgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B96, {{UniInVgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B128, {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B128, {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Any({{DivB160}, {{VgprB160}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{UniB160},
            {{UniInVgprB160}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});

  addRulesForGOpcs(
      {G_AMDGPU_BUFFER_LOAD_FORMAT_D16, G_AMDGPU_TBUFFER_LOAD_FORMAT_D16},
      StandardB)
      .Div(B32, {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B32, {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B64, {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B128, {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B128, {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_STORE, G_AMDGPU_BUFFER_STORE_BYTE,
                    G_AMDGPU_BUFFER_STORE_SHORT, G_AMDGPU_BUFFER_STORE_FORMAT,
                    G_AMDGPU_BUFFER_STORE_FORMAT_D16,
                    G_AMDGPU_TBUFFER_STORE_FORMAT,
                    G_AMDGPU_TBUFFER_STORE_FORMAT_D16})
      .Any({{B32}, {{}, {VgprB32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{B64}, {{}, {VgprB64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{B96}, {{}, {VgprB96, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{B128}, {{}, {VgprB128, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});

  // Buffer atomics: resource descriptor + scalar offset are SGPR, data and
  // address components are VGPR.
  //
  // Operand order (SIInstructions.td BufferAtomicGenericInstruction):
  // dst = op vdata, rsrc, vindex, voffset, soffset, offset_imm, cachepolicy,
  // idxen_imm
  addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_FADD})
      .Any({{S32, S32, V4S32, S32, S32, S32},
            {{Vgpr32}, {Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{S64, S64, V4S32, S32, S32, S32},
            {{Vgpr64}, {Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{V2S16, V2S16, V4S32, S32, S32, S32},
            {{VgprV2S16},
             {VgprV2S16, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});

  addRulesForGOpcs({G_PTR_ADD})
      .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32, Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64, Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64, Vgpr64}}});
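
  // For example, a uniform '%p:_(p1) = G_PTR_ADD %base(p1), %off(s64)' is
  // mapped entirely to sgpr, while the divergent form goes entirely to
  // vgpr; the pointer width (Ptr32/Ptr64) picks the matching offset width.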

  addRulesForGOpcs({G_INTTOPTR})
      .Any({{UniPtr32}, {{SgprPtr32}, {Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {Vgpr64}}})
      .Any({{UniPtr128}, {{SgprPtr128}, {Sgpr128}}})
      .Any({{DivPtr128}, {{VgprPtr128}, {Vgpr128}}});

  addRulesForGOpcs({G_PTRTOINT})
      .Any({{UniS32}, {{Sgpr32}, {SgprPtr32}}})
      .Any({{DivS32}, {{Vgpr32}, {VgprPtr32}}})
      .Any({{UniS64}, {{Sgpr64}, {SgprPtr64}}})
      .Any({{DivS64}, {{Vgpr64}, {VgprPtr64}}})
      .Any({{UniS128}, {{Sgpr128}, {SgprPtr128}}})
      .Any({{DivS128}, {{Vgpr128}, {VgprPtr128}}});

  // FIXME: Update llvm/test/CodeGen/AMDGPU/ptrmask.ll to use GlobalISel.
  // Currently crashes on P8 (buffer resource) tests due to legalizer issue.
  addRulesForGOpcs({G_PTRMASK})
      .Any({{UniP1}, {{SgprP1}, {SgprP1, Sgpr64}}})
      .Any({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}})
      .Any({{UniP3}, {{SgprP3}, {SgprP3, Sgpr32}}})
      .Any({{DivP3}, {{VgprP3}, {VgprP3, Vgpr32}}});

  addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});

  addRulesForGOpcs({G_BITREVERSE}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}});

  addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});

  addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
      .Uni(S64, {{Sgpr64}, {}});

  addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}});

  addRulesForGOpcs({G_GLOBAL_VALUE})
      .Any({{UniP0}, {{SgprP0}, {}}})
      .Any({{UniP1}, {{SgprP1}, {}}})
      .Any({{UniP3}, {{SgprP3}, {}}})
      .Any({{UniP4}, {{SgprP4}, {}}})
      .Any({{UniP8}, {{SgprP8}, {}}});

  addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).Any({{UniP5}, {{SgprP5}, {}}});

  addRulesForGOpcs({G_SI_CALL})
      .Any({{_, UniP0}, {{None}, {SgprP0}}})
      .Any({{_, DivP0}, {{None}, {SgprP0Call_WF}}})
      .Any({{_, UniP4}, {{None}, {SgprP4}}})
      .Any({{_, DivP4}, {{None}, {SgprP4Call_WF}}});
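
  // A divergent call target cannot be used directly; the SgprP0Call_WF /
  // SgprP4Call_WF mappings build a waterfall loop that moves the pointer
  // into an sgpr (via readfirstlane) before issuing the call.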

  bool hasSALUFloat = ST->hasSALUFloatInsts();

  addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}}, !hasSALUFloat)
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, ScalarizeToS16},
           hasSALUFloat)
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
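
  // SALU float instructions are scalar-only, so on hasSALUFloat targets a
  // uniform packed-f16 operation is scalarized (ScalarizeToS16) into two
  // s16 SALU ops instead of being sent to VALU.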

  addRulesForGOpcs({G_FSUB, G_STRICT_FSUB}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat);

  addRulesForGOpcs({G_FMAD}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_FLDEXP, G_STRICT_FLDEXP}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_FMA, G_STRICT_FMA}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr64}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}})
      .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32, VgprV2S32, VgprV2S32}}})
      .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32, VgprV2S32, VgprV2S32}}})
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16, Sgpr16}}, hasSALUFloat)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Uni(V2S16,
           {{SgprV2S16}, {SgprV2S16, SgprV2S16, SgprV2S16}, ScalarizeToS16},
           hasSALUFloat)
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}},
           !hasSALUFloat);

  addRulesForGOpcs({G_AMDGPU_FMED3}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  // TODO: This opcode is generated from the i64->i16 signed clamp pattern in
  // the PreLegalizerCombiner. Move the combine to RegBankCombiner to keep more
  // instructions on SALU.
  addRulesForGOpcs({G_AMDGPU_SMED3}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  // FNEG and FABS are either folded in as source modifiers or selected as
  // bitwise XOR and AND with a sign-bit mask. XOR and AND are available on
  // SALU, but for targets without SALU float we still select the VGPR form
  // since there would be no real sgpr use.
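  // For f32, for example, fneg is an XOR with 0x80000000 and fabs is an
  // AND with 0x7fffffff.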
  addRulesForGOpcs({G_FNEG, G_FABS}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasSALUFloat)
      .Uni(S16, {{Sgpr16}, {Sgpr16}}, hasSALUFloat)
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32}}, hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}}, !hasSALUFloat)
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16}, ScalarizeToS16}, hasSALUFloat)
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
      .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
      .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});

  addRulesForGOpcs({G_FCANONICALIZE}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
      .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
      .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});

  bool hasPST = ST->hasPseudoScalarTrans();
  addRulesForGOpcs({G_FSQRT}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S16, {{Sgpr16}, {Sgpr16}}, hasPST)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasPST);
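
  // With pseudo-scalar transcendental instructions (hasPST) a uniform f16
  // sqrt can be kept on sgpr; otherwise it executes on VALU and the
  // uniform result is copied back (UniInVgprS16).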

  addRulesForGOpcs({G_FPTOUI, G_FPTOSI})
      .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
      .Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
      .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat)
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}});

  addRulesForGOpcs({G_UITOFP, G_SITOFP})
      .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
      .Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat)
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}});

  addRulesForGOpcs({G_FPEXT})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
      .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
      .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat);

  addRulesForGOpcs({G_AMDGPU_CVT_PK_I16_I32}, Standard)
      .Uni(V2S16, {{UniInVgprV2S16}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_AMDGPU_FMIN_LEGACY, G_AMDGPU_FMAX_LEGACY}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_FMINIMUM, G_FMAXIMUM}, Standard)
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_FMINNUM_IEEE, G_FMAXNUM_IEEE, G_FMINNUM, G_FMAXNUM},
                   Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat);

  addRulesForGOpcs({G_FPTRUNC})
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
      .Any({{UniV2S16, V2S32}, {{UniInVgprV2S16}, {VgprV2S32}}})
      .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat);

  addRulesForGOpcs({G_IS_FPCLASS})
      .Any({{DivS1, S16}, {{Vcc}, {Vgpr16}}})
      .Any({{UniS1, S16}, {{UniInVcc}, {Vgpr16}}})
      .Any({{DivS1, S32}, {{Vcc}, {Vgpr32}}})
      .Any({{UniS1, S32}, {{UniInVcc}, {Vgpr32}}})
      .Any({{DivS1, S64}, {{Vcc}, {Vgpr64}}})
      .Any({{UniS1, S64}, {{UniInVcc}, {Vgpr64}}});

  addRulesForGOpcs({G_FCMP}, Standard)
      .Any({{UniS1, _, S16}, {{Sgpr32Trunc}, {None, Sgpr16, Sgpr16}}},
           hasSALUFloat)
      .Any({{UniS1, _, S16}, {{UniInVcc}, {None, Vgpr16, Vgpr16}}},
           !hasSALUFloat)
      .Any({{DivS1, _, S16}, {{Vcc}, {None, Vgpr16, Vgpr16}}})
      .Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}},
           hasSALUFloat)
      .Any({{UniS1, _, S32}, {{UniInVcc}, {None, Vgpr32, Vgpr32}}},
           !hasSALUFloat)
      .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{UniS1, _, S64}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
      .Any({{DivS1, _, S64}, {{Vcc}, {None, Vgpr64, Vgpr64}}});
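
  // A uniform s1 compare result is held in an sgpr as a 32-bit value and
  // truncated back (Sgpr32Trunc); when the compare has to run on VALU its
  // vcc result is copied into an sgpr instead (UniInVcc).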

  addRulesForGOpcs({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUNDEVEN, G_FFLOOR, G_FCEIL,
                    G_FEXP2, G_FLOG2},
                   Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}});

  using namespace Intrinsic;

  addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});

  addRulesForIOpcs({amdgcn_groupstaticsize}).Any({{S32}, {{Sgpr32}, {IntrId}}});

  // This is an "intrinsic lane mask"; it was set to i32/i64 in LLVM IR.
  addRulesForIOpcs({amdgcn_end_cf})
      .Any({{_, UniS32}, {{}, {IntrId, Sgpr32}}})
      .Any({{_, UniS64}, {{}, {IntrId, Sgpr64}}});

  addRulesForIOpcs({amdgcn_if_break}, Standard)
      .Uni(S64, {{Sgpr64}, {IntrId, Vcc, Sgpr64}})
      .Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});

  addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
      .Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});
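
  // mbcnt counts active lanes below the current lane, so its result is
  // inherently divergent; only a divergent-result rule is needed.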

  addRulesForIOpcs({amdgcn_readfirstlane})
      .Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}})
      // This should not exist in the first place; it comes from call lowering,
      // which readfirstlanes the register just in case it is not in an sgpr.
      .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});

  addRulesForIOpcs({amdgcn_s_sleep}).Any({{_, _}, {{}, {IntrId, Imm}}});

  addRulesForIOpcs({amdgcn_bitop3}, Standard)
      .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});

  addRulesForIOpcs({amdgcn_mul_u24, amdgcn_mul_i24}, Standard)
      .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {IntrId, Vgpr32, Vgpr32}});

  addRulesForIOpcs({amdgcn_mulhi_u24, amdgcn_mulhi_i24, amdgcn_fmul_legacy},
                   Standard)
      .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}});

  addRulesForIOpcs({amdgcn_fma_legacy}, Standard)
      .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});

  addRulesForIOpcs({amdgcn_frexp_mant, amdgcn_fract}, Standard)
      .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}})
      .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}})
      .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64}})
      .Div(S64, {{Vgpr64}, {IntrId, Vgpr64}});

  addRulesForIOpcs({amdgcn_prng_b32})
      .Any({{UniS32}, {{UniInVgprS32}, {IntrId, Vgpr32}}})
      .Any({{DivS32}, {{Vgpr32}, {IntrId, Vgpr32}}});

  addRulesForIOpcs({amdgcn_sffbh}, Standard)
      .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}})
      .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}});

  addRulesForIOpcs({amdgcn_ubfe, amdgcn_sbfe}, Standard)
      .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32, Sgpr32, Sgpr32}, S_BFE})
      .Uni(S64, {{Sgpr64}, {IntrId, Sgpr64, Sgpr32, Sgpr32}, S_BFE})
      .Div(S64, {{Vgpr64}, {IntrId, Vgpr64, Vgpr32, Vgpr32}, V_BFE});
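
  // Note: S_BFE takes the bitfield offset and width packed into a single
  // src1 operand (offset in the low bits, width in bits [22:16]); the
  // S_BFE lowering builds that packed operand from the separate offset
  // and width operands used here.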

  addRulesForIOpcs({amdgcn_global_load_tr_b64})
      .Any({{DivB64}, {{VgprB64}, {IntrId, SgprP1}}})
      .Any({{DivB32}, {{VgprB32}, {IntrId, SgprP1}}});

  addRulesForIOpcs({amdgcn_global_load_tr_b128})
      .Any({{DivB64}, {{VgprB64}, {IntrId, SgprP1}}})
      .Any({{DivB128}, {{VgprB128}, {IntrId, SgprP1}}});

  addRulesForIOpcs({amdgcn_global_atomic_ordered_add_b64})
      .Any({{DivS64}, {{Vgpr64}, {IntrId, VgprP1, Vgpr64}}});

  addRulesForIOpcs({amdgcn_raw_buffer_load_lds})
      .Any({{_}, {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Sgpr32}}});

  addRulesForIOpcs({amdgcn_struct_buffer_load_lds})
      .Any({{_},
            {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Vgpr32, Sgpr32}}});

  addRulesForIOpcs({amdgcn_raw_ptr_buffer_load_lds})
      .Any({{_}, {{}, {IntrId, SgprP8, SgprP3, Imm, Vgpr32, Sgpr32}}});

  addRulesForIOpcs({amdgcn_struct_ptr_buffer_load_lds})
      .Any({{_}, {{}, {IntrId, SgprP8, SgprP3, Imm, Vgpr32, Vgpr32, Sgpr32}}});
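
  // These intrinsics copy from the buffer directly into LDS (the SgprP3
  // operand), so there is no result register to map.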

  addRulesForIOpcs({amdgcn_wwm, amdgcn_strict_wwm}, StandardB)
      .Div(B32, {{VgprB32}, {IntrId, VgprB32}})
      .Uni(B32, {{SgprB32}, {IntrId, SgprB32}})
      .Div(B64, {{VgprB64}, {IntrId, VgprB64}})
      .Uni(B64, {{SgprB64}, {IntrId, SgprB64}})
      .Div(B96, {{VgprB96}, {IntrId, VgprB96}})
      .Uni(B96, {{SgprB96}, {IntrId, SgprB96}})
      .Div(B128, {{VgprB128}, {IntrId, VgprB128}})
      .Uni(B128, {{SgprB128}, {IntrId, SgprB128}})
      .Any({{UniB256}, {{SgprB256}, {IntrId, SgprB256}}})
      .Any({{DivB256}, {{VgprB256}, {IntrId, VgprB256}}})
      .Any({{UniB512}, {{SgprB512}, {IntrId, SgprB512}}})
      .Any({{DivB512}, {{VgprB512}, {IntrId, VgprB512}}});

} // end initialize rules