1//===-- AMDGPURegBankLegalizeRules.cpp ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// Definitions of RegBankLegalize Rules for all opcodes.
10/// Implementation of container for all the Rules and search.
11/// Fast search for most common case when Rule.Predicate checks LLT and
12/// uniformity of register in operand 0.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AMDGPURegBankLegalizeRules.h"
17#include "AMDGPUInstrInfo.h"
18#include "GCNSubtarget.h"
19#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
20#include "llvm/CodeGen/MachineUniformityAnalysis.h"
21#include "llvm/IR/IntrinsicsAMDGPU.h"
22#include "llvm/Support/AMDGPUAddrSpace.h"
23
24#define DEBUG_TYPE "amdgpu-regbanklegalize"
25
26using namespace llvm;
27using namespace AMDGPU;
28
29bool AMDGPU::isAnyPtr(LLT Ty, unsigned Width) {
30 return Ty.isPointer() && Ty.getSizeInBits() == Width;
31}
32
33RegBankLLTMapping::RegBankLLTMapping(
34 std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
35 std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
36 LoweringMethodID LoweringMethod)
37 : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
38 LoweringMethod(LoweringMethod) {}
39
40PredicateMapping::PredicateMapping(
41 std::initializer_list<UniformityLLTOpPredicateID> OpList,
42 std::function<bool(const MachineInstr &)> TestFunc)
43 : OpUniformityAndTypes(OpList), TestFunc(TestFunc) {}
44
45bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
46 const MachineUniformityInfo &MUI,
47 const MachineRegisterInfo &MRI) {
48 switch (UniID) {
49 case S1:
50 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 1);
51 case S16:
52 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 16);
53 case S32:
54 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 32);
55 case S64:
56 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 64);
57 case S128:
58 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 128);
59 case P0:
60 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 0, SizeInBits: 64);
61 case P1:
62 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 1, SizeInBits: 64);
63 case P2:
64 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 2, SizeInBits: 32);
65 case P3:
66 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 3, SizeInBits: 32);
67 case P4:
68 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 4, SizeInBits: 64);
69 case P5:
70 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 5, SizeInBits: 32);
71 case P8:
72 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 8, SizeInBits: 128);
73 case Ptr32:
74 return isAnyPtr(Ty: MRI.getType(Reg), Width: 32);
75 case Ptr64:
76 return isAnyPtr(Ty: MRI.getType(Reg), Width: 64);
77 case Ptr128:
78 return isAnyPtr(Ty: MRI.getType(Reg), Width: 128);
79 case V2S16:
80 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 16);
81 case V2S32:
82 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32);
83 case V3S32:
84 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 3, ScalarSizeInBits: 32);
85 case V4S32:
86 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32);
87 case B32:
88 return MRI.getType(Reg).getSizeInBits() == 32;
89 case B64:
90 return MRI.getType(Reg).getSizeInBits() == 64;
91 case B96:
92 return MRI.getType(Reg).getSizeInBits() == 96;
93 case B128:
94 return MRI.getType(Reg).getSizeInBits() == 128;
95 case B160:
96 return MRI.getType(Reg).getSizeInBits() == 160;
97 case B256:
98 return MRI.getType(Reg).getSizeInBits() == 256;
99 case B512:
100 return MRI.getType(Reg).getSizeInBits() == 512;
101 case DivAnyTy:
102 return MUI.isDivergentAtDef(V: Reg);
103 case UniS1:
104 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 1) && MUI.isUniformAtDef(V: Reg);
105 case UniS16:
106 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 16) && MUI.isUniformAtDef(V: Reg);
107 case UniS32:
108 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 32) && MUI.isUniformAtDef(V: Reg);
109 case UniS64:
110 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 64) && MUI.isUniformAtDef(V: Reg);
111 case UniS128:
112 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 128) && MUI.isUniformAtDef(V: Reg);
113 case UniP0:
114 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 0, SizeInBits: 64) && MUI.isUniformAtDef(V: Reg);
115 case UniP1:
116 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 1, SizeInBits: 64) && MUI.isUniformAtDef(V: Reg);
117 case UniP2:
118 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 2, SizeInBits: 32) && MUI.isUniformAtDef(V: Reg);
119 case UniP3:
120 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 3, SizeInBits: 32) && MUI.isUniformAtDef(V: Reg);
121 case UniP4:
122 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 4, SizeInBits: 64) && MUI.isUniformAtDef(V: Reg);
123 case UniP5:
124 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 5, SizeInBits: 32) && MUI.isUniformAtDef(V: Reg);
125 case UniP6:
126 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 6, SizeInBits: 32) && MUI.isUniformAtDef(V: Reg);
127 case UniP8:
128 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 8, SizeInBits: 128) && MUI.isUniformAtDef(V: Reg);
129 case UniPtr32:
130 return isAnyPtr(Ty: MRI.getType(Reg), Width: 32) && MUI.isUniformAtDef(V: Reg);
131 case UniPtr64:
132 return isAnyPtr(Ty: MRI.getType(Reg), Width: 64) && MUI.isUniformAtDef(V: Reg);
133 case UniPtr128:
134 return isAnyPtr(Ty: MRI.getType(Reg), Width: 128) && MUI.isUniformAtDef(V: Reg);
135 case UniV2S16:
136 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 16) &&
137 MUI.isUniformAtDef(V: Reg);
138 case UniV2S32:
139 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32) &&
140 MUI.isUniformAtDef(V: Reg);
141 case UniV3S32:
142 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 3, ScalarSizeInBits: 32) &&
143 MUI.isUniformAtDef(V: Reg);
144 case UniV4S32:
145 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32) &&
146 MUI.isUniformAtDef(V: Reg);
147 case UniV6S32:
148 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 6, ScalarSizeInBits: 32) &&
149 MUI.isUniformAtDef(V: Reg);
150 case UniV8S16:
151 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16) &&
152 MUI.isUniformAtDef(V: Reg);
153 case UniV8S32:
154 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 32) &&
155 MUI.isUniformAtDef(V: Reg);
156 case UniV16S16:
157 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 16) &&
158 MUI.isUniformAtDef(V: Reg);
159 case UniV16S32:
160 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 32) &&
161 MUI.isUniformAtDef(V: Reg);
162 case UniV32S16:
163 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 32, ScalarSizeInBits: 16) &&
164 MUI.isUniformAtDef(V: Reg);
165 case UniV32S32:
166 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 32, ScalarSizeInBits: 32) &&
167 MUI.isUniformAtDef(V: Reg);
168 case UniV2S64:
169 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) &&
170 MUI.isUniformAtDef(V: Reg);
171 case UniB32:
172 return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniformAtDef(V: Reg);
173 case UniB64:
174 return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniformAtDef(V: Reg);
175 case UniB96:
176 return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniformAtDef(V: Reg);
177 case UniB128:
178 return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniformAtDef(V: Reg);
179 case UniB160:
180 return MRI.getType(Reg).getSizeInBits() == 160 && MUI.isUniformAtDef(V: Reg);
181 case UniB256:
182 return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniformAtDef(V: Reg);
183 case UniB512:
184 return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniformAtDef(V: Reg);
185 case UniBRC: {
186 if (MUI.isDivergentAtDef(V: Reg))
187 return false;
188 // Check if there is SGPR register class of same size as the LLT.
189 const SIRegisterInfo *TRI =
190 static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
191 // There is no 16 bit SGPR register class. Extra size check is required
192 // since getSGPRClassForBitWidth returns SReg_32RegClass for Size 16.
193 unsigned LLTSize = MRI.getType(Reg).getSizeInBits();
194 return LLTSize >= 32 && TRI->getSGPRClassForBitWidth(BitWidth: LLTSize);
195 }
196 case DivS1:
197 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 1) && MUI.isDivergentAtDef(V: Reg);
198 case DivS16:
199 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 16) && MUI.isDivergentAtDef(V: Reg);
200 case DivS32:
201 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 32) && MUI.isDivergentAtDef(V: Reg);
202 case DivS64:
203 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 64) && MUI.isDivergentAtDef(V: Reg);
204 case DivS128:
205 return MRI.getType(Reg) == LLT::scalar(SizeInBits: 128) && MUI.isDivergentAtDef(V: Reg);
206 case DivP0:
207 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 0, SizeInBits: 64) && MUI.isDivergentAtDef(V: Reg);
208 case DivP1:
209 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 1, SizeInBits: 64) && MUI.isDivergentAtDef(V: Reg);
210 case DivP2:
211 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 2, SizeInBits: 32) && MUI.isDivergentAtDef(V: Reg);
212 case DivP3:
213 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 3, SizeInBits: 32) && MUI.isDivergentAtDef(V: Reg);
214 case DivP4:
215 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 4, SizeInBits: 64) && MUI.isDivergentAtDef(V: Reg);
216 case DivP5:
217 return MRI.getType(Reg) == LLT::pointer(AddressSpace: 5, SizeInBits: 32) && MUI.isDivergentAtDef(V: Reg);
218 case DivPtr32:
219 return isAnyPtr(Ty: MRI.getType(Reg), Width: 32) && MUI.isDivergentAtDef(V: Reg);
220 case DivPtr64:
221 return isAnyPtr(Ty: MRI.getType(Reg), Width: 64) && MUI.isDivergentAtDef(V: Reg);
222 case DivPtr128:
223 return isAnyPtr(Ty: MRI.getType(Reg), Width: 128) && MUI.isDivergentAtDef(V: Reg);
224 case DivV2S16:
225 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 16) &&
226 MUI.isDivergentAtDef(V: Reg);
227 case DivV2S32:
228 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32) &&
229 MUI.isDivergentAtDef(V: Reg);
230 case DivV4S32:
231 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32) &&
232 MUI.isDivergentAtDef(V: Reg);
233 case DivV2S64:
234 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) &&
235 MUI.isDivergentAtDef(V: Reg);
236 case DivV3S32:
237 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 3, ScalarSizeInBits: 32) &&
238 MUI.isDivergentAtDef(V: Reg);
239 case DivV4S16:
240 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16) &&
241 MUI.isDivergentAtDef(V: Reg);
242 case DivV8S16:
243 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16) &&
244 MUI.isDivergentAtDef(V: Reg);
245 case DivV8S32:
246 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 32) &&
247 MUI.isDivergentAtDef(V: Reg);
248 case DivV16S16:
249 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 16) &&
250 MUI.isDivergentAtDef(V: Reg);
251 case DivV16S32:
252 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 32) &&
253 MUI.isDivergentAtDef(V: Reg);
254 case DivV6S32:
255 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 6, ScalarSizeInBits: 32) &&
256 MUI.isDivergentAtDef(V: Reg);
257 case DivV32S16:
258 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 32, ScalarSizeInBits: 16) &&
259 MUI.isDivergentAtDef(V: Reg);
260 case DivV32S32:
261 return MRI.getType(Reg) == LLT::fixed_vector(NumElements: 32, ScalarSizeInBits: 32) &&
262 MUI.isDivergentAtDef(V: Reg);
263 case DivB32:
264 return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergentAtDef(V: Reg);
265 case DivB64:
266 return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergentAtDef(V: Reg);
267 case DivB96:
268 return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergentAtDef(V: Reg);
269 case DivB128:
270 return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergentAtDef(V: Reg);
271 case DivB160:
272 return MRI.getType(Reg).getSizeInBits() == 160 && MUI.isDivergentAtDef(V: Reg);
273 case DivB256:
274 return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergentAtDef(V: Reg);
275 case DivB512:
276 return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergentAtDef(V: Reg);
277 case DivBRC: {
278 if (MUI.isUniformAtDef(V: Reg))
279 return false;
280 // Check if there is VGPR register class of same size as the LLT.
281 const SIRegisterInfo *TRI =
282 static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
283 return TRI->getSGPRClassForBitWidth(BitWidth: MRI.getType(Reg).getSizeInBits());
284 }
285 case BRC: {
286 // Check if there is SGPR and VGPR register class of same size as the LLT.
287 const SIRegisterInfo *TRI =
288 static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
289 unsigned LLTSize = MRI.getType(Reg).getSizeInBits();
290 return LLTSize >= 32 && TRI->getSGPRClassForBitWidth(BitWidth: LLTSize) &&
291 TRI->getVGPRClassForBitWidth(BitWidth: LLTSize);
292 }
293 case _:
294 return true;
295 default:
296 llvm_unreachable("missing matchUniformityAndLLT");
297 }
298}
299
300bool PredicateMapping::match(const MachineInstr &MI,
301 const MachineUniformityInfo &MUI,
302 const MachineRegisterInfo &MRI) const {
303 // Check LLT signature.
304 for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
305 const MachineOperand &MO = MI.getOperand(i);
306 if (OpUniformityAndTypes[i] == _) {
307 assert((!MI.getOperand(i).isReg() ||
308 !MI.getOperand(i).getReg().isVirtual()) &&
309 "_ is for non-register and physical register operands only");
310 continue;
311 }
312
313 // Remaining IDs check registers.
314 if (!MO.isReg())
315 return false;
316
317 if (!matchUniformityAndLLT(Reg: MO.getReg(), UniID: OpUniformityAndTypes[i], MUI, MRI))
318 return false;
319 }
320
321 // More complex check.
322 if (TestFunc)
323 return TestFunc(MI);
324
325 return true;
326}
327
328SetOfRulesForOpcode::SetOfRulesForOpcode() = default;
329
330SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
331 : FastTypes(FastTypes) {}
332
333UniformityLLTOpPredicateID LLTToId(LLT Ty) {
334 if (Ty == LLT::scalar(SizeInBits: 16))
335 return S16;
336 if (Ty == LLT::scalar(SizeInBits: 32))
337 return S32;
338 if (Ty == LLT::scalar(SizeInBits: 64))
339 return S64;
340 if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 16))
341 return V2S16;
342 if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32))
343 return V2S32;
344 if (Ty == LLT::fixed_vector(NumElements: 3, ScalarSizeInBits: 32))
345 return V3S32;
346 if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32))
347 return V4S32;
348 return _;
349}
350
351UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
352 if (Ty == LLT::scalar(SizeInBits: 32) || Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 16) ||
353 isAnyPtr(Ty, Width: 32))
354 return B32;
355 if (Ty == LLT::scalar(SizeInBits: 64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32) ||
356 Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16) || isAnyPtr(Ty, Width: 64))
357 return B64;
358 if (Ty == LLT::fixed_vector(NumElements: 3, ScalarSizeInBits: 32))
359 return B96;
360 if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32) || Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) ||
361 Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16) || isAnyPtr(Ty, Width: 128))
362 return B128;
363 return _;
364}
365
366const RegBankLLTMapping *
367SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI,
368 const MachineRegisterInfo &MRI,
369 const MachineUniformityInfo &MUI) const {
370 // Search in "Fast Rules".
371 // Note: if fast rules are enabled, RegBankLLTMapping must be added in each
372 // slot that could "match fast Predicate". If not, InvalidMapping is
373 // returned which results in failure, does not search "Slow Rules".
374 if (FastTypes != NoFastRules) {
375 Register Reg = MI.getOperand(i: 0).getReg();
376 int Slot;
377 if (FastTypes == StandardB)
378 Slot = getFastPredicateSlot(Ty: LLTToBId(Ty: MRI.getType(Reg)));
379 else
380 Slot = getFastPredicateSlot(Ty: LLTToId(Ty: MRI.getType(Reg)));
381
382 if (Slot != -1)
383 return MUI.isUniformAtDef(V: Reg) ? &Uni[Slot] : &Div[Slot];
384 }
385
386 // Slow search for more complex rules.
387 for (const RegBankLegalizeRule &Rule : Rules) {
388 if (Rule.Predicate.match(MI, MUI, MRI))
389 return &Rule.OperandMapping;
390 }
391
392 return nullptr;
393}
394
395void SetOfRulesForOpcode::addRule(RegBankLegalizeRule Rule) {
396 Rules.push_back(Elt: Rule);
397}
398
399void SetOfRulesForOpcode::addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
400 RegBankLLTMapping RuleApplyIDs) {
401 int Slot = getFastPredicateSlot(Ty);
402 assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
403 Div[Slot] = std::move(RuleApplyIDs);
404}
405
406void SetOfRulesForOpcode::addFastRuleUniform(UniformityLLTOpPredicateID Ty,
407 RegBankLLTMapping RuleApplyIDs) {
408 int Slot = getFastPredicateSlot(Ty);
409 assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
410 Uni[Slot] = std::move(RuleApplyIDs);
411}
412
413int SetOfRulesForOpcode::getFastPredicateSlot(
414 UniformityLLTOpPredicateID Ty) const {
415 switch (FastTypes) {
416 case Standard: {
417 switch (Ty) {
418 case S32:
419 return 0;
420 case S16:
421 return 1;
422 case S64:
423 return 2;
424 case V2S16:
425 return 3;
426 default:
427 return -1;
428 }
429 }
430 case StandardB: {
431 switch (Ty) {
432 case B32:
433 return 0;
434 case B64:
435 return 1;
436 case B96:
437 return 2;
438 case B128:
439 return 3;
440 default:
441 return -1;
442 }
443 }
444 case Vector: {
445 switch (Ty) {
446 case S32:
447 return 0;
448 case V2S32:
449 return 1;
450 case V3S32:
451 return 2;
452 case V4S32:
453 return 3;
454 default:
455 return -1;
456 }
457 }
458 default:
459 return -1;
460 }
461}
462
463RegBankLegalizeRules::RuleSetInitializer
464RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
465 FastRulesTypes FastTypes) {
466 return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
467}
468
469RegBankLegalizeRules::RuleSetInitializer
470RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
471 FastRulesTypes FastTypes) {
472 return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
473}
474
475const SetOfRulesForOpcode *
476RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
477 unsigned Opc = MI.getOpcode();
478 if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
479 Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
480 Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
481 unsigned IntrID = cast<GIntrinsic>(Val&: MI).getIntrinsicID();
482 auto IRAIt = IRulesAlias.find(Val: IntrID);
483 if (IRAIt == IRulesAlias.end())
484 return nullptr;
485 return &IRules.at(Val: IRAIt->second);
486 }
487
488 auto GRAIt = GRulesAlias.find(Val: Opc);
489 if (GRAIt == GRulesAlias.end())
490 return nullptr;
491 return &GRules.at(Val: GRAIt->second);
492}
493
494// Syntactic sugar wrapper for predicate lambda that enables '&&', '||' and '!'.
495class Predicate {
496private:
497 struct Elt {
498 // Save formula composed of Pred, '&&', '||' and '!' as a jump table.
499 // Sink ! to Pred. For example !((A && !B) || C) -> (!A || B) && !C
500 // Sequences of && and || will be represented by jumps, for example:
501 // (A && B && ... X) or (A && B && ... X) || Y
502 // A == true jump to B
503 // A == false jump to end or Y, result is A(false) or Y
504 // (A || B || ... X) or (A || B || ... X) && Y
505 // A == true jump to end or Y, result is A(true) or Y
506 // A == false jump to B
507 // Notice that when negating expression, we simply flip Neg on each Pred
508 // and swap TJumpOffset and FJumpOffset (&& becomes ||, || becomes &&).
509 std::function<bool(const MachineInstr &)> Pred;
510 bool Neg; // Neg of Pred is calculated before jump
511 unsigned TJumpOffset;
512 unsigned FJumpOffset;
513 };
514
515 SmallVector<Elt, 8> Expression;
516
517 Predicate(SmallVectorImpl<Elt> &&Expr) { Expression.swap(RHS&: Expr); };
518
519public:
520 Predicate(std::function<bool(const MachineInstr &)> Pred) {
521 Expression.push_back(Elt: {.Pred: Pred, .Neg: false, .TJumpOffset: 1, .FJumpOffset: 1});
522 };
523
524 bool operator()(const MachineInstr &MI) const {
525 unsigned Idx = 0;
526 unsigned ResultIdx = Expression.size();
527 bool Result;
528 do {
529 Result = Expression[Idx].Pred(MI);
530 Result = Expression[Idx].Neg ? !Result : Result;
531 if (Result) {
532 Idx += Expression[Idx].TJumpOffset;
533 } else {
534 Idx += Expression[Idx].FJumpOffset;
535 }
536 } while ((Idx != ResultIdx));
537
538 return Result;
539 };
540
541 Predicate operator!() const {
542 SmallVector<Elt, 8> NegExpression;
543 for (const Elt &ExprElt : Expression) {
544 NegExpression.push_back(Elt: {.Pred: ExprElt.Pred, .Neg: !ExprElt.Neg, .TJumpOffset: ExprElt.FJumpOffset,
545 .FJumpOffset: ExprElt.TJumpOffset});
546 }
547 return Predicate(std::move(NegExpression));
548 };
549
550 Predicate operator&&(const Predicate &RHS) const {
551 SmallVector<Elt, 8> AndExpression = Expression;
552
553 unsigned RHSSize = RHS.Expression.size();
554 unsigned ResultIdx = Expression.size();
555 for (unsigned i = 0; i < ResultIdx; ++i) {
556 // LHS results in false, whole expression results in false.
557 if (i + AndExpression[i].FJumpOffset == ResultIdx)
558 AndExpression[i].FJumpOffset += RHSSize;
559 }
560
561 AndExpression.append(RHS: RHS.Expression);
562
563 return Predicate(std::move(AndExpression));
564 }
565
566 Predicate operator||(const Predicate &RHS) const {
567 SmallVector<Elt, 8> OrExpression = Expression;
568
569 unsigned RHSSize = RHS.Expression.size();
570 unsigned ResultIdx = Expression.size();
571 for (unsigned i = 0; i < ResultIdx; ++i) {
572 // LHS results in true, whole expression results in true.
573 if (i + OrExpression[i].TJumpOffset == ResultIdx)
574 OrExpression[i].TJumpOffset += RHSSize;
575 }
576
577 OrExpression.append(RHS: RHS.Expression);
578
579 return Predicate(std::move(OrExpression));
580 }
581};
582
583// Initialize rules
584RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
585 MachineRegisterInfo &_MRI)
586 : ST(&_ST), MRI(&_MRI) {
587
588 addRulesForGOpcs(OpcList: {G_ADD, G_SUB}, FastTypes: Standard)
589 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
590 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
591 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}})
592 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
593 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackAExt})
594 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
595 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64, Sgpr64}})
596 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr64}})
597 .Any(Init: {.Predicate: {UniV2S64}, .OperandMapping: {{UniInVgprV2S64}, {VgprV2S64, VgprV2S64}}})
598 .Any(Init: {.Predicate: {DivV2S64}, .OperandMapping: {{VgprV2S64}, {VgprV2S64, VgprV2S64}}});
599
600 addRulesForGOpcs(OpcList: {G_UADDO, G_USUBO}, FastTypes: Standard)
601 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}})
602 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}});
603
604 addRulesForGOpcs(OpcList: {G_UADDE, G_USUBE, G_SADDE, G_SSUBE}, FastTypes: Standard)
605 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32, Sgpr32AExtBoolInReg}})
606 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32, Vcc}, {Vgpr32, Vgpr32, Vcc}});
607
608 addRulesForGOpcs(OpcList: {G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT}, FastTypes: Standard)
609 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16}})
610 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
611 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}})
612 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
613 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
614 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
615
616 bool HasVecMulU64 = ST->hasVMulU64Inst();
617 addRulesForGOpcs(OpcList: {G_MUL}, FastTypes: Standard)
618 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
619 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}})
620 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
621 .Uni(Ty: S64, RuleApplyIDs: {{SgprB64}, {SgprB64, SgprB64}})
622 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
623 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
624 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
625 .Div(Ty: S64, RuleApplyIDs: {{VgprB64}, {VgprB64, VgprB64}}, STPred: HasVecMulU64)
626 .Div(Ty: S64, RuleApplyIDs: {{VgprB64}, {VgprB64, VgprB64}, SplitTo32Mul}, STPred: !HasVecMulU64);
627
628 bool hasMulHi = ST->hasScalarMulHiInsts();
629 addRulesForGOpcs(OpcList: {G_UMULH, G_SMULH}, FastTypes: Standard)
630 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
631 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}}, STPred: hasMulHi)
632 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}}, STPred: !hasMulHi);
633
634 addRulesForGOpcs(OpcList: {G_AMDGPU_MAD_U64_U32}, FastTypes: Standard)
635 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64, Vcc}, {Vgpr32, Vgpr32, Vgpr64}})
636 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64, Sgpr32Trunc}, {Sgpr32, Sgpr32, Sgpr64}, UniMAD64});
637
638 bool HasScalarSMulU64 = ST->hasScalarSMulU64();
639 addRulesForGOpcs(OpcList: {G_AMDGPU_S_MUL_U64_U32, G_AMDGPU_S_MUL_I64_I32}, FastTypes: Standard)
640 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64, Sgpr64}, UniMul64}, STPred: HasScalarSMulU64)
641 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr64}, DivSMulToMAD});
642
643 addRulesForGOpcs(OpcList: {G_XOR, G_OR, G_AND}, FastTypes: StandardB)
644 .Any(Init: {.Predicate: {UniS1}, .OperandMapping: {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}})
645 .Any(Init: {.Predicate: {DivS1}, .OperandMapping: {{Vcc}, {Vcc, Vcc}}})
646 .Any(Init: {.Predicate: {UniS16}, .OperandMapping: {{Sgpr16}, {Sgpr16, Sgpr16}}})
647 .Any(Init: {.Predicate: {DivS16}, .OperandMapping: {{Vgpr16}, {Vgpr16, Vgpr16}}})
648 .Uni(Ty: B32, RuleApplyIDs: {{SgprB32}, {SgprB32, SgprB32}})
649 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {VgprB32, VgprB32}})
650 .Uni(Ty: B64, RuleApplyIDs: {{SgprB64}, {SgprB64, SgprB64}})
651 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});
652
653 addRulesForGOpcs(OpcList: {G_SHL}, FastTypes: Standard)
654 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
655 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
656 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
657 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
658 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}})
659 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64, Sgpr32}})
660 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
661 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr32}});
662
663 addRulesForGOpcs(OpcList: {G_LSHR}, FastTypes: Standard)
664 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
665 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
666 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
667 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
668 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}})
669 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64, Sgpr32}})
670 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
671 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr32}});
672
673 addRulesForGOpcs(OpcList: {G_ASHR}, FastTypes: Standard)
674 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
675 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
676 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
677 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
678 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}})
679 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64, Sgpr32}})
680 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
681 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr32}});
682
683 addRulesForGOpcs(OpcList: {G_FSHR}, FastTypes: Standard)
684 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
685 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
686
687 addRulesForGOpcs(OpcList: {G_BSWAP}, FastTypes: Standard)
688 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16}})
689 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16}})
690 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32}})
691 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32}})
692 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16}})
693 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16}});
694
695 addRulesForGOpcs(OpcList: {G_AMDGPU_CVT_F32_UBYTE0, G_AMDGPU_CVT_F32_UBYTE1,
696 G_AMDGPU_CVT_F32_UBYTE2, G_AMDGPU_CVT_F32_UBYTE3,
697 G_AMDGPU_RCP_IFLAG},
698 FastTypes: Standard)
699 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32}})
700 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32}});
701
702 addRulesForGOpcs(OpcList: {G_FRAME_INDEX}).Any(Init: {.Predicate: {UniP5, _}, .OperandMapping: {{SgprP5}, {None}}});
703
704 addRulesForGOpcs(OpcList: {G_UBFX, G_SBFX}, FastTypes: Standard)
705 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
706 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
707 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
708 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});
709
710 addRulesForGOpcs(OpcList: {G_SMIN, G_SMAX}, FastTypes: Standard)
711 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32SExt}})
712 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
713 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}})
714 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
715 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackMinMax})
716 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
717 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64, Vgpr64}})
718 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr64}});
719
720 addRulesForGOpcs(OpcList: {G_UMIN, G_UMAX}, FastTypes: Standard)
721 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
722 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
723 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}})
724 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
725 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackMinMax})
726 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
727 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64, Vgpr64}})
728 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr64}});
729
730 addRulesForGOpcs(OpcList: {G_IMPLICIT_DEF})
731 .Any(Init: {.Predicate: {UniS1}, .OperandMapping: {{Sgpr32Trunc}, {}}})
732 .Any(Init: {.Predicate: {UniS16}, .OperandMapping: {{Sgpr16}, {}}})
733 .Any(Init: {.Predicate: {UniBRC}, .OperandMapping: {{SgprBRC}, {}}});
734
// G_CONSTANT: only uniform results have rules here. A uniform S1 constant is
// mapped with Sgpr32Trunc and carries the UniCstExt lowering; S16/S32/S64 and
// 32-/64-bit pointer constants map directly to the SGPR mapping of the same
// type. The immediate operand gets no mapping entry.
735 addRulesForGOpcs(OpcList: {G_CONSTANT}, FastTypes: Standard)
736 .Any(Init: {.Predicate: {UniS1, _}, .OperandMapping: {{Sgpr32Trunc}, {}, UniCstExt}})
737 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {}})
738 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {}})
739 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {}})
740 .Any(Init: {.Predicate: {UniPtr32, _}, .OperandMapping: {{SgprPtr32}, {}}})
741 .Any(Init: {.Predicate: {UniPtr64, _}, .OperandMapping: {{SgprPtr64}, {}}});
742
// G_FCONSTANT: uniform S16/S32/S64 results map to the SGPR mapping of the
// same size; the source mapping list is empty and no divergent rule is listed.
743 addRulesForGOpcs(OpcList: {G_FCONSTANT}, FastTypes: Standard)
744 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {}})
745 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {}})
746 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {}});
747
// G_FREEZE: result and source share a bank. Uniform S1 is handled as a 32-bit
// SGPR value (Sgpr32AExt source, Sgpr32Trunc result), divergent S1 stays in
// Vcc, and the remaining types use the Sgpr16 / SgprBRC / VgprBRC mappings.
748 addRulesForGOpcs(OpcList: {G_FREEZE})
749 .Any(Init: {.Predicate: {UniS1}, .OperandMapping: {{Sgpr32Trunc}, {Sgpr32AExt}}})
750 .Any(Init: {.Predicate: {DivS1}, .OperandMapping: {{Vcc}, {Vcc}}})
751 .Any(Init: {.Predicate: {UniS16}, .OperandMapping: {{Sgpr16}, {Sgpr16}}})
752 .Any(Init: {.Predicate: {UniBRC}, .OperandMapping: {{SgprBRC}, {SgprBRC}}})
753 .Any(Init: {.Predicate: {DivBRC}, .OperandMapping: {{VgprBRC}, {VgprBRC}}});
754
// G_BITCAST: the source uses the same bank mapping as the result (SgprBRC for
// a uniform result, VgprBRC for a divergent one).
755 addRulesForGOpcs(OpcList: {G_BITCAST})
756 .Any(Init: {.Predicate: {UniBRC}, .OperandMapping: {{SgprBRC}, {SgprBRC}}})
757 .Any(Init: {.Predicate: {DivBRC}, .OperandMapping: {{VgprBRC}, {VgprBRC}}});
758
// G_UNMERGE_VALUES: no per-operand mappings are listed; each rule is selected
// by operand 0 only and defers entirely to its lowering method
// (UnmergeToShiftTrunc for uniform S16, VerifyAllSgpr for UniBRC,
// ApplyAllVgpr for DivBRC).
759 addRulesForGOpcs(OpcList: {G_UNMERGE_VALUES})
760 .Any(Init: {.Predicate: {UniS16}, .OperandMapping: {{}, {}, UnmergeToShiftTrunc}})
761 .Any(Init: {.Predicate: {UniBRC}, .OperandMapping: {{}, {}, VerifyAllSgpr}})
762 .Any(Init: {.Predicate: {DivBRC}, .OperandMapping: {{}, {}, ApplyAllVgpr}});
763
// G_BUILD_VECTOR / G_MERGE_VALUES: rules are keyed on the result (UniBRC or
// DivBRC) and the type of the first source (S16 or BRC). Uniform results use
// the VerifyAllSgpr lowering, divergent results use ApplyAllVgpr; no
// per-operand mappings are listed.
764 addRulesForGOpcs(OpcList: {G_BUILD_VECTOR, G_MERGE_VALUES})
765 .Any(Init: {.Predicate: {UniBRC, S16}, .OperandMapping: {{}, {}, VerifyAllSgpr}})
766 .Any(Init: {.Predicate: {UniBRC, BRC}, .OperandMapping: {{}, {}, VerifyAllSgpr}})
767 .Any(Init: {.Predicate: {DivBRC, S16}, .OperandMapping: {{}, {}, ApplyAllVgpr}})
768 .Any(Init: {.Predicate: {DivBRC, BRC}, .OperandMapping: {{}, {}, ApplyAllVgpr}});
769
// G_CONCAT_VECTORS: uniform results use the VerifyAllSgpr lowering, divergent
// results use ApplyAllVgpr; no per-operand mappings are listed.
770 addRulesForGOpcs(OpcList: {G_CONCAT_VECTORS})
771 .Any(Init: {.Predicate: {UniBRC, BRC}, .OperandMapping: {{}, {}, VerifyAllSgpr}})
772 .Any(Init: {.Predicate: {DivBRC, BRC}, .OperandMapping: {{}, {}, ApplyAllVgpr}});
773
// G_PHI: every rule is lowering-only and keyed on the result. Uniform S1 uses
// AextToS32InIncomingBlockGPHI, uniform S16/BRC use VerifyAllSgprGPHI and
// divergent BRC uses VerifyAllSgprOrVgprGPHI. No divergent S1 rule is listed
// here.
774 addRulesForGOpcs(OpcList: {G_PHI})
775 .Any(Init: {.Predicate: {UniS1}, .OperandMapping: {{}, {}, AextToS32InIncomingBlockGPHI}})
776 .Any(Init: {.Predicate: {UniS16}, .OperandMapping: {{}, {}, VerifyAllSgprGPHI}})
777 .Any(Init: {.Predicate: {UniBRC}, .OperandMapping: {{}, {}, VerifyAllSgprGPHI}})
778 .Any(Init: {.Predicate: {DivBRC}, .OperandMapping: {{}, {}, VerifyAllSgprOrVgprGPHI}});
779
// G_EXTRACT_VECTOR_ELT. Predicates are {result, vector, index}.
//  - all uniform: everything in SGPRs, no lowering.
//  - divergent result/vector with a uniform index: vector in VGPRs, index kept
//    in an SGPR; the 64-bit element rule additionally uses ExtrVecEltTo32.
//  - divergent index: index in a VGPR and the ExtrVecEltToSel lowering.
780 addRulesForGOpcs(OpcList: {G_EXTRACT_VECTOR_ELT})
781 .Any(Init: {.Predicate: {UniB32, UniBRC, UniS32}, .OperandMapping: {{SgprB32}, {SgprBRC, Sgpr32}}})
782 .Any(Init: {.Predicate: {DivB32, DivBRC, UniS32}, .OperandMapping: {{VgprB32}, {VgprBRC, Sgpr32}}})
783 .Any(Init: {.Predicate: {DivB32, BRC, DivS32},
784 .OperandMapping: {{VgprB32}, {VgprBRC, Vgpr32}, ExtrVecEltToSel}})
785 .Any(Init: {.Predicate: {UniB64, UniBRC, UniS32}, .OperandMapping: {{SgprB64}, {SgprBRC, Sgpr32}}})
786 .Any(Init: {.Predicate: {DivB64, DivBRC, UniS32},
787 .OperandMapping: {{VgprB64}, {VgprBRC, Sgpr32}, ExtrVecEltTo32}})
788 .Any(Init: {.Predicate: {DivB64, BRC, DivS32},
789 .OperandMapping: {{VgprB64}, {VgprBRC, Vgpr32}, ExtrVecEltToSel}});
790
// G_INSERT_VECTOR_ELT. Predicates are {result, vector, element, index}.
//  - all uniform: everything in SGPRs.
//  - divergent result with a uniform index: vector and element in VGPRs, index
//    kept in an SGPR; the 64-bit element rule additionally uses InsVecEltTo32.
//  - divergent index: index in a VGPR and the InsVecEltToSel lowering.
// NOTE(review): the all-uniform B64 rule below is tagged InsVecEltToSel,
// whereas the all-uniform B32 insert rule and the all-uniform B64 rule of
// G_EXTRACT_VECTOR_ELT carry no lowering. Confirm this asymmetry is intended.
791 addRulesForGOpcs(OpcList: {G_INSERT_VECTOR_ELT})
792 .Any(Init: {.Predicate: {UniBRC, UniBRC, UniB32, UniS32},
793 .OperandMapping: {{SgprBRC}, {SgprBRC, SgprB32, Sgpr32}}})
794 .Any(
795 Init: {.Predicate: {DivBRC, BRC, B32, UniS32}, .OperandMapping: {{VgprBRC}, {VgprBRC, VgprB32, Sgpr32}}})
796 .Any(Init: {.Predicate: {DivBRC, BRC, B32, DivS32},
797 .OperandMapping: {{VgprBRC}, {VgprBRC, VgprB32, Vgpr32}, InsVecEltToSel}})
798 .Any(Init: {.Predicate: {UniBRC, UniBRC, UniB64, UniS32},
799 .OperandMapping: {{SgprBRC}, {SgprBRC, SgprB64, Sgpr32}, InsVecEltToSel}})
800 .Any(Init: {.Predicate: {DivBRC, BRC, B64, UniS32},
801 .OperandMapping: {{VgprBRC}, {VgprBRC, VgprB64, Sgpr32}, InsVecEltTo32}})
802 .Any(Init: {.Predicate: {DivBRC, BRC, B64, DivS32},
803 .OperandMapping: {{VgprBRC}, {VgprBRC, VgprB64, Vgpr32}, InsVecEltToSel}});
804
// All three BVH intersect-ray opcodes share a single rule with an empty
// predicate list that defers to the ApplyBVH_INTERSECT_RAY lowering. The two
// lines below sketch the operand banks that lowering is expected to produce.
805 // INTERSECT_RAY {Div}, {{VgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
806 // INTERSECT_RAY {Uni}, {{UniInVgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
807 addRulesForGOpcs(OpcList: {G_AMDGPU_BVH_INTERSECT_RAY, G_AMDGPU_BVH_DUAL_INTERSECT_RAY,
808 G_AMDGPU_BVH8_INTERSECT_RAY})
809 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {}, ApplyBVH_INTERSECT_RAY}});
810
// Image load/store pseudos share a single rule with an empty predicate list
// that defers to the ApplyINTRIN_IMAGE lowering. The lines below sketch the
// operand banks per instruction form.
811 // LOAD {Div}, {{VgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
812 // LOAD {Uni}, {{UniInVgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
813 // LOAD_NORET {}, {{}, {Imm, VgprSrc, ..., Sgpr_WF_RsrcIdx}}
814 // STORE {}, {{}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
815 addRulesForGOpcs(OpcList: {G_AMDGPU_INTRIN_IMAGE_LOAD, G_AMDGPU_INTRIN_IMAGE_LOAD_D16,
816 G_AMDGPU_INTRIN_IMAGE_LOAD_NORET,
817 G_AMDGPU_INTRIN_IMAGE_STORE,
818 G_AMDGPU_INTRIN_IMAGE_STORE_D16})
819 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {}, ApplyINTRIN_IMAGE}});
820
821 Predicate isSignedICmp([](const MachineInstr &MI) -> bool {
822 auto Pred =
823 static_cast<CmpInst::Predicate>(MI.getOperand(i: 1).getPredicate());
824 return CmpInst::isSigned(Pred);
825 });
826
827 Predicate isEqualityICmp([](const MachineInstr &MI) -> bool {
828 auto Pred =
829 static_cast<CmpInst::Predicate>(MI.getOperand(i: 1).getPredicate());
830 return ICmpInst::isEquality(P: Pred);
831 });
832
// G_ICMP. Predicates are {result, predicate operand, type of the compared
// values}; the predicate operand is never mapped (None).
//  - Uniform S16 compares are done on 32-bit SGPRs: equality and unsigned
//    compares use Sgpr32ZExt sources, signed compares use Sgpr32SExt.
//  - Uniform 64-bit (S64/Ptr64) equality compares stay in SGPRs only when the
//    subtarget reports hasScalarCompareEq64(); otherwise, and for every
//    non-equality 64-bit compare, sources go to VGPRs and the result is
//    mapped with UniInVcc.
//  - Divergent results are always Vcc with VGPR sources.
833 bool HasScalarCompareEq64 = ST->hasScalarCompareEq64();
834 // clang-format off
835 addRulesForGOpcs(OpcList: {G_ICMP})
836 .Any(Init: {.Predicate: {{UniS1, _, S16}, isEqualityICmp}, .OperandMapping: {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
837 .Any(Init: {.Predicate: {{UniS1, _, S16}, !isEqualityICmp && isSignedICmp}, .OperandMapping: {{Sgpr32Trunc}, {None, Sgpr32SExt, Sgpr32SExt}}})
838 .Any(Init: {.Predicate: {{UniS1, _, S16}, !isEqualityICmp && !isSignedICmp}, .OperandMapping: {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
839 .Any(Init: {.Predicate: {{DivS1, _, S16}}, .OperandMapping: {{Vcc}, {None, Vgpr16, Vgpr16}}})
840 .Any(Init: {.Predicate: {{UniS1, _, S32}}, .OperandMapping: {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
841 .Any(Init: {.Predicate: {{DivS1, _, S32}}, .OperandMapping: {{Vcc}, {None, Vgpr32, Vgpr32}}})
842 .Any(Init: {.Predicate: {{UniS1, _, S64}, isEqualityICmp}, .OperandMapping: {{Sgpr32Trunc}, {None, Sgpr64, Sgpr64}}}, STPred: HasScalarCompareEq64)
843 .Any(Init: {.Predicate: {{UniS1, _, S64}, isEqualityICmp}, .OperandMapping: {{UniInVcc}, {None, Vgpr64, Vgpr64}}}, STPred: !HasScalarCompareEq64)
844 .Any(Init: {.Predicate: {{UniS1, _, S64}, !isEqualityICmp}, .OperandMapping: {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
845 .Any(Init: {.Predicate: {{DivS1, _, S64}}, .OperandMapping: {{Vcc}, {None, Vgpr64, Vgpr64}}})
846 .Any(Init: {.Predicate: {{UniS1, _, Ptr32}}, .OperandMapping: {{Sgpr32Trunc}, {None, SgprPtr32, SgprPtr32}}})
847 .Any(Init: {.Predicate: {{DivS1, _, Ptr32}}, .OperandMapping: {{Vcc}, {None, VgprPtr32, VgprPtr32}}})
848 .Any(Init: {.Predicate: {{UniS1, _, Ptr64}, isEqualityICmp}, .OperandMapping: {{Sgpr32Trunc}, {None, SgprPtr64, SgprPtr64}}}, STPred: HasScalarCompareEq64)
849 .Any(Init: {.Predicate: {{UniS1, _, Ptr64}, isEqualityICmp}, .OperandMapping: {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}}, STPred: !HasScalarCompareEq64)
850 .Any(Init: {.Predicate: {{UniS1, _, Ptr64}, !isEqualityICmp}, .OperandMapping: {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}})
851 .Any(Init: {.Predicate: {{DivS1, _, Ptr64}}, .OperandMapping: {{Vcc}, {None, VgprPtr64, VgprPtr64}}});
852 // clang-format on
853
// G_BRCOND: there is no result; the rule is keyed on the S1 condition. A
// uniform condition uses the Sgpr32AExtBoolInReg mapping, a divergent one Vcc.
854 addRulesForGOpcs(OpcList: {G_BRCOND})
855 .Any(Init: {.Predicate: {UniS1}, .OperandMapping: {{}, {Sgpr32AExtBoolInReg}}})
856 .Any(Init: {.Predicate: {DivS1}, .OperandMapping: {{}, {Vcc}}});
857
// G_BR: no result; its single operand is matched with `_` and left unmapped.
858 addRulesForGOpcs(OpcList: {G_BR}).Any(Init: {.Predicate: {_}, .OperandMapping: {{}, {None}}});
859
// G_SELECT. Divergent results take the condition in Vcc and the values in
// VGPRs; uniform results take the condition through Sgpr32AExtBoolInReg and
// the values in SGPRs. Only the divergent B64 rule carries a lowering
// (SplitTo32Select).
860 addRulesForGOpcs(OpcList: {G_SELECT}, FastTypes: StandardB)
861 .Any(Init: {.Predicate: {DivS16}, .OperandMapping: {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
862 .Any(Init: {.Predicate: {UniS16}, .OperandMapping: {{Sgpr16}, {Sgpr32AExtBoolInReg, Sgpr16, Sgpr16}}})
863 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {Vcc, VgprB32, VgprB32}})
864 .Uni(Ty: B32, RuleApplyIDs: {{SgprB32}, {Sgpr32AExtBoolInReg, SgprB32, SgprB32}})
865 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {Vcc, VgprB64, VgprB64}, SplitTo32Select})
866 .Uni(Ty: B64, RuleApplyIDs: {{SgprB64}, {Sgpr32AExtBoolInReg, SgprB64, SgprB64}});
867
// G_ANYEXT. Predicates are {result, source type}.
//  - Uniform extensions from S1 get no mapping (expected to be combined away).
//  - Divergent extensions from S1 read Vcc and use the VccExtToSel lowering.
//  - S32 -> S64 uses the Ext32To64 lowering in the bank of the result.
//  - S16 -> S32 is a plain same-bank mapping.
868 addRulesForGOpcs(OpcList: {G_ANYEXT})
869 .Any(Init: {.Predicate: {UniS16, S1}, .OperandMapping: {{None}, {None}}}) // should be combined away
870 .Any(Init: {.Predicate: {UniS32, S1}, .OperandMapping: {{None}, {None}}}) // should be combined away
871 .Any(Init: {.Predicate: {UniS64, S1}, .OperandMapping: {{None}, {None}}}) // should be combined away
872 .Any(Init: {.Predicate: {DivS16, S1}, .OperandMapping: {{Vgpr16}, {Vcc}, VccExtToSel}})
873 .Any(Init: {.Predicate: {DivS32, S1}, .OperandMapping: {{Vgpr32}, {Vcc}, VccExtToSel}})
874 .Any(Init: {.Predicate: {DivS64, S1}, .OperandMapping: {{Vgpr64}, {Vcc}, VccExtToSel}})
875 .Any(Init: {.Predicate: {UniS64, S32}, .OperandMapping: {{Sgpr64}, {Sgpr32}, Ext32To64}})
876 .Any(Init: {.Predicate: {DivS64, S32}, .OperandMapping: {{Vgpr64}, {Vgpr32}, Ext32To64}})
877 .Any(Init: {.Predicate: {UniS32, S16}, .OperandMapping: {{Sgpr32}, {Sgpr16}}})
878 .Any(Init: {.Predicate: {DivS32, S16}, .OperandMapping: {{Vgpr32}, {Vgpr16}}});
879
// Has16bitCmp selects between the two divergent S16 -> S1 rules below: with
// has16BitInsts() the source stays Vgpr16, otherwise it is mapped Vgpr32AExt.
880 bool Has16bitCmp = ST->has16BitInsts();
881
882 // In global-isel G_TRUNC in-reg is treated as no-op, inst selected into COPY.
883 // It is up to user to deal with truncated bits.
884 // S1, S16, S32 and S64 results are handled with specific rules. Remaining
885 // (result, source) pairs with valid register classes are covered by the
886 // generic UniBRC/DivBRC wildcard rules.
// NOTE(review): the V2S16 rules are listed after the generic BRC rules; if
// rule lookup is first-match they may never be selected - confirm.
887 addRulesForGOpcs(OpcList: {G_TRUNC})
888 .Any(Init: {.Predicate: {UniS1, UniS16}, .OperandMapping: {{None}, {None}}}) // should be combined away
889 .Any(Init: {.Predicate: {UniS1, UniS32}, .OperandMapping: {{None}, {None}}}) // should be combined away
890 .Any(Init: {.Predicate: {UniS1, UniS64}, .OperandMapping: {{None}, {None}}}) // should be combined away
891 .Any(Init: {.Predicate: {UniS16, S32}, .OperandMapping: {{Sgpr16}, {Sgpr32}}})
892 .Any(Init: {.Predicate: {UniBRC, UniBRC}, .OperandMapping: {{SgprBRC}, {SgprBRC}}})
893 .Any(Init: {.Predicate: {DivBRC, DivBRC}, .OperandMapping: {{VgprBRC}, {VgprBRC}}})
894 .Any(Init: {.Predicate: {UniV2S16, V2S32}, .OperandMapping: {{SgprV2S16}, {SgprV2S32}}})
895 .Any(Init: {.Predicate: {DivV2S16, V2S32}, .OperandMapping: {{VgprV2S16}, {VgprV2S32}}})
896 // This is non-trivial. VgprToVccCopy is done using compare instruction.
897 .Any(Init: {.Predicate: {DivS1, DivS16}, .OperandMapping: {{Vcc}, {Vgpr16}, VgprToVccCopy}}, STPred: Has16bitCmp)
898 .Any(Init: {.Predicate: {DivS1, DivS16}, .OperandMapping: {{Vcc}, {Vgpr32AExt}, VgprToVccCopy}},
899 STPred: !Has16bitCmp)
900 .Any(Init: {.Predicate: {DivS1, DivS32}, .OperandMapping: {{Vcc}, {Vgpr32}, VgprToVccCopy}})
901 .Any(Init: {.Predicate: {DivS1, DivS64}, .OperandMapping: {{Vcc}, {Vgpr64}, VgprToVccCopy}});
902
// G_ZEXT. Predicates are {result, source type}.
//  - From S1: uniform sources use Sgpr32AExtBoolInReg with the UniExtToSel
//    lowering; divergent sources read Vcc with the VccExtToSel lowering.
//  - To S64 from S32 or S16: Ext32To64 lowering; an S16 source is first
//    mapped with the zero-extending 32-bit mapping (Sgpr32ZExt / Vgpr32ZExt).
//  - S16 -> S32 is a plain same-bank mapping.
903 addRulesForGOpcs(OpcList: {G_ZEXT})
904 .Any(Init: {.Predicate: {UniS16, S1}, .OperandMapping: {{Sgpr32Trunc}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
905 .Any(Init: {.Predicate: {UniS32, S1}, .OperandMapping: {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
906 .Any(Init: {.Predicate: {UniS64, S1}, .OperandMapping: {{Sgpr64}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
907 .Any(Init: {.Predicate: {DivS16, S1}, .OperandMapping: {{Vgpr16}, {Vcc}, VccExtToSel}})
908 .Any(Init: {.Predicate: {DivS32, S1}, .OperandMapping: {{Vgpr32}, {Vcc}, VccExtToSel}})
909 .Any(Init: {.Predicate: {DivS64, S1}, .OperandMapping: {{Vgpr64}, {Vcc}, VccExtToSel}})
910 .Any(Init: {.Predicate: {UniS64, S32}, .OperandMapping: {{Sgpr64}, {Sgpr32}, Ext32To64}})
911 .Any(Init: {.Predicate: {DivS64, S32}, .OperandMapping: {{Vgpr64}, {Vgpr32}, Ext32To64}})
912 // not extending S16 to S32 is questionable.
913 .Any(Init: {.Predicate: {UniS64, S16}, .OperandMapping: {{Sgpr64}, {Sgpr32ZExt}, Ext32To64}})
914 .Any(Init: {.Predicate: {DivS64, S16}, .OperandMapping: {{Vgpr64}, {Vgpr32ZExt}, Ext32To64}})
915 .Any(Init: {.Predicate: {UniS32, S16}, .OperandMapping: {{Sgpr32}, {Sgpr16}}})
916 .Any(Init: {.Predicate: {DivS32, S16}, .OperandMapping: {{Vgpr32}, {Vgpr16}}});
917
// G_SEXT. Same rule shape as G_ZEXT; the only difference in the mappings is
// that an S16 source extended to S64 uses the sign-extending 32-bit mapping
// (Sgpr32SExt / Vgpr32SExt) before the Ext32To64 lowering.
918 addRulesForGOpcs(OpcList: {G_SEXT})
919 .Any(Init: {.Predicate: {UniS16, S1}, .OperandMapping: {{Sgpr32Trunc}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
920 .Any(Init: {.Predicate: {UniS32, S1}, .OperandMapping: {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
921 .Any(Init: {.Predicate: {UniS64, S1}, .OperandMapping: {{Sgpr64}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
922 .Any(Init: {.Predicate: {DivS16, S1}, .OperandMapping: {{Vgpr16}, {Vcc}, VccExtToSel}})
923 .Any(Init: {.Predicate: {DivS32, S1}, .OperandMapping: {{Vgpr32}, {Vcc}, VccExtToSel}})
924 .Any(Init: {.Predicate: {DivS64, S1}, .OperandMapping: {{Vgpr64}, {Vcc}, VccExtToSel}})
925 .Any(Init: {.Predicate: {UniS64, S32}, .OperandMapping: {{Sgpr64}, {Sgpr32}, Ext32To64}})
926 .Any(Init: {.Predicate: {DivS64, S32}, .OperandMapping: {{Vgpr64}, {Vgpr32}, Ext32To64}})
927 // not extending S16 to S32 is questionable.
928 .Any(Init: {.Predicate: {UniS64, S16}, .OperandMapping: {{Sgpr64}, {Sgpr32SExt}, Ext32To64}})
929 .Any(Init: {.Predicate: {DivS64, S16}, .OperandMapping: {{Vgpr64}, {Vgpr32SExt}, Ext32To64}})
930 .Any(Init: {.Predicate: {UniS32, S16}, .OperandMapping: {{Sgpr32}, {Sgpr16}}})
931 .Any(Init: {.Predicate: {DivS32, S16}, .OperandMapping: {{Vgpr32}, {Vgpr16}}});
932
// G_SEXT_INREG: result and source share type and bank. Only the divergent S64
// rule carries a lowering (SplitTo32SExtInReg).
933 addRulesForGOpcs(OpcList: {G_SEXT_INREG})
934 .Any(Init: {.Predicate: {UniS32, S32}, .OperandMapping: {{Sgpr32}, {Sgpr32}}})
935 .Any(Init: {.Predicate: {DivS32, S32}, .OperandMapping: {{Vgpr32}, {Vgpr32}}})
936 .Any(Init: {.Predicate: {UniS64, S64}, .OperandMapping: {{Sgpr64}, {Sgpr64}}})
937 .Any(Init: {.Predicate: {DivS64, S64}, .OperandMapping: {{Vgpr64}, {Vgpr64}, SplitTo32SExtInReg}});
938
// G_ASSERT_ZEXT / G_ASSERT_SEXT: the register source follows the bank of the
// result (S32/S64); the second source is mapped as Imm.
939 addRulesForGOpcs(OpcList: {G_ASSERT_ZEXT, G_ASSERT_SEXT}, FastTypes: Standard)
940 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Imm}})
941 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Imm}})
942 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64, Imm}})
943 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Imm}});
944
// G_ASSERT_ALIGN: the source follows the bank and type of the result, for
// S32/S64 scalars as well as 32-/64-bit pointers. Unlike the
// G_ASSERT_ZEXT/G_ASSERT_SEXT rules, no Imm entry is listed for the trailing
// operand.
945 addRulesForGOpcs(OpcList: {G_ASSERT_ALIGN}, FastTypes: Standard)
946 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32}})
947 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32}})
948 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64}})
949 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64}})
950 .Any(Init: {.Predicate: {UniPtr32}, .OperandMapping: {{SgprPtr32}, {SgprPtr32}}})
951 .Any(Init: {.Predicate: {DivPtr32}, .OperandMapping: {{VgprPtr32}, {VgprPtr32}}})
952 .Any(Init: {.Predicate: {UniPtr64}, .OperandMapping: {{SgprPtr64}, {SgprPtr64}}})
953 .Any(Init: {.Predicate: {DivPtr64}, .OperandMapping: {{VgprPtr64}, {VgprPtr64}}});
954
955 // Atomic read-modify-write operations: result and value are always VGPR,
956 // pointer varies by address space.
// Rules exist for divergent S32/S64 results with P0, P1 and P3 pointers only;
// the pointer is mapped to a VGPR in every rule.
957 addRulesForGOpcs(OpcList: {G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG,
958 G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
959 G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN,
960 G_ATOMICRMW_UMAX, G_ATOMICRMW_UINC_WRAP,
961 G_ATOMICRMW_UDEC_WRAP, G_ATOMICRMW_FMIN, G_ATOMICRMW_FMAX})
962 .Any(Init: {.Predicate: {DivS32, P0, S32}, .OperandMapping: {{Vgpr32}, {VgprP0, Vgpr32}}})
963 .Any(Init: {.Predicate: {DivS64, P0, S64}, .OperandMapping: {{Vgpr64}, {VgprP0, Vgpr64}}})
964 .Any(Init: {.Predicate: {DivS32, P1, S32}, .OperandMapping: {{Vgpr32}, {VgprP1, Vgpr32}}})
965 .Any(Init: {.Predicate: {DivS64, P1, S64}, .OperandMapping: {{Vgpr64}, {VgprP1, Vgpr64}}})
966 .Any(Init: {.Predicate: {DivS32, P3, S32}, .OperandMapping: {{Vgpr32}, {VgprP3, Vgpr32}}})
967 .Any(Init: {.Predicate: {DivS64, P3, S64}, .OperandMapping: {{Vgpr64}, {VgprP3, Vgpr64}}});
968
// G_ATOMICRMW_USUB_SAT / G_ATOMICRMW_USUB_COND: only divergent S32 results
// are covered, for P0, P1 and P3 pointers; pointer and value go to VGPRs. The
// predicate checks the result and the pointer only.
969 addRulesForGOpcs(OpcList: {G_ATOMICRMW_USUB_SAT, G_ATOMICRMW_USUB_COND})
970 .Any(Init: {.Predicate: {DivS32, P0}, .OperandMapping: {{Vgpr32}, {VgprP0, Vgpr32}}})
971 .Any(Init: {.Predicate: {DivS32, P1}, .OperandMapping: {{Vgpr32}, {VgprP1, Vgpr32}}})
972 .Any(Init: {.Predicate: {DivS32, P3}, .OperandMapping: {{Vgpr32}, {VgprP3, Vgpr32}}});
973
// G_ATOMICRMW_FADD. S32/S64 rules mirror the other atomic RMW rules (VGPR
// result, pointer and value for P0/P1/P3). The packed V2S16 rules are only
// registered when the subtarget flag for the matching address space is set:
// flat (P0), buffer/global (P1; either the returning or the no-return
// variant is enough) and DS (P3).
974 bool HasAtomicFlatPkAdd16Insts = ST->hasAtomicFlatPkAdd16Insts();
975 bool HasAtomicBufferGlobalPkAddF16Insts =
976 ST->hasAtomicBufferGlobalPkAddF16NoRtnInsts() ||
977 ST->hasAtomicBufferGlobalPkAddF16Insts();
978 bool HasAtomicDsPkAdd16Insts = ST->hasAtomicDsPkAdd16Insts();
979 addRulesForGOpcs(OpcList: {G_ATOMICRMW_FADD})
980 .Any(Init: {.Predicate: {DivS32, P0, S32}, .OperandMapping: {{Vgpr32}, {VgprP0, Vgpr32}}})
981 .Any(Init: {.Predicate: {DivS64, P0, S64}, .OperandMapping: {{Vgpr64}, {VgprP0, Vgpr64}}})
982 .Any(Init: {.Predicate: {DivS32, P1, S32}, .OperandMapping: {{Vgpr32}, {VgprP1, Vgpr32}}})
983 .Any(Init: {.Predicate: {DivS64, P1, S64}, .OperandMapping: {{Vgpr64}, {VgprP1, Vgpr64}}})
984 .Any(Init: {.Predicate: {DivS32, P3, S32}, .OperandMapping: {{Vgpr32}, {VgprP3, Vgpr32}}})
985 .Any(Init: {.Predicate: {DivS64, P3, S64}, .OperandMapping: {{Vgpr64}, {VgprP3, Vgpr64}}})
986 .Any(Init: {.Predicate: {DivV2S16, P0, V2S16}, .OperandMapping: {{VgprV2S16}, {VgprP0, VgprV2S16}}},
987 STPred: HasAtomicFlatPkAdd16Insts)
988 .Any(Init: {.Predicate: {DivV2S16, P1, V2S16}, .OperandMapping: {{VgprV2S16}, {VgprP1, VgprV2S16}}},
989 STPred: HasAtomicBufferGlobalPkAddF16Insts)
990 .Any(Init: {.Predicate: {DivV2S16, P3, V2S16}, .OperandMapping: {{VgprV2S16}, {VgprP3, VgprV2S16}}},
991 STPred: HasAtomicDsPkAdd16Insts);
992
// G_ATOMIC_CMPXCHG: rules exist for P2 and P3 pointers only. Result, pointer
// and both value operands are VGPRs; S32 and S64 are supported.
993 addRulesForGOpcs(OpcList: {G_ATOMIC_CMPXCHG})
994 .Any(Init: {.Predicate: {DivS32, P2}, .OperandMapping: {{Vgpr32}, {VgprP2, Vgpr32, Vgpr32}}})
995 .Any(Init: {.Predicate: {DivS64, P2}, .OperandMapping: {{Vgpr64}, {VgprP2, Vgpr64, Vgpr64}}})
996 .Any(Init: {.Predicate: {DivS32, P3}, .OperandMapping: {{Vgpr32}, {VgprP3, Vgpr32, Vgpr32}}})
997 .Any(Init: {.Predicate: {DivS64, P3}, .OperandMapping: {{Vgpr64}, {VgprP3, Vgpr64, Vgpr64}}});
998
// G_AMDGPU_ATOMIC_CMPXCHG: rules exist for P0 and P1 pointers. The data
// operand is a single two-element vector (V2S32 for an S32 result, V2S64 for
// an S64 result) held in VGPRs.
999 addRulesForGOpcs(OpcList: {G_AMDGPU_ATOMIC_CMPXCHG})
1000 .Any(Init: {.Predicate: {DivS32, P0}, .OperandMapping: {{Vgpr32}, {VgprP0, VgprV2S32}}})
1001 .Any(Init: {.Predicate: {DivS32, P1}, .OperandMapping: {{Vgpr32}, {VgprP1, VgprV2S32}}})
1002 .Any(Init: {.Predicate: {DivS64, P0}, .OperandMapping: {{Vgpr64}, {VgprP0, VgprV2S64}}})
1003 .Any(Init: {.Predicate: {DivS64, P1}, .OperandMapping: {{Vgpr64}, {VgprP1, VgprV2S64}}});
1004
// G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: divergent S32/S64 results. The two leading
// data sources are VGPRs of the result width, followed by SgprV4S32_WF,
// two 32-bit VGPRs and a trailing Sgpr32_WF.
// NOTE(review): the `_WF` suffix presumably requests a waterfall loop when
// the value is not uniform - confirm against the mapping-ID definition.
1005 addRulesForGOpcs(OpcList: {G_AMDGPU_BUFFER_ATOMIC_CMPSWAP}, FastTypes: Standard)
1006 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32},
1007 {Vgpr32, Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1008 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64},
1009 {Vgpr64, Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});
1010
// Remaining buffer atomics: divergent S32/S64 results. Same operand layout as
// the buffer cmpswap rule but with a single leading data source: data VGPR,
// SgprV4S32_WF, two 32-bit VGPRs and a trailing Sgpr32_WF.
1011 addRulesForGOpcs(OpcList: {G_AMDGPU_BUFFER_ATOMIC_ADD, G_AMDGPU_BUFFER_ATOMIC_AND,
1012 G_AMDGPU_BUFFER_ATOMIC_DEC, G_AMDGPU_BUFFER_ATOMIC_FMAX,
1013 G_AMDGPU_BUFFER_ATOMIC_FMIN, G_AMDGPU_BUFFER_ATOMIC_INC,
1014 G_AMDGPU_BUFFER_ATOMIC_OR, G_AMDGPU_BUFFER_ATOMIC_SMAX,
1015 G_AMDGPU_BUFFER_ATOMIC_SMIN, G_AMDGPU_BUFFER_ATOMIC_SUB,
1016 G_AMDGPU_BUFFER_ATOMIC_SWAP, G_AMDGPU_BUFFER_ATOMIC_UMAX,
1017 G_AMDGPU_BUFFER_ATOMIC_UMIN, G_AMDGPU_BUFFER_ATOMIC_XOR},
1018 FastTypes: Standard)
1019 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1020 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});
1021
// Subtarget capabilities cached once; they are passed as STPred to gate
// individual load rules further down.
//  hasSMRDx3    - subtarget reports hasScalarDwordx3Loads().
//  hasSMRDSmall - subtarget reports hasScalarSubwordLoads().
//  usesTrue16   - subtarget reports useRealTrue16Insts().
1022 bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
1023 bool hasSMRDSmall = ST->hasScalarSubwordLoads();
1024 bool usesTrue16 = ST->useRealTrue16Insts();
1025
1026 Predicate isAlign16([](const MachineInstr &MI) -> bool {
1027 return (*MI.memoperands_begin())->getAlign() >= Align(16);
1028 });
1029
1030 Predicate isAlign4([](const MachineInstr &MI) -> bool {
1031 return (*MI.memoperands_begin())->getAlign() >= Align(4);
1032 });
1033
1034 Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
1035 return (*MI.memoperands_begin())->isAtomic();
1036 });
1037
1038 Predicate isUniMMO([](const MachineInstr &MI) -> bool {
1039 return AMDGPU::isUniformMMO(MMO: *MI.memoperands_begin());
1040 });
1041
1042 Predicate isConst([](const MachineInstr &MI) -> bool {
1043 // Address space in MMO be different then address space on pointer.
1044 const MachineMemOperand *MMO = *MI.memoperands_begin();
1045 const unsigned AS = MMO->getAddrSpace();
1046 return AS == AMDGPUAS::CONSTANT_ADDRESS ||
1047 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
1048 });
1049
1050 Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
1051 return (*MI.memoperands_begin())->isVolatile();
1052 });
1053
1054 Predicate isInvMMO([](const MachineInstr &MI) -> bool {
1055 return (*MI.memoperands_begin())->isInvariant();
1056 });
1057
1058 Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
1059 return (*MI.memoperands_begin())->getFlags() & MONoClobber;
1060 });
1061
1062 Predicate isNaturalAligned([](const MachineInstr &MI) -> bool {
1063 const MachineMemOperand *MMO = *MI.memoperands_begin();
1064 return MMO->getAlign() >= Align(MMO->getSize().getValue());
1065 });
1066
1067 Predicate is8Or16BitMMO([](const MachineInstr &MI) -> bool {
1068 const MachineMemOperand *MMO = *MI.memoperands_begin();
1069 const unsigned MemSize = 8 * MMO->getSize().getValue();
1070 return MemSize == 16 || MemSize == 8;
1071 });
1072
1073 Predicate is32BitMMO([](const MachineInstr &MI) -> bool {
1074 const MachineMemOperand *MMO = *MI.memoperands_begin();
1075 return 8 * MMO->getSize().getValue() == 32;
1076 });
1077
// isUL: combined MMO predicate used by the load rules to decide whether a
// uniform load may produce its result directly in SGPRs. It requires all of:
//  - the access is not atomic,
//  - the MMO is uniform,
//  - the access is constant-address-space or not volatile,
//  - the access is constant-address-space, invariant or marked no-clobber.
1078 auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
1079 (isConst || isInvMMO || isNoClobberMMO);
1080
// G_LOAD rules, grouped by the address space of the pointer operand.
//  - Divergent results are loaded into VGPRs through a VGPR pointer; 256- and
//    512-bit divergent P1/P4 loads carry the SplitLoad lowering.
//  - Uniform P1/P4 results use SGPRs only when the MMO satisfies isUL together
//    with the listed alignment/size predicate; the complementary predicates
//    fall back to UniInVgpr* results while keeping the pointer in SGPRs.
//  - Uniform P3 results are always UniInVgpr*.
//  - Rules with a trailing STPred argument are gated on that subtarget flag.
1081 // clang-format off
1082 // TODO: S32Dst, 16-bit any-extending load should not appear on True16 targets
1083 addRulesForGOpcs(OpcList: {G_LOAD})
1084 // flat, addrspace(0), never uniform - flat_load
1085 .Any(Init: {.Predicate: {DivS16, P0}, .OperandMapping: {{Vgpr16}, {VgprP0}}}, STPred: usesTrue16)
1086 .Any(Init: {.Predicate: {DivB32, P0}, .OperandMapping: {{VgprB32}, {VgprP0}}}) // 32-bit load, 8-bit and 16-bit any-extending load
1087 .Any(Init: {.Predicate: {DivB64, P0}, .OperandMapping: {{VgprB64}, {VgprP0}}})
1088 .Any(Init: {.Predicate: {DivB96, P0}, .OperandMapping: {{VgprB96}, {VgprP0}}})
1089 .Any(Init: {.Predicate: {DivB128, P0}, .OperandMapping: {{VgprB128}, {VgprP0}}})
1090
1091 // global, addrspace(1)
1092 // divergent - global_load
1093 .Any(Init: {.Predicate: {DivS16, P1}, .OperandMapping: {{Vgpr16}, {VgprP1}}}, STPred: usesTrue16)
1094 .Any(Init: {.Predicate: {DivB32, P1}, .OperandMapping: {{VgprB32}, {VgprP1}}}) //32-bit load, 8-bit and 16-bit any-extending load
1095 .Any(Init: {.Predicate: {DivB64, P1}, .OperandMapping: {{VgprB64}, {VgprP1}}})
1096 .Any(Init: {.Predicate: {DivB96, P1}, .OperandMapping: {{VgprB96}, {VgprP1}}})
1097 .Any(Init: {.Predicate: {DivB128, P1}, .OperandMapping: {{VgprB128}, {VgprP1}}})
1098 .Any(Init: {.Predicate: {DivB256, P1}, .OperandMapping: {{VgprB256}, {VgprP1}, SplitLoad}})
1099 .Any(Init: {.Predicate: {DivB512, P1}, .OperandMapping: {{VgprB512}, {VgprP1}, SplitLoad}})
1100
1101 // uniform - s_load
1102 .Any(Init: {.Predicate: {{UniS16, P1}, isNaturalAligned && isUL}, .OperandMapping: {{Sgpr32Trunc}, {SgprP1}}}, STPred: usesTrue16 && hasSMRDSmall) // s16 load
1103 .Any(Init: {.Predicate: {{UniS16, P1}, isAlign4 && isUL}, .OperandMapping: {{Sgpr32Trunc}, {SgprP1}, WidenMMOToS32}}, STPred: usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
1104 .Any(Init: {.Predicate: {{UniB32, P1}, isNaturalAligned && isUL}, .OperandMapping: {{SgprB32}, {SgprP1}}}, STPred: hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
1105 // TODO: SplitLoad when !isNaturalAligned && isUL and target hasSMRDSmall
1106 .Any(Init: {.Predicate: {{UniB32, P1}, is8Or16BitMMO && isAlign4 && isUL}, .OperandMapping: {{SgprB32}, {SgprP1}, WidenMMOToS32}}, STPred: !hasSMRDSmall) //8-bit and 16-bit any-extending load to 32-bit load
1107 .Any(Init: {.Predicate: {{UniB32, P1}, is32BitMMO && isAlign4 && isUL}, .OperandMapping: {{SgprB32}, {SgprP1}}}) //32-bit load
1108 .Any(Init: {.Predicate: {{UniB64, P1}, isAlign4 && isUL}, .OperandMapping: {{SgprB64}, {SgprP1}}})
1109 .Any(Init: {.Predicate: {{UniB96, P1}, isAlign16 && isUL}, .OperandMapping: {{SgprB96}, {SgprP1}, WidenLoad}}, STPred: !hasSMRDx3)
1110 .Any(Init: {.Predicate: {{UniB96, P1}, isAlign4 && !isAlign16 && isUL}, .OperandMapping: {{SgprB96}, {SgprP1}, SplitLoad}}, STPred: !hasSMRDx3)
1111 .Any(Init: {.Predicate: {{UniB96, P1}, isAlign4 && isUL}, .OperandMapping: {{SgprB96}, {SgprP1}}}, STPred: hasSMRDx3)
1112 .Any(Init: {.Predicate: {{UniB128, P1}, isAlign4 && isUL}, .OperandMapping: {{SgprB128}, {SgprP1}}})
1113 .Any(Init: {.Predicate: {{UniB256, P1}, isAlign4 && isUL}, .OperandMapping: {{SgprB256}, {SgprP1}}})
1114 .Any(Init: {.Predicate: {{UniB512, P1}, isAlign4 && isUL}, .OperandMapping: {{SgprB512}, {SgprP1}}})
1115
1116 // Uniform via global or buffer load, for example volatile or non-aligned
1117 // uniform load. Not using standard {{UniInVgprTy}, {VgprP1}} since it is
1118 // selected as global_load, use SgprP1 for pointer instead to match
1119 // patterns without flat-for-global, default for GFX7 and older.
1120 // -> +flat-for-global + {{UniInVgprTy}, {SgprP1}} - global_load
1121 // -> -flat-for-global + {{UniInVgprTy}, {SgprP1}} - buffer_load
1122 .Any(Init: {.Predicate: {{UniS16, P1}, !isNaturalAligned || !isUL}, .OperandMapping: {{UniInVgprS16}, {SgprP1}}}, STPred: usesTrue16 && hasSMRDSmall) // s16 load
1123 .Any(Init: {.Predicate: {{UniS16, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprS16}, {SgprP1}}}, STPred: usesTrue16 && !hasSMRDSmall) // s16 load
1124 .Any(Init: {.Predicate: {{UniB32, P1}, !isNaturalAligned || !isUL}, .OperandMapping: {{UniInVgprB32}, {SgprP1}}}, STPred: hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
1125 .Any(Init: {.Predicate: {{UniB32, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB32}, {SgprP1}}}, STPred: !hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
1126 .Any(Init: {.Predicate: {{UniB64, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB64}, {SgprP1}}})
1127 .Any(Init: {.Predicate: {{UniB96, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB96}, {SgprP1}}})
1128 .Any(Init: {.Predicate: {{UniB128, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB128}, {SgprP1}}})
1129 .Any(Init: {.Predicate: {{UniB256, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB256}, {SgprP1}, SplitLoad}})
1130 .Any(Init: {.Predicate: {{UniB512, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB512}, {SgprP1}, SplitLoad}})
1131
1132 // local, addrspace(3) - ds_load
1133 .Any(Init: {.Predicate: {DivS16, P3}, .OperandMapping: {{Vgpr16}, {VgprP3}}}, STPred: usesTrue16)
1134 .Any(Init: {.Predicate: {DivB32, P3}, .OperandMapping: {{VgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
1135 .Any(Init: {.Predicate: {DivB64, P3}, .OperandMapping: {{VgprB64}, {VgprP3}}})
1136 .Any(Init: {.Predicate: {DivB96, P3}, .OperandMapping: {{VgprB96}, {VgprP3}}})
1137 .Any(Init: {.Predicate: {DivB128, P3}, .OperandMapping: {{VgprB128}, {VgprP3}}})
1138
// NOTE(review): the uniform S16 rule maps the pointer with SgprP3 while the
// other uniform P3 rules use VgprP3 - confirm this difference is intended.
1139 .Any(Init: {.Predicate: {UniS16, P3}, .OperandMapping: {{UniInVgprS16}, {SgprP3}}}, STPred: usesTrue16) // 16-bit load
1140 .Any(Init: {.Predicate: {UniB32, P3}, .OperandMapping: {{UniInVgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
1141 .Any(Init: {.Predicate: {UniB64, P3}, .OperandMapping: {{UniInVgprB64}, {VgprP3}}})
1142 .Any(Init: {.Predicate: {UniB96, P3}, .OperandMapping: {{UniInVgprB96}, {VgprP3}}})
1143 .Any(Init: {.Predicate: {UniB128, P3}, .OperandMapping: {{UniInVgprB128}, {VgprP3}}})
1144
1145 // constant, addrspace(4)
1146 // divergent - global_load
1147 .Any(Init: {.Predicate: {DivS16, P4}, .OperandMapping: {{Vgpr16}, {VgprP4}}}, STPred: usesTrue16)
1148 .Any(Init: {.Predicate: {DivB32, P4}, .OperandMapping: {{VgprB32}, {VgprP4}}}) //32-bit load, 8-bit and 16-bit any-extending load
1149 .Any(Init: {.Predicate: {DivB64, P4}, .OperandMapping: {{VgprB64}, {VgprP4}}})
1150 .Any(Init: {.Predicate: {DivB96, P4}, .OperandMapping: {{VgprB96}, {VgprP4}}})
1151 .Any(Init: {.Predicate: {DivB128, P4}, .OperandMapping: {{VgprB128}, {VgprP4}}})
1152 .Any(Init: {.Predicate: {DivB256, P4}, .OperandMapping: {{VgprB256}, {VgprP4}, SplitLoad}})
1153 .Any(Init: {.Predicate: {DivB512, P4}, .OperandMapping: {{VgprB512}, {VgprP4}, SplitLoad}})
1154
1155 // uniform - s_load
1156 .Any(Init: {.Predicate: {{UniS16, P4}, isNaturalAligned && isUL}, .OperandMapping: {{Sgpr32Trunc}, {SgprP4}}}, STPred: usesTrue16 && hasSMRDSmall) // s16 load
1157 .Any(Init: {.Predicate: {{UniS16, P4}, isAlign4 && isUL}, .OperandMapping: {{Sgpr32Trunc}, {SgprP4}, WidenMMOToS32}}, STPred: usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
1158 .Any(Init: {.Predicate: {{UniB32, P4}, isNaturalAligned && isUL}, .OperandMapping: {{SgprB32}, {SgprP4}}}, STPred: hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
1159 .Any(Init: {.Predicate: {{UniB32, P4}, is8Or16BitMMO && isAlign4 && isUL}, .OperandMapping: {{SgprB32}, {SgprP4}, WidenMMOToS32}}, STPred: !hasSMRDSmall) //8-bit and 16-bit any-extending load to 32-bit load
1160 .Any(Init: {.Predicate: {{UniB32, P4}, is32BitMMO && isAlign4 && isUL}, .OperandMapping: {{SgprB32}, {SgprP4}}}) //32-bit load
1161 .Any(Init: {.Predicate: {{UniB64, P4}, isAlign4 && isUL}, .OperandMapping: {{SgprB64}, {SgprP4}}})
1162 .Any(Init: {.Predicate: {{UniB96, P4}, isAlign16 && isUL}, .OperandMapping: {{SgprB96}, {SgprP4}, WidenLoad}}, STPred: !hasSMRDx3)
1163 .Any(Init: {.Predicate: {{UniB96, P4}, isAlign4 && !isAlign16 && isUL}, .OperandMapping: {{SgprB96}, {SgprP4}, SplitLoad}}, STPred: !hasSMRDx3)
1164 .Any(Init: {.Predicate: {{UniB96, P4}, isAlign4 && isUL}, .OperandMapping: {{SgprB96}, {SgprP4}}}, STPred: hasSMRDx3)
1165 .Any(Init: {.Predicate: {{UniB128, P4}, isAlign4 && isUL}, .OperandMapping: {{SgprB128}, {SgprP4}}})
1166 .Any(Init: {.Predicate: {{UniB256, P4}, isAlign4 && isUL}, .OperandMapping: {{SgprB256}, {SgprP4}}})
1167 .Any(Init: {.Predicate: {{UniB512, P4}, isAlign4 && isUL}, .OperandMapping: {{SgprB512}, {SgprP4}}})
1168
1169 // uniform in vgpr - global_load or buffer_load
1170 .Any(Init: {.Predicate: {{UniS16, P4}, !isNaturalAligned || !isUL}, .OperandMapping: {{UniInVgprS16}, {SgprP4}}}, STPred: usesTrue16 && hasSMRDSmall) // s16 load
1171 .Any(Init: {.Predicate: {{UniS16, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprS16}, {SgprP4}}}, STPred: usesTrue16 && !hasSMRDSmall) // s16 load
1172 .Any(Init: {.Predicate: {{UniB32, P4}, !isNaturalAligned || !isUL}, .OperandMapping: {{UniInVgprB32}, {SgprP4}}}, STPred: hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
1173 .Any(Init: {.Predicate: {{UniB32, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB32}, {SgprP4}}}, STPred: !hasSMRDSmall) //32-bit load, 8-bit and 16-bit any-extending load
1174 .Any(Init: {.Predicate: {{UniB64, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB64}, {SgprP4}}})
1175 .Any(Init: {.Predicate: {{UniB96, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB96}, {SgprP4}}})
1176 .Any(Init: {.Predicate: {{UniB128, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB128}, {SgprP4}}})
1177 .Any(Init: {.Predicate: {{UniB256, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB256}, {SgprP4}, SplitLoad}})
1178 .Any(Init: {.Predicate: {{UniB512, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprB512}, {SgprP4}, SplitLoad}})
1179
1180 // private, addrspace(5), never uniform - scratch_load
1181 .Any(Init: {.Predicate: {DivS16, P5}, .OperandMapping: {{Vgpr16}, {VgprP5}}}, STPred: usesTrue16)
1182 .Any(Init: {.Predicate: {DivB32, P5}, .OperandMapping: {{VgprB32}, {VgprP5}}}) // 32-bit load, 8-bit and 16-bit any-extending load
1183 .Any(Init: {.Predicate: {DivB64, P5}, .OperandMapping: {{VgprB64}, {VgprP5}}})
1184 .Any(Init: {.Predicate: {DivB96, P5}, .OperandMapping: {{VgprB96}, {VgprP5}}})
1185 .Any(Init: {.Predicate: {DivB128, P5}, .OperandMapping: {{VgprB128}, {VgprP5}}})
1186
// Divergent S32 load through a 128-bit pointer.
1187 .Any(Init: {.Predicate: {DivS32, Ptr128}, .OperandMapping: {{Vgpr32}, {VgprPtr128}}});
1188
1189
1190 addRulesForGOpcs(OpcList: {G_ZEXTLOAD, G_SEXTLOAD}) // i8 and i16 zero- and sign-extending loads
1191 .Any(Init: {.Predicate: {DivS32, P0}, .OperandMapping: {{Vgpr32}, {VgprP0}}})
1192 .Any(Init: {.Predicate: {DivS16, P0}, .OperandMapping: {{Vgpr16}, {VgprP0}}}, STPred: usesTrue16)
1193
1194 .Any(Init: {.Predicate: {DivS32, P1}, .OperandMapping: {{Vgpr32}, {VgprP1}}})
1195 .Any(Init: {.Predicate: {DivS16, P1}, .OperandMapping: {{Vgpr16}, {VgprP1}}}, STPred: usesTrue16)
1196 .Any(Init: {.Predicate: {{UniS32, P1}, isAlign4 && isUL}, .OperandMapping: {{Sgpr32}, {SgprP1}, WidenMMOToS32}}, STPred: !hasSMRDSmall)
1197 .Any(Init: {.Predicate: {{UniS32, P1}, isNaturalAligned && isUL}, .OperandMapping: {{Sgpr32}, {SgprP1}}}, STPred: hasSMRDSmall)
1198 .Any(Init: {.Predicate: {{UniS32, P1}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprS32}, {SgprP1}}}, STPred: !hasSMRDSmall)
1199 .Any(Init: {.Predicate: {{UniS32, P1}, !isNaturalAligned || !isUL}, .OperandMapping: {{UniInVgprS32}, {SgprP1}}}, STPred: hasSMRDSmall)
1200
1201 .Any(Init: {.Predicate: {DivS32, P3}, .OperandMapping: {{Vgpr32}, {VgprP3}}})
1202 .Any(Init: {.Predicate: {DivS16, P3}, .OperandMapping: {{Vgpr16}, {VgprP3}}}, STPred: usesTrue16)
1203 .Any(Init: {.Predicate: {UniS32, P3}, .OperandMapping: {{UniInVgprS32}, {VgprP3}}})
1204
1205 .Any(Init: {.Predicate: {DivS32, P4}, .OperandMapping: {{Vgpr32}, {VgprP4}}})
1206 .Any(Init: {.Predicate: {DivS16, P4}, .OperandMapping: {{Vgpr16}, {VgprP4}}}, STPred: usesTrue16)
1207 .Any(Init: {.Predicate: {{UniS32, P4}, isAlign4 && isUL}, .OperandMapping: {{Sgpr32}, {SgprP4}, WidenMMOToS32}}, STPred: !hasSMRDSmall)
1208 .Any(Init: {.Predicate: {{UniS32, P4}, isNaturalAligned && isUL}, .OperandMapping: {{Sgpr32}, {SgprP4}}}, STPred: hasSMRDSmall)
1209 .Any(Init: {.Predicate: {{UniS32, P4}, !isAlign4 || !isUL}, .OperandMapping: {{UniInVgprS32}, {SgprP4}}}, STPred: !hasSMRDSmall)
1210 .Any(Init: {.Predicate: {{UniS32, P4}, !isNaturalAligned || !isUL}, .OperandMapping: {{UniInVgprS32}, {SgprP4}}}, STPred: hasSMRDSmall)
1211
1212 .Any(Init: {.Predicate: {DivS32, P5}, .OperandMapping: {{Vgpr32}, {VgprP5}}})
1213 .Any(Init: {.Predicate: {DivS16, P5}, .OperandMapping: {{Vgpr16}, {VgprP5}}}, STPred: usesTrue16);
1214
1215 addRulesForGOpcs(OpcList: {G_STORE})
1216 // addrspace(0)
1217 .Any(Init: {.Predicate: {S16, P0}, .OperandMapping: {{}, {Vgpr16, VgprP0}}}, STPred: usesTrue16) // 16-bit store
1218 .Any(Init: {.Predicate: {B32, P0}, .OperandMapping: {{}, {VgprB32, VgprP0}}}) // 32-bit store, 8-bit and 16-bit truncating store
1219 .Any(Init: {.Predicate: {B64, P0}, .OperandMapping: {{}, {VgprB64, VgprP0}}})
1220 .Any(Init: {.Predicate: {B96, P0}, .OperandMapping: {{}, {VgprB96, VgprP0}}})
1221 .Any(Init: {.Predicate: {B128, P0}, .OperandMapping: {{}, {VgprB128, VgprP0}}})
1222
1223 // addrspace(1), there are no stores to addrspace(4)
1224 // For targets:
1225 // - with "+flat-for-global" - global_store
1226 // - without(-flat-for-global) - buffer_store addr64
1227 .Any(Init: {.Predicate: {S16, DivP1}, .OperandMapping: {{}, {Vgpr16, VgprP1}}}, STPred: usesTrue16) // 16-bit store
1228 .Any(Init: {.Predicate: {B32, DivP1}, .OperandMapping: {{}, {VgprB32, VgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
1229 .Any(Init: {.Predicate: {B64, DivP1}, .OperandMapping: {{}, {VgprB64, VgprP1}}})
1230 .Any(Init: {.Predicate: {B96, DivP1}, .OperandMapping: {{}, {VgprB96, VgprP1}}})
1231 .Any(Init: {.Predicate: {B128, DivP1}, .OperandMapping: {{}, {VgprB128, VgprP1}}})
1232
1233 // For UniP1, use sgpr ptr to match flat-for-global patterns. Targets:
1234 // - with "+flat-for-global" - global_store for both sgpr and vgpr ptr
1235 // - without(-flat-for-global) - need sgpr ptr to select buffer_store
1236 .Any(Init: {.Predicate: {S16, UniP1}, .OperandMapping: {{}, {Vgpr16, SgprP1}}}, STPred: usesTrue16) // 16-bit store
1237 .Any(Init: {.Predicate: {B32, UniP1}, .OperandMapping: {{}, {VgprB32, SgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
1238 .Any(Init: {.Predicate: {B64, UniP1}, .OperandMapping: {{}, {VgprB64, SgprP1}}})
1239 .Any(Init: {.Predicate: {B96, UniP1}, .OperandMapping: {{}, {VgprB96, SgprP1}}})
1240 .Any(Init: {.Predicate: {B128, UniP1}, .OperandMapping: {{}, {VgprB128, SgprP1}}})
1241
1242 // addrspace(3) and addrspace(5)
1243 .Any(Init: {.Predicate: {S16, Ptr32}, .OperandMapping: {{}, {Vgpr16, VgprPtr32}}}, STPred: usesTrue16) // 16-bit store
1244 .Any(Init: {.Predicate: {B32, Ptr32}, .OperandMapping: {{}, {VgprB32, VgprPtr32}}}) // 32-bit store, 8-bit and 16-bit truncating store
1245 .Any(Init: {.Predicate: {B64, Ptr32}, .OperandMapping: {{}, {VgprB64, VgprPtr32}}})
1246 .Any(Init: {.Predicate: {B96, Ptr32}, .OperandMapping: {{}, {VgprB96, VgprPtr32}}})
1247 .Any(Init: {.Predicate: {B128, Ptr32}, .OperandMapping: {{}, {VgprB128, VgprPtr32}}});
1248
1249 // clang-format on
1250
1251 addRulesForGOpcs(OpcList: {G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
1252 G_AMDGPU_TBUFFER_LOAD_FORMAT},
1253 FastTypes: StandardB)
1254 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1255 .Uni(Ty: B32, RuleApplyIDs: {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1256 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1257 .Uni(Ty: B64, RuleApplyIDs: {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1258 .Div(Ty: B96, RuleApplyIDs: {{VgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1259 .Uni(Ty: B96, RuleApplyIDs: {{UniInVgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1260 .Div(Ty: B128, RuleApplyIDs: {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1261 .Uni(Ty: B128, RuleApplyIDs: {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});
1262
1263 addRulesForGOpcs(OpcList: {G_AMDGPU_BUFFER_LOAD_USHORT, G_AMDGPU_BUFFER_LOAD_UBYTE,
1264 G_AMDGPU_BUFFER_LOAD_SSHORT, G_AMDGPU_BUFFER_LOAD_SBYTE},
1265 FastTypes: StandardB)
1266 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1267 .Uni(Ty: B32, RuleApplyIDs: {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});
1268
1269 addRulesForGOpcs(
1270 OpcList: {G_AMDGPU_BUFFER_LOAD_UBYTE_TFE, G_AMDGPU_BUFFER_LOAD_USHORT_TFE},
1271 FastTypes: StandardB)
1272 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1273 .Uni(Ty: B64, RuleApplyIDs: {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});
1274
1275 addRulesForGOpcs(OpcList: {G_AMDGPU_BUFFER_LOAD_TFE, G_AMDGPU_BUFFER_LOAD_FORMAT_TFE},
1276 FastTypes: StandardB)
1277 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1278 .Uni(Ty: B64, RuleApplyIDs: {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1279 .Div(Ty: B96, RuleApplyIDs: {{VgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1280 .Uni(Ty: B96, RuleApplyIDs: {{UniInVgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1281 .Div(Ty: B128, RuleApplyIDs: {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1282 .Uni(Ty: B128, RuleApplyIDs: {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1283 .Any(Init: {.Predicate: {DivB160}, .OperandMapping: {{VgprB160}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1284 .Any(Init: {.Predicate: {UniB160},
1285 .OperandMapping: {{UniInVgprB160}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});
1286
1287 addRulesForGOpcs(
1288 OpcList: {G_AMDGPU_BUFFER_LOAD_FORMAT_D16, G_AMDGPU_TBUFFER_LOAD_FORMAT_D16},
1289 FastTypes: StandardB)
1290 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1291 .Uni(Ty: B32, RuleApplyIDs: {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1292 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1293 .Uni(Ty: B64, RuleApplyIDs: {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1294 .Div(Ty: B128, RuleApplyIDs: {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
1295 .Uni(Ty: B128, RuleApplyIDs: {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});
1296
1297 addRulesForGOpcs(OpcList: {G_AMDGPU_S_BUFFER_LOAD})
1298 // waterfall expansion is part of S_BUF_to_BUF
1299 .Any(Init: {.Predicate: {UniB32}, .OperandMapping: {{SgprB32}, {SgprV4S32, Sgpr32}}})
1300 .Any(Init: {.Predicate: {DivB32, UniV4S32, DivB32},
1301 .OperandMapping: {{VgprB32}, {SgprV4S32, Vgpr32}, S_BUF_to_BUF}})
1302 .Any(Init: {.Predicate: {DivB32, DivV4S32, UniB32},
1303 .OperandMapping: {{VgprB32}, {VgprV4S32, Sgpr32}, S_BUF_to_BUF}})
1304 .Any(Init: {.Predicate: {DivB32, DivV4S32, DivB32},
1305 .OperandMapping: {{VgprB32}, {VgprV4S32, Vgpr32}, S_BUF_to_BUF}})
1306
1307 .Any(Init: {.Predicate: {UniB64}, .OperandMapping: {{SgprB64}, {SgprV4S32, Sgpr32}}})
1308 .Any(Init: {.Predicate: {DivB64, UniV4S32, DivB32},
1309 .OperandMapping: {{VgprB64}, {SgprV4S32, Vgpr32}, S_BUF_to_BUF}})
1310 .Any(Init: {.Predicate: {DivB64, DivV4S32, UniB32},
1311 .OperandMapping: {{VgprB64}, {VgprV4S32, Sgpr32}, S_BUF_to_BUF}})
1312 .Any(Init: {.Predicate: {DivB64, DivV4S32, DivB32},
1313 .OperandMapping: {{VgprB64}, {VgprV4S32, Vgpr32}, S_BUF_to_BUF}})
1314
1315 .Any(Init: {.Predicate: {UniB96}, .OperandMapping: {{SgprB96}, {SgprV4S32, Sgpr32}}})
1316 .Any(Init: {.Predicate: {DivB96, UniV4S32, DivB32},
1317 .OperandMapping: {{VgprB96}, {SgprV4S32, Vgpr32}, S_BUF_to_BUF}})
1318 .Any(Init: {.Predicate: {DivB96, DivV4S32, UniB32},
1319 .OperandMapping: {{VgprB96}, {VgprV4S32, Sgpr32}, S_BUF_to_BUF}})
1320 .Any(Init: {.Predicate: {DivB96, DivV4S32, DivB32},
1321 .OperandMapping: {{VgprB96}, {VgprV4S32, Vgpr32}, S_BUF_to_BUF}})
1322
1323 .Any(Init: {.Predicate: {UniB128}, .OperandMapping: {{SgprB128}, {SgprV4S32, Sgpr32}}})
1324 .Any(Init: {.Predicate: {DivB128, UniV4S32, DivB32},
1325 .OperandMapping: {{VgprB128}, {SgprV4S32, Vgpr32}, S_BUF_to_BUF}})
1326 .Any(Init: {.Predicate: {DivB128, DivV4S32, UniB32},
1327 .OperandMapping: {{VgprB128}, {VgprV4S32, Sgpr32}, S_BUF_to_BUF}})
1328 .Any(Init: {.Predicate: {DivB128, DivV4S32, DivB32},
1329 .OperandMapping: {{VgprB128}, {VgprV4S32, Vgpr32}, S_BUF_to_BUF}})
1330
1331 .Any(Init: {.Predicate: {UniB256}, .OperandMapping: {{SgprB256}, {SgprV4S32, Sgpr32}}})
1332 .Any(Init: {.Predicate: {DivB256, UniV4S32, DivB32},
1333 .OperandMapping: {{VgprB256}, {SgprV4S32, Vgpr32}, S_BUF_to_BUF}})
1334 .Any(Init: {.Predicate: {DivB256, DivV4S32, UniB32},
1335 .OperandMapping: {{VgprB256}, {VgprV4S32, Sgpr32}, S_BUF_to_BUF}})
1336 .Any(Init: {.Predicate: {DivB256, DivV4S32, DivB32},
1337 .OperandMapping: {{VgprB256}, {VgprV4S32, Vgpr32}, S_BUF_to_BUF}})
1338
1339 .Any(Init: {.Predicate: {UniB512}, .OperandMapping: {{SgprB512}, {SgprV4S32, Sgpr32}}})
1340 .Any(Init: {.Predicate: {DivB512, UniV4S32, DivB32},
1341 .OperandMapping: {{VgprB512}, {SgprV4S32, Vgpr32}, S_BUF_to_BUF}})
1342 .Any(Init: {.Predicate: {DivB512, DivV4S32, UniB32},
1343 .OperandMapping: {{VgprB512}, {VgprV4S32, Sgpr32}, S_BUF_to_BUF}})
1344 .Any(Init: {.Predicate: {DivB512, DivV4S32, DivB32},
1345 .OperandMapping: {{VgprB512}, {VgprV4S32, Vgpr32}, S_BUF_to_BUF}});
1346
1347 addRulesForGOpcs(OpcList: {G_AMDGPU_S_BUFFER_LOAD_SBYTE, G_AMDGPU_S_BUFFER_LOAD_UBYTE,
1348 G_AMDGPU_S_BUFFER_LOAD_SSHORT,
1349 G_AMDGPU_S_BUFFER_LOAD_USHORT})
1350 .Any(Init: {.Predicate: {UniS32, UniV4S32, UniS32}, .OperandMapping: {{Sgpr32}, {SgprV4S32, Sgpr32}}})
1351 .Any(Init: {.Predicate: {DivS32, UniV4S32, DivS32},
1352 .OperandMapping: {{Vgpr32}, {SgprV4S32, Vgpr32}, S_BUF_to_BUF}})
1353 .Any(Init: {.Predicate: {DivS32, DivV4S32, UniS32},
1354 .OperandMapping: {{Vgpr32}, {VgprV4S32, Sgpr32}, S_BUF_to_BUF}})
1355 .Any(Init: {.Predicate: {DivS32, DivV4S32, DivS32},
1356 .OperandMapping: {{Vgpr32}, {VgprV4S32, Vgpr32}, S_BUF_to_BUF}});
1357
1358 addRulesForGOpcs(OpcList: {G_AMDGPU_BUFFER_STORE, G_AMDGPU_BUFFER_STORE_BYTE,
1359 G_AMDGPU_BUFFER_STORE_SHORT, G_AMDGPU_BUFFER_STORE_FORMAT,
1360 G_AMDGPU_BUFFER_STORE_FORMAT_D16,
1361 G_AMDGPU_TBUFFER_STORE_FORMAT,
1362 G_AMDGPU_TBUFFER_STORE_FORMAT_D16})
1363 .Any(Init: {.Predicate: {B32}, .OperandMapping: {{}, {VgprB32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1364 .Any(Init: {.Predicate: {B64}, .OperandMapping: {{}, {VgprB64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1365 .Any(Init: {.Predicate: {B96}, .OperandMapping: {{}, {VgprB96, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1366 .Any(Init: {.Predicate: {B128}, .OperandMapping: {{}, {VgprB128, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});
1367
1368 // Buffer atomics: resource descriptor + scalar offset are SGPR, data and
1369 // address components are VGPR.
1370 //
1371 // Operand order (SIInstructions.td BufferAtomicGenericInstruction):
1372 // dst = op vdata, rsrc, vindex, voffset, soffset, offset_imm, cachepolicy,
1373 // idxen_imm
1374 addRulesForGOpcs(OpcList: {G_AMDGPU_BUFFER_ATOMIC_FADD})
1375 .Any(Init: {.Predicate: {S32, S32, V4S32, S32, S32, S32},
1376 .OperandMapping: {{Vgpr32}, {Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1377 .Any(Init: {.Predicate: {S64, S64, V4S32, S32, S32, S32},
1378 .OperandMapping: {{Vgpr64}, {Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
1379 .Any(Init: {.Predicate: {V2S16, V2S16, V4S32, S32, S32, S32},
1380 .OperandMapping: {{VgprV2S16},
1381 {VgprV2S16, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});
1382
1383 addRulesForGOpcs(OpcList: {G_PTR_ADD})
1384 .Any(Init: {.Predicate: {UniPtr32}, .OperandMapping: {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
1385 .Any(Init: {.Predicate: {DivPtr32}, .OperandMapping: {{VgprPtr32}, {VgprPtr32, Vgpr32}}})
1386 .Any(Init: {.Predicate: {UniPtr64}, .OperandMapping: {{SgprPtr64}, {SgprPtr64, Sgpr64}}})
1387 .Any(Init: {.Predicate: {DivPtr64}, .OperandMapping: {{VgprPtr64}, {VgprPtr64, Vgpr64}}});
1388
1389 addRulesForGOpcs(OpcList: {G_INTTOPTR})
1390 .Any(Init: {.Predicate: {UniPtr32}, .OperandMapping: {{SgprPtr32}, {Sgpr32}}})
1391 .Any(Init: {.Predicate: {DivPtr32}, .OperandMapping: {{VgprPtr32}, {Vgpr32}}})
1392 .Any(Init: {.Predicate: {UniPtr64}, .OperandMapping: {{SgprPtr64}, {Sgpr64}}})
1393 .Any(Init: {.Predicate: {DivPtr64}, .OperandMapping: {{VgprPtr64}, {Vgpr64}}})
1394 .Any(Init: {.Predicate: {UniPtr128}, .OperandMapping: {{SgprPtr128}, {Sgpr128}}})
1395 .Any(Init: {.Predicate: {DivPtr128}, .OperandMapping: {{VgprPtr128}, {Vgpr128}}});
1396
1397 addRulesForGOpcs(OpcList: {G_PTRTOINT})
1398 .Any(Init: {.Predicate: {UniS32}, .OperandMapping: {{Sgpr32}, {SgprPtr32}}})
1399 .Any(Init: {.Predicate: {DivS32}, .OperandMapping: {{Vgpr32}, {VgprPtr32}}})
1400 .Any(Init: {.Predicate: {UniS64}, .OperandMapping: {{Sgpr64}, {SgprPtr64}}})
1401 .Any(Init: {.Predicate: {DivS64}, .OperandMapping: {{Vgpr64}, {VgprPtr64}}})
1402 .Any(Init: {.Predicate: {UniS128}, .OperandMapping: {{Sgpr128}, {SgprPtr128}}})
1403 .Any(Init: {.Predicate: {DivS128}, .OperandMapping: {{Vgpr128}, {VgprPtr128}}});
1404
1405 // FIXME: Update llvm/test/CodeGen/AMDGPU/ptrmask.ll to use GlobalISel.
1406 // Currently crashes on P8 (buffer resource) tests due to legalizer issue.
1407 addRulesForGOpcs(OpcList: {G_PTRMASK})
1408 .Any(Init: {.Predicate: {UniP1}, .OperandMapping: {{SgprP1}, {SgprP1, Sgpr64}}})
1409 .Any(Init: {.Predicate: {DivP1}, .OperandMapping: {{VgprP1}, {VgprP1, Vgpr64}}})
1410 .Any(Init: {.Predicate: {UniP3}, .OperandMapping: {{SgprP3}, {SgprP3, Sgpr32}}})
1411 .Any(Init: {.Predicate: {DivP3}, .OperandMapping: {{VgprP3}, {VgprP3, Vgpr32}}});
1412
1413 addRulesForGOpcs(OpcList: {G_DYN_STACKALLOC})
1414 .Any(Init: {.Predicate: {UniP5, UniS32}, .OperandMapping: {{SgprP5}, {Sgpr32}, DynStackAlloc}})
1415 .Any(Init: {.Predicate: {UniP5, DivS32}, .OperandMapping: {{SgprP5}, {Vgpr32}, DynStackAlloc}});
1416
1417 addRulesForGOpcs(OpcList: {G_ABS}, FastTypes: Standard)
1418 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr32Trunc}, {Sgpr32SExt}})
1419 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16}, AbsToNegMax})
1420 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32}})
1421 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32}, AbsToNegMax})
1422 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16}, AbsToS32})
1423 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16}, AbsToNegMax});
1424
1425 addRulesForGOpcs(OpcList: {G_BITREVERSE}, FastTypes: Standard)
1426 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32}})
1427 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32}})
1428 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {Sgpr64}})
1429 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64}});
1430
1431 addRulesForGOpcs(OpcList: {G_AMDGPU_FFBH_U32, G_AMDGPU_FFBL_B32, G_CTLZ_ZERO_POISON,
1432 G_CTTZ_ZERO_POISON})
1433 .Any(Init: {.Predicate: {UniS32, S32}, .OperandMapping: {{Sgpr32}, {Sgpr32}}})
1434 .Any(Init: {.Predicate: {DivS32, S32}, .OperandMapping: {{Vgpr32}, {Vgpr32}}})
1435 .Any(Init: {.Predicate: {UniS32, S64}, .OperandMapping: {{Sgpr32}, {Sgpr64}}})
1436 .Any(Init: {.Predicate: {DivS32, S64}, .OperandMapping: {{Vgpr32}, {Vgpr64}, SplitBitCount64To32}});
1437
1438 addRulesForGOpcs(OpcList: {G_CTPOP})
1439 .Any(Init: {.Predicate: {UniS32, S32}, .OperandMapping: {{Sgpr32}, {Sgpr32}}})
1440 .Any(Init: {.Predicate: {DivS32, S32}, .OperandMapping: {{Vgpr32}, {Vgpr32}}})
1441 .Any(Init: {.Predicate: {UniS32, S64}, .OperandMapping: {{Sgpr32}, {Sgpr64}}})
1442 .Any(Init: {.Predicate: {DivS32, S64}, .OperandMapping: {{Vgpr32}, {Vgpr64}, CtPop64To32}});
1443
1444 addRulesForGOpcs(OpcList: {G_FENCE}).Any(Init: {.Predicate: {{}}, .OperandMapping: {{}, {}}});
1445
1446 addRulesForGOpcs(OpcList: {G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, FastTypes: Standard)
1447 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {}});
1448
1449 addRulesForGOpcs(OpcList: {G_GET_ROUNDING}, FastTypes: Standard)
1450 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {}, LowerGetRounding});
1451
1452 addRulesForGOpcs(OpcList: {G_SET_ROUNDING}, FastTypes: Standard)
1453 .Uni(Ty: S32, RuleApplyIDs: {{}, {SgprB32_ReadFirstLane}, LowerSetRounding})
1454 .Div(Ty: S32, RuleApplyIDs: {{}, {SgprB32_ReadFirstLane}, LowerSetRounding});
1455
1456 addRulesForGOpcs(OpcList: {G_BLOCK_ADDR}).Any(Init: {.Predicate: {UniP0}, .OperandMapping: {{SgprP0}, {}}});
1457
1458 addRulesForGOpcs(OpcList: {G_GLOBAL_VALUE})
1459 .Any(Init: {.Predicate: {UniP0}, .OperandMapping: {{SgprP0}, {}}})
1460 .Any(Init: {.Predicate: {UniP1}, .OperandMapping: {{SgprP1}, {}}})
1461 .Any(Init: {.Predicate: {UniP3}, .OperandMapping: {{SgprP3}, {}}})
1462 .Any(Init: {.Predicate: {UniP4}, .OperandMapping: {{SgprP4}, {}}})
1463 .Any(Init: {.Predicate: {UniP8}, .OperandMapping: {{SgprP8}, {}}});
1464
1465 addRulesForGOpcs(OpcList: {G_AMDGPU_WAVE_ADDRESS}).Any(Init: {.Predicate: {UniP5}, .OperandMapping: {{SgprP5}, {}}});
1466
1467 addRulesForGOpcs(OpcList: {G_AMDGPU_SPONENTRY}, FastTypes: Standard).Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {}});
1468
1469 addRulesForGOpcs(OpcList: {G_SI_CALL})
1470 .Any(Init: {.Predicate: {_, UniP0}, .OperandMapping: {{None}, {SgprP0}}})
1471 .Any(Init: {.Predicate: {_, DivP0}, .OperandMapping: {{None}, {SgprP0Call_WF}}})
1472 .Any(Init: {.Predicate: {_, UniP4}, .OperandMapping: {{None}, {SgprP4}}})
1473 .Any(Init: {.Predicate: {_, DivP4}, .OperandMapping: {{None}, {SgprP4Call_WF}}});
1474
1475 bool hasSALUFloat = ST->hasSALUFloatInsts();
1476
1477 addRulesForGOpcs(OpcList: {G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL}, FastTypes: Standard)
1478 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16}}, STPred: !hasSALUFloat)
1479 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {Sgpr16, Sgpr16}}, STPred: hasSALUFloat)
1480 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
1481 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}}, STPred: hasSALUFloat)
1482 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}}, STPred: !hasSALUFloat)
1483 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
1484 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64, Vgpr64}})
1485 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr64}})
1486 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}}, STPred: !hasSALUFloat)
1487 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16}, ScalarizeToS16},
1488 STPred: hasSALUFloat)
1489 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
1490 .Any(Init: {.Predicate: {UniV2S32}, .OperandMapping: {{UniInVgprV2S32}, {VgprV2S32, VgprV2S32}}})
1491 .Any(Init: {.Predicate: {DivV2S32}, .OperandMapping: {{VgprV2S32}, {VgprV2S32, VgprV2S32}}})
1492 .Any(Init: {.Predicate: {UniV2S64}, .OperandMapping: {{UniInVgprV2S64}, {VgprV2S64, VgprV2S64}}})
1493 .Any(Init: {.Predicate: {DivV2S64}, .OperandMapping: {{VgprV2S64}, {VgprV2S64, VgprV2S64}}});
1494
1495 addRulesForGOpcs(OpcList: {G_FSUB, G_STRICT_FSUB}, FastTypes: Standard)
1496 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
1497 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
1498 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {Sgpr16, Sgpr16}}, STPred: hasSALUFloat)
1499 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16}}, STPred: !hasSALUFloat)
1500 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}}, STPred: hasSALUFloat)
1501 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}}, STPred: !hasSALUFloat);
1502
1503 addRulesForGOpcs(OpcList: {G_FMAD}, FastTypes: Standard)
1504 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
1505 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
1506 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
1507 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
1508
1509 addRulesForGOpcs(OpcList: {G_FLDEXP, G_STRICT_FLDEXP}, FastTypes: Standard)
1510 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}})
1511 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
1512 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16}})
1513 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
1514 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64, Vgpr32}})
1515 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr32}});
1516
1517 addRulesForGOpcs(OpcList: {G_FMA, G_STRICT_FMA}, FastTypes: Standard)
1518 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
1519 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
1520 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64, Vgpr64, Vgpr64}})
1521 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr64}})
1522 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}})
1523 .Any(Init: {.Predicate: {UniV2S32}, .OperandMapping: {{UniInVgprV2S32}, {VgprV2S32, VgprV2S32, VgprV2S32}}})
1524 .Any(Init: {.Predicate: {DivV2S32}, .OperandMapping: {{VgprV2S32}, {VgprV2S32, VgprV2S32, VgprV2S32}}})
1525 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {Sgpr16, Sgpr16, Sgpr16}}, STPred: hasSALUFloat)
1526 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}}, STPred: !hasSALUFloat)
1527 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}}, STPred: hasSALUFloat)
1528 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}}, STPred: !hasSALUFloat)
1529 .Uni(Ty: V2S16,
1530 RuleApplyIDs: {{SgprV2S16}, {SgprV2S16, SgprV2S16, SgprV2S16}, ScalarizeToS16},
1531 STPred: hasSALUFloat)
1532 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}},
1533 STPred: !hasSALUFloat)
1534 .Any(Init: {.Predicate: {UniV2S64}, .OperandMapping: {{UniInVgprV2S64}, {VgprV2S64, VgprV2S64, VgprV2S64}}})
1535 .Any(Init: {.Predicate: {DivV2S64}, .OperandMapping: {{VgprV2S64}, {VgprV2S64, VgprV2S64, VgprV2S64}}});
1536
1537 addRulesForGOpcs(OpcList: {G_AMDGPU_FMED3}, FastTypes: Standard)
1538 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
1539 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
1540 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
1541 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
1542
1543 // TODO: This opcode is generated from the i64->i16 signed clamped pattern in
1544 // the PreLegalizerCombiner. Move the combine to RegBankCombiner to keep more
1545 // instructions on SALU.
1546 addRulesForGOpcs(OpcList: {G_AMDGPU_SMED3}, FastTypes: Standard)
1547 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
1548 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});
1549
1550 // FNEG and FABS are either folded as source modifiers or can be selected as
1551 // bitwise XOR and AND with Mask. XOR and AND are available on SALU but for
1552 // targets without SALU float we still select them as VGPR since there would
1553 // be no real sgpr use.
1554 addRulesForGOpcs(OpcList: {G_FNEG, G_FABS}, FastTypes: Standard)
1555 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16}}, STPred: !hasSALUFloat)
1556 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {Sgpr16}}, STPred: hasSALUFloat)
1557 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16}})
1558 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32}}, STPred: !hasSALUFloat)
1559 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32}}, STPred: hasSALUFloat)
1560 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32}})
1561 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64}})
1562 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64}})
1563 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16}}, STPred: !hasSALUFloat)
1564 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {SgprV2S16}, ScalarizeToS16}, STPred: hasSALUFloat)
1565 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16}})
1566 .Any(Init: {.Predicate: {UniV2S32}, .OperandMapping: {{UniInVgprV2S32}, {VgprV2S32}}})
1567 .Any(Init: {.Predicate: {DivV2S32}, .OperandMapping: {{VgprV2S32}, {VgprV2S32}}});
1568
1569 addRulesForGOpcs(OpcList: {G_FCANONICALIZE}, FastTypes: Standard)
1570 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32}})
1571 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32}})
1572 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16}})
1573 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16}})
1574 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64}})
1575 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64}})
1576 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16}})
1577 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16}})
1578 .Any(Init: {.Predicate: {UniV2S32}, .OperandMapping: {{UniInVgprV2S32}, {VgprV2S32}}})
1579 .Any(Init: {.Predicate: {DivV2S32}, .OperandMapping: {{VgprV2S32}, {VgprV2S32}}})
1580 .Any(Init: {.Predicate: {UniV2S64}, .OperandMapping: {{UniInVgprV2S64}, {VgprV2S64}}})
1581 .Any(Init: {.Predicate: {DivV2S64}, .OperandMapping: {{VgprV2S64}, {VgprV2S64}}});
1582
1583 bool hasPST = ST->hasPseudoScalarTrans();
1584 addRulesForGOpcs(OpcList: {G_FSQRT}, FastTypes: Standard)
1585 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16}})
1586 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {Sgpr16}}, STPred: hasPST)
1587 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16}}, STPred: !hasPST);
1588
1589 addRulesForGOpcs(OpcList: {G_FPTOUI, G_FPTOSI, G_FPTOUI_SAT, G_FPTOSI_SAT})
1590 .Any(Init: {.Predicate: {UniS16, S16}, .OperandMapping: {{UniInVgprS16}, {Vgpr16}}})
1591 .Any(Init: {.Predicate: {DivS16, S16}, .OperandMapping: {{Vgpr16}, {Vgpr16}}})
1592 .Any(Init: {.Predicate: {UniS32, S16}, .OperandMapping: {{Sgpr32}, {Sgpr16}}}, STPred: hasSALUFloat)
1593 .Any(Init: {.Predicate: {UniS32, S16}, .OperandMapping: {{UniInVgprS32}, {Vgpr16}}}, STPred: !hasSALUFloat)
1594 .Any(Init: {.Predicate: {DivS32, S16}, .OperandMapping: {{Vgpr32}, {Vgpr16}}})
1595 .Any(Init: {.Predicate: {UniS16, S32}, .OperandMapping: {{Sgpr16}, {Sgpr32}}}, STPred: hasSALUFloat)
1596 .Any(Init: {.Predicate: {UniS16, S32}, .OperandMapping: {{UniInVgprS16}, {Vgpr32}}}, STPred: !hasSALUFloat)
1597 .Any(Init: {.Predicate: {DivS16, S32}, .OperandMapping: {{Vgpr16}, {Vgpr32}}})
1598 .Any(Init: {.Predicate: {UniS32, S32}, .OperandMapping: {{Sgpr32}, {Sgpr32}}}, STPred: hasSALUFloat)
1599 .Any(Init: {.Predicate: {UniS32, S32}, .OperandMapping: {{UniInVgprS32}, {Vgpr32}}}, STPred: !hasSALUFloat)
1600 .Any(Init: {.Predicate: {DivS32, S32}, .OperandMapping: {{Vgpr32}, {Vgpr32}}})
1601 .Any(Init: {.Predicate: {UniS32, S64}, .OperandMapping: {{UniInVgprS32}, {Vgpr64}}})
1602 .Any(Init: {.Predicate: {DivS32, S64}, .OperandMapping: {{Vgpr32}, {Vgpr64}}})
1603 .Any(Init: {.Predicate: {UniV2S16, V2S32}, .OperandMapping: {{UniInVgprV2S16}, {SgprV2S32}}})
1604 .Any(Init: {.Predicate: {DivV2S16, V2S32}, .OperandMapping: {{VgprV2S16}, {VgprV2S32}}});
1605
1606 addRulesForGOpcs(OpcList: {G_UITOFP, G_SITOFP})
1607 .Any(Init: {.Predicate: {UniS16, S16}, .OperandMapping: {{UniInVgprS16}, {Vgpr16}}})
1608 .Any(Init: {.Predicate: {DivS16, S16}, .OperandMapping: {{Vgpr16}, {Vgpr16}}})
1609 .Any(Init: {.Predicate: {UniS16, S32}, .OperandMapping: {{Sgpr16}, {Sgpr32}}}, STPred: hasSALUFloat)
1610 .Any(Init: {.Predicate: {UniS16, S32}, .OperandMapping: {{UniInVgprS16}, {Vgpr32}}}, STPred: !hasSALUFloat)
1611 .Any(Init: {.Predicate: {DivS16, S32}, .OperandMapping: {{Vgpr16}, {Vgpr32}}})
1612 .Any(Init: {.Predicate: {UniS32, S32}, .OperandMapping: {{Sgpr32}, {Sgpr32}}}, STPred: hasSALUFloat)
1613 .Any(Init: {.Predicate: {UniS32, S32}, .OperandMapping: {{UniInVgprS32}, {Vgpr32}}}, STPred: !hasSALUFloat)
1614 .Any(Init: {.Predicate: {DivS32, S32}, .OperandMapping: {{Vgpr32}, {Vgpr32}}})
1615 .Any(Init: {.Predicate: {UniS64, S32}, .OperandMapping: {{UniInVgprS64}, {Vgpr32}}})
1616 .Any(Init: {.Predicate: {DivS64, S32}, .OperandMapping: {{Vgpr64}, {Vgpr32}}});
1617
1618 addRulesForGOpcs(OpcList: {G_AMDGPU_S_BUFFER_PREFETCH})
1619 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {SgprV4S32_ReadFirstLane, Imm, SgprB32_ReadFirstLane}}});
1620
1621 Predicate IsDataPF([](const MachineInstr &MI) -> bool {
1622 // prefetch cache type: 0 == instruction (I$) prefetch, 1 == data prefetch.
1623 return MI.getOperand(i: 3).getImm() != 0;
1624 });
1625
1626 bool HasSMemPF = ST->hasSafeSmemPrefetch();
1627 bool HasVMemPF = ST->hasVmemPrefInsts();
1628 addRulesForGOpcs(OpcList: {G_PREFETCH})
1629 // Safe smem prefetch keeps both data and instruction prefetch.
1630 .Any(Init: {.Predicate: {UniPtr64}, .OperandMapping: {{}, {SgprPtr64}}}, STPred: HasSMemPF)
1631 // Vmem prefetch keeps data prefetch only.
1632 .Any(Init: {.Predicate: {{UniPtr64}, IsDataPF}, .OperandMapping: {{}, {SgprPtr64}}}, STPred: !HasSMemPF && HasVMemPF)
1633 .Any(Init: {.Predicate: {{UniPtr64}, IsDataPF}, .OperandMapping: {{}, {}, DeletePrefetch}},
1634 STPred: !HasSMemPF && !HasVMemPF)
1635 .Any(Init: {.Predicate: {{UniPtr64}, !IsDataPF}, .OperandMapping: {{}, {}, DeletePrefetch}}, STPred: !HasSMemPF)
1636
1637 .Any(Init: {.Predicate: {{DivPtr64}, IsDataPF}, .OperandMapping: {{}, {VgprPtr64}}}, STPred: HasVMemPF)
1638 .Any(Init: {.Predicate: {{DivPtr64}, IsDataPF}, .OperandMapping: {{}, {}, DeletePrefetch}}, STPred: !HasVMemPF)
1639 .Any(Init: {.Predicate: {{DivPtr64}, !IsDataPF}, .OperandMapping: {{}, {}, DeletePrefetch}})
1640
1641 .Any(Init: {.Predicate: {P3}, .OperandMapping: {{}, {}, DeletePrefetch}})
1642 .Any(Init: {.Predicate: {P5}, .OperandMapping: {{}, {}, DeletePrefetch}})
1643 .Any(Init: {.Predicate: {UniP6}, .OperandMapping: {{}, {SgprP6}}}, STPred: HasSMemPF)
1644 .Any(Init: {.Predicate: {UniP6}, .OperandMapping: {{}, {}, DeletePrefetch}}, STPred: !HasSMemPF);
1645
// G_FPEXT rules; each predicate lists {dst, src}.
// S16 -> S32: divergent uses Vgpr32/Vgpr16; uniform uses Sgpr32/Sgpr16 when
// hasSALUFloat, otherwise UniInVgprS32 with a Vgpr16 source.
// S32 -> S64: both uniform and divergent take a Vgpr32 source (UniInVgprS64 /
// Vgpr64 destination); there is no Sgpr variant.
1646 addRulesForGOpcs(OpcList: {G_FPEXT})
1647 .Any(Init: {.Predicate: {DivS32, S16}, .OperandMapping: {{Vgpr32}, {Vgpr16}}})
1648 .Any(Init: {.Predicate: {UniS64, S32}, .OperandMapping: {{UniInVgprS64}, {Vgpr32}}})
1649 .Any(Init: {.Predicate: {DivS64, S32}, .OperandMapping: {{Vgpr64}, {Vgpr32}}})
1650 .Any(Init: {.Predicate: {UniS32, S16}, .OperandMapping: {{Sgpr32}, {Sgpr16}}}, STPred: hasSALUFloat)
1651 .Any(Init: {.Predicate: {UniS32, S16}, .OperandMapping: {{UniInVgprS32}, {Vgpr16}}}, STPred: !hasSALUFloat);
1652
// G_AMDGPU_CVT_PK_I16_I32: V2S16 result from two Vgpr32 sources; the uniform
// case uses the UniInVgprV2S16 destination ID.
1653 addRulesForGOpcs(OpcList: {G_AMDGPU_CVT_PK_I16_I32}, FastTypes: Standard)
1654 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {Vgpr32, Vgpr32}})
1655 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {Vgpr32, Vgpr32}});
1656
// Legacy fmin/fmax: S32 only, both sources Vgpr32 for uniform and divergent.
1657 addRulesForGOpcs(OpcList: {G_AMDGPU_FMIN_LEGACY, G_AMDGPU_FMAX_LEGACY}, FastTypes: Standard)
1658 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}})
1659 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}});
1660
// Subtarget flag gating the Sgpr variants of G_FMINIMUM / G_FMAXIMUM.
1661 bool hasSALUMinimumMaximumInsts = ST->hasSALUMinimumMaximumInsts();
1662
// G_FMINIMUM / G_FMAXIMUM: uniform S16 and S32 use Sgpr operands when
// hasSALUMinimumMaximumInsts, otherwise the UniInVgpr* IDs with Vgpr sources.
// S64 and V2S16 only have UniInVgpr* / Vgpr rules.
1663 addRulesForGOpcs(OpcList: {G_FMINIMUM, G_FMAXIMUM}, FastTypes: Standard)
1664 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {Sgpr16, Sgpr16}}, STPred: hasSALUMinimumMaximumInsts)
1665 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16}}, STPred: !hasSALUMinimumMaximumInsts)
1666 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
1667 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}}, STPred: hasSALUMinimumMaximumInsts)
1668 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}}, STPred: !hasSALUMinimumMaximumInsts)
1669 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
1670 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64, Vgpr64}})
1671 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr64}})
1672 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
1673 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}});
1674
// fminnum/fmaxnum family (IEEE, plain and *NUM variants) shares one table.
// Uniform S16 and S32 use Sgpr operands when hasSALUFloat, otherwise the
// UniInVgpr* IDs; S64 and V2S16 only have UniInVgpr* / Vgpr rules.
1675 addRulesForGOpcs(OpcList: {G_FMINNUM_IEEE, G_FMAXNUM_IEEE, G_FMINNUM, G_FMAXNUM,
1676 G_FMINIMUMNUM, G_FMAXIMUMNUM},
1677 FastTypes: Standard)
1678 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16, Vgpr16}})
1679 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32, Vgpr32}})
1680 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64, Vgpr64}})
1681 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64, Vgpr64}})
1682 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
1683 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
1684 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {Sgpr16, Sgpr16}}, STPred: hasSALUFloat)
1685 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16, Vgpr16}}, STPred: !hasSALUFloat)
1686 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32, Sgpr32}}, STPred: hasSALUFloat)
1687 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32, Vgpr32}}, STPred: !hasSALUFloat);
1688
// G_FPTRUNC rules; each predicate lists {dst, src}. Only the uniform
// S32 -> S16 case has an Sgpr variant (gated on hasSALUFloat); the S64 -> S32
// and V2S32 -> V2S16 cases always take Vgpr sources.
1689 addRulesForGOpcs(OpcList: {G_FPTRUNC})
1690 .Any(Init: {.Predicate: {DivS16, S32}, .OperandMapping: {{Vgpr16}, {Vgpr32}}})
1691 .Any(Init: {.Predicate: {UniS32, S64}, .OperandMapping: {{UniInVgprS32}, {Vgpr64}}})
1692 .Any(Init: {.Predicate: {DivS32, S64}, .OperandMapping: {{Vgpr32}, {Vgpr64}}})
1693 .Any(Init: {.Predicate: {UniV2S16, V2S32}, .OperandMapping: {{UniInVgprV2S16}, {VgprV2S32}}})
1694 .Any(Init: {.Predicate: {DivV2S16, V2S32}, .OperandMapping: {{VgprV2S16}, {VgprV2S32}}})
1695 .Any(Init: {.Predicate: {UniS16, S32}, .OperandMapping: {{Sgpr16}, {Sgpr32}}}, STPred: hasSALUFloat)
1696 .Any(Init: {.Predicate: {UniS16, S32}, .OperandMapping: {{UniInVgprS16}, {Vgpr32}}}, STPred: !hasSALUFloat);
1697
// G_INTRINSIC_FPTRUNC_ROUND: same {dst, src} layout. S32 -> S16 uniform is
// Sgpr when hasSALUFloat; the S64-source cases only have Vgpr-source rules.
1698 addRulesForGOpcs(OpcList: {G_INTRINSIC_FPTRUNC_ROUND})
1699 .Any(Init: {.Predicate: {UniS16, S32}, .OperandMapping: {{Sgpr16}, {Sgpr32}}}, STPred: hasSALUFloat)
1700 .Any(Init: {.Predicate: {UniS16, S32}, .OperandMapping: {{UniInVgprS16}, {Vgpr32}}}, STPred: !hasSALUFloat)
1701 .Any(Init: {.Predicate: {DivS16, S32}, .OperandMapping: {{Vgpr16}, {Vgpr32}}})
1702 .Any(Init: {.Predicate: {UniS16, S64}, .OperandMapping: {{UniInVgprS16}, {Vgpr64}}})
1703 .Any(Init: {.Predicate: {DivS16, S64}, .OperandMapping: {{Vgpr16}, {Vgpr64}}})
1704 .Any(Init: {.Predicate: {UniS32, S64}, .OperandMapping: {{UniInVgprS32}, {Vgpr64}}})
1705 .Any(Init: {.Predicate: {DivS32, S64}, .OperandMapping: {{Vgpr32}, {Vgpr64}}});
1706
// G_IS_FPCLASS: S1 result from an S16/S32/S64 source. The source is always a
// Vgpr ID; the result is Vcc when divergent and UniInVcc when uniform.
1707 addRulesForGOpcs(OpcList: {G_IS_FPCLASS})
1708 .Any(Init: {.Predicate: {DivS1, S16}, .OperandMapping: {{Vcc}, {Vgpr16}}})
1709 .Any(Init: {.Predicate: {UniS1, S16}, .OperandMapping: {{UniInVcc}, {Vgpr16}}})
1710 .Any(Init: {.Predicate: {DivS1, S32}, .OperandMapping: {{Vcc}, {Vgpr32}}})
1711 .Any(Init: {.Predicate: {UniS1, S32}, .OperandMapping: {{UniInVcc}, {Vgpr32}}})
1712 .Any(Init: {.Predicate: {DivS1, S64}, .OperandMapping: {{Vcc}, {Vgpr64}}})
1713 .Any(Init: {.Predicate: {UniS1, S64}, .OperandMapping: {{UniInVcc}, {Vgpr64}}});
1714
// G_FCMP: predicates are {dst, <unchecked>, compared type}; the first source
// slot is mapped with None. Uniform S16/S32 compares use Sgpr32Trunc with Sgpr
// sources when hasSALUFloat, UniInVcc with Vgpr sources otherwise. Divergent
// compares use Vcc; S64 compares have no Sgpr variant.
1715 addRulesForGOpcs(OpcList: {G_FCMP}, FastTypes: Standard)
1716 .Any(Init: {.Predicate: {UniS1, _, S16}, .OperandMapping: {{Sgpr32Trunc}, {None, Sgpr16, Sgpr16}}},
1717 STPred: hasSALUFloat)
1718 .Any(Init: {.Predicate: {UniS1, _, S16}, .OperandMapping: {{UniInVcc}, {None, Vgpr16, Vgpr16}}},
1719 STPred: !hasSALUFloat)
1720 .Any(Init: {.Predicate: {DivS1, _, S16}, .OperandMapping: {{Vcc}, {None, Vgpr16, Vgpr16}}})
1721 .Any(Init: {.Predicate: {UniS1, _, S32}, .OperandMapping: {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}},
1722 STPred: hasSALUFloat)
1723 .Any(Init: {.Predicate: {UniS1, _, S32}, .OperandMapping: {{UniInVcc}, {None, Vgpr32, Vgpr32}}},
1724 STPred: !hasSALUFloat)
1725 .Any(Init: {.Predicate: {DivS1, _, S32}, .OperandMapping: {{Vcc}, {None, Vgpr32, Vgpr32}}})
1726 .Any(Init: {.Predicate: {UniS1, _, S64}, .OperandMapping: {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
1727 .Any(Init: {.Predicate: {DivS1, _, S64}, .OperandMapping: {{Vcc}, {None, Vgpr64, Vgpr64}}});
1728
// Unary FP ops with Vgpr-only rules for S16/S32/S64: uniform results use the
// UniInVgpr* IDs, divergent results the plain Vgpr* IDs.
1729 addRulesForGOpcs(OpcList: {G_INTRINSIC_ROUNDEVEN, G_FEXP2, G_FLOG2}, FastTypes: Standard)
1730 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16}})
1731 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16}})
1732 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32}})
1733 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32}})
1734 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64}})
1735 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64}});
1736
// trunc/floor/ceil: S16 and S32 uniform have both a UniInVgpr* rule and an
// Sgpr rule gated on hasSALUFloat; S64 only has UniInVgpr* / Vgpr rules.
// NOTE(review): the UniInVgpr* Uni rules here carry no STPred and are
// registered before the hasSALUFloat-gated Sgpr rules for the same type,
// whereas neighbouring tables pair hasSALUFloat with !hasSALUFloat.
// Presumably the later registration takes precedence when hasSALUFloat is
// set - confirm against the Uni() implementation.
1737 addRulesForGOpcs(OpcList: {G_INTRINSIC_TRUNC, G_FFLOOR, G_FCEIL}, FastTypes: Standard)
1738 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {Vgpr16}})
1739 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {Sgpr16}}, STPred: hasSALUFloat)
1740 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {Vgpr16}})
1741 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {Vgpr32}})
1742 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {Sgpr32}}, STPred: hasSALUFloat)
1743 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {Vgpr32}})
1744 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {Vgpr64}})
1745 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {Vgpr64}});
1746
// Global/flat load-monitor opcodes, keyed on the B32/B64/B128 result type.
// Uniform results use UniInVgprB* with an SgprPtr64 address; divergent results
// use VgprB* with a VgprPtr64 address.
1747 addRulesForGOpcs(OpcList: {G_AMDGPU_GLOBAL_LOAD_MONITOR, G_AMDGPU_FLAT_LOAD_MONITOR},
1748 FastTypes: StandardB)
1749 .Uni(Ty: B32, RuleApplyIDs: {{UniInVgprB32}, {SgprPtr64}})
1750 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {VgprPtr64}})
1751 .Uni(Ty: B64, RuleApplyIDs: {{UniInVgprB64}, {SgprPtr64}})
1752 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {VgprPtr64}})
1753 .Uni(Ty: B128, RuleApplyIDs: {{UniInVgprB128}, {SgprPtr64}})
1754 .Div(Ty: B128, RuleApplyIDs: {{VgprB128}, {VgprPtr64}});
1755
// Whole-wave function setup: divergent S1 result mapped to Vcc, no sources.
1756 addRulesForGOpcs(OpcList: {G_AMDGPU_WHOLE_WAVE_FUNC_SETUP})
1757 .Any(Init: {.Predicate: {DivS1}, .OperandMapping: {{Vcc}, {}}});
1758
// Whole-wave function return: empty predicate, single source mapped to Vcc.
1759 addRulesForGOpcs(OpcList: {G_AMDGPU_WHOLE_WAVE_FUNC_RETURN}).Any(Init: {.Predicate: {}, .OperandMapping: {{}, {Vcc}}});
1760
// From here on intrinsic IDs are written unqualified (e.g. returnaddress).
1761 using namespace Intrinsic;
1762
// returnaddress: uniform P0 result mapped to SgprP0; no source mapping.
1763 addRulesForIOpcs(OpcList: {returnaddress}).Any(Init: {.Predicate: {UniP0}, .OperandMapping: {{SgprP0}, {}}});
1764
1765 // Note: amdgcn.icmp with i1 inputs is legalized to ballot in the legalizer,
1766 // so no S1 rules are needed here.
// Predicates are {dst, <unchecked>, compared type}. The uniform S64 or S32
// result is mapped to Sgpr64 / Sgpr32, and both compared values are always
// mapped to Vgpr IDs of the compared width.
1767 addRulesForIOpcs(OpcList: {amdgcn_icmp})
1768 .Any(Init: {.Predicate: {UniS64, _, S16}, .OperandMapping: {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
1769 .Any(Init: {.Predicate: {UniS64, _, S32}, .OperandMapping: {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
1770 .Any(Init: {.Predicate: {UniS64, _, S64}, .OperandMapping: {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
1771
1772 .Any(Init: {.Predicate: {UniS32, _, S16}, .OperandMapping: {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
1773 .Any(Init: {.Predicate: {UniS32, _, S32}, .OperandMapping: {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
1774 .Any(Init: {.Predicate: {UniS32, _, S64}, .OperandMapping: {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
1775
// amdgcn_fcmp uses the same six rules as amdgcn_icmp above.
1776 addRulesForIOpcs(OpcList: {amdgcn_fcmp})
1777 .Any(Init: {.Predicate: {UniS64, _, S16}, .OperandMapping: {{Sgpr64}, {IntrId, Vgpr16, Vgpr16}}})
1778 .Any(Init: {.Predicate: {UniS64, _, S32}, .OperandMapping: {{Sgpr64}, {IntrId, Vgpr32, Vgpr32}}})
1779 .Any(Init: {.Predicate: {UniS64, _, S64}, .OperandMapping: {{Sgpr64}, {IntrId, Vgpr64, Vgpr64}}})
1780
1781 .Any(Init: {.Predicate: {UniS32, _, S16}, .OperandMapping: {{Sgpr32}, {IntrId, Vgpr16, Vgpr16}}})
1782 .Any(Init: {.Predicate: {UniS32, _, S32}, .OperandMapping: {{Sgpr32}, {IntrId, Vgpr32, Vgpr32}}})
1783 .Any(Init: {.Predicate: {UniS32, _, S64}, .OperandMapping: {{Sgpr32}, {IntrId, Vgpr64, Vgpr64}}});
1784
// s_getpc: uniform S64 result in Sgpr64; no source mapping listed.
1785 addRulesForIOpcs(OpcList: {amdgcn_s_getpc}).Any(Init: {.Predicate: {UniS64, _}, .OperandMapping: {{Sgpr64}, {}}});
1786
// s_getreg: empty predicate; Sgpr32 result, only the intrinsic ID as source.
1787 addRulesForIOpcs(OpcList: {amdgcn_s_getreg}).Any(Init: {.Predicate: {}, .OperandMapping: {{Sgpr32}, {IntrId}}});
1788
// s_setreg: no result; the S32 value operand uses SgprB32_ReadFirstLane.
1789 addRulesForIOpcs(OpcList: {amdgcn_s_setreg})
1790 .Any(Init: {.Predicate: {_, _, S32}, .OperandMapping: {{}, {IntrId, Imm, SgprB32_ReadFirstLane}}});
1791
// s_sendmsg / s_sendmsghalt: no result; the register operand uses SgprB32_M0.
1792 addRulesForIOpcs(OpcList: {amdgcn_s_sendmsg, amdgcn_s_sendmsghalt})
1793 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, Imm, SgprB32_M0}}});
1794
// s_sendmsg_rtn: S32 or S64 result in an Sgpr of matching width.
1795 addRulesForIOpcs(OpcList: {amdgcn_s_sendmsg_rtn})
1796 .Any(Init: {.Predicate: {S32}, .OperandMapping: {{Sgpr32}, {}}})
1797 .Any(Init: {.Predicate: {S64}, .OperandMapping: {{Sgpr64}, {}}});
1798
// Time counters: only a uniform S64 rule is registered (Sgpr64 result).
1799 addRulesForIOpcs(OpcList: {amdgcn_s_memrealtime, amdgcn_s_memtime}, FastTypes: Standard)
1800 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {IntrId}});
1801
// Source-less intrinsics with a uniform S32 result in Sgpr32.
1802 addRulesForIOpcs(OpcList: {amdgcn_groupstaticsize, amdgcn_pops_exiting_wave_id,
1803 amdgcn_reloc_constant, amdgcn_s_get_waveid_in_workgroup},
1804 FastTypes: Standard)
1805 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {IntrId}});
1806
1807 // Intrinsics with no register operands.
// All of them share one rule: empty predicate, empty dst and src mappings.
1808 addRulesForIOpcs(OpcList: {amdgcn_asyncmark,
1809 amdgcn_endpgm,
1810 amdgcn_iglp_opt,
1811 amdgcn_init_exec,
1812 amdgcn_s_barrier,
1813 amdgcn_s_barrier_leave,
1814 amdgcn_s_barrier_signal,
1815 amdgcn_s_barrier_wait,
1816 amdgcn_s_monitor_sleep,
1817 amdgcn_s_nop,
1818 amdgcn_s_sethalt,
1819 amdgcn_s_setprio,
1820 amdgcn_s_setprio_inc_wg,
1821 amdgcn_s_sleep,
1822 amdgcn_s_ttracedata_imm,
1823 amdgcn_s_wait_asynccnt,
1824 amdgcn_s_wait_bvhcnt,
1825 amdgcn_s_wait_dscnt,
1826 amdgcn_s_wait_event,
1827 amdgcn_s_wait_event_export_ready,
1828 amdgcn_s_wait_expcnt,
1829 amdgcn_s_wait_kmcnt,
1830 amdgcn_s_wait_loadcnt,
1831 amdgcn_s_wait_samplecnt,
1832 amdgcn_s_wait_storecnt,
1833 amdgcn_s_wait_tensorcnt,
1834 amdgcn_s_waitcnt,
1835 amdgcn_sched_barrier,
1836 amdgcn_sched_group_barrier,
1837 amdgcn_unreachable,
1838 amdgcn_wait_asyncmark,
1839 amdgcn_wave_barrier})
1840 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {}}});
1841
// init_exec_from_input: no result, one Sgpr32 source after the intrinsic ID.
1842 addRulesForIOpcs(OpcList: {amdgcn_init_exec_from_input})
1843 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, Sgpr32}}});
1844
// s_ttracedata: no result, single source mapped with SgprB32_M0.
1845 addRulesForIOpcs(OpcList: {amdgcn_s_ttracedata}).Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, SgprB32_M0}}});
1846
// s_sleep_var: no result, single source mapped with SgprB32_ReadFirstLane.
1847 addRulesForIOpcs(OpcList: {amdgcn_s_sleep_var})
1848 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, SgprB32_ReadFirstLane}}});
1849
// Barrier join / wakeup: no result, single source mapped with SgprB32_M0.
1850 addRulesForIOpcs(OpcList: {amdgcn_s_barrier_join, amdgcn_s_wakeup_barrier})
1851 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, SgprB32_M0}}});
1852
// Barrier signal_var / init: no result, both sources mapped with SgprB32_M0.
1853 addRulesForIOpcs(OpcList: {amdgcn_s_barrier_signal_var, amdgcn_s_barrier_init})
1854 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, SgprB32_M0, SgprB32_M0}}});
1855
// s_barrier_signal_isfirst: uniform S1 result mapped with Sgpr32Trunc.
1856 addRulesForIOpcs(OpcList: {amdgcn_s_barrier_signal_isfirst})
1857 .Any(Init: {.Predicate: {UniS1}, .OperandMapping: {{Sgpr32Trunc}, {}}});
1858
// Barrier state queries: uniform S32 result in Sgpr32, source via SgprB32_M0.
1859 addRulesForIOpcs(
1860 OpcList: {amdgcn_s_get_named_barrier_state, amdgcn_s_get_barrier_state}, FastTypes: Standard)
1861 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {IntrId, SgprB32_M0}});
1862
// flat_prefetch: no result; the pointer operand is mapped to VgprP0.
1863 addRulesForIOpcs(OpcList: {amdgcn_flat_prefetch}).Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, VgprP0}}});
1864
// global_prefetch: no result; the pointer operand is mapped to VgprP1.
1865 addRulesForIOpcs(OpcList: {amdgcn_global_prefetch}).Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, VgprP1}}});
1866
// s_prefetch_data / s_prefetch_inst: no result; sources are mapped with
// SgprB64_ReadFirstLane and SgprB32_ReadFirstLane.
1867 addRulesForIOpcs(OpcList: {amdgcn_s_prefetch_data, amdgcn_s_prefetch_inst})
1868 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, SgprB64_ReadFirstLane, SgprB32_ReadFirstLane}}});
1869
// amdgcn_class: predicates are {dst, <unchecked>, tested type}. The tested
// value is a Vgpr of its own width and the second source is always Vgpr32;
// the S1 result is UniInVcc when uniform and Vcc when divergent.
1870 addRulesForIOpcs(OpcList: {amdgcn_class})
1871 .Any(Init: {.Predicate: {UniS1, _, S16}, .OperandMapping: {{UniInVcc}, {IntrId, Vgpr16, Vgpr32}}})
1872 .Any(Init: {.Predicate: {DivS1, _, S16}, .OperandMapping: {{Vcc}, {IntrId, Vgpr16, Vgpr32}}})
1873 .Any(Init: {.Predicate: {UniS1, _, S32}, .OperandMapping: {{UniInVcc}, {IntrId, Vgpr32, Vgpr32}}})
1874 .Any(Init: {.Predicate: {DivS1, _, S32}, .OperandMapping: {{Vcc}, {IntrId, Vgpr32, Vgpr32}}})
1875 .Any(Init: {.Predicate: {UniS1, _, S64}, .OperandMapping: {{UniInVcc}, {IntrId, Vgpr64, Vgpr32}}})
1876 .Any(Init: {.Predicate: {DivS1, _, S64}, .OperandMapping: {{Vcc}, {IntrId, Vgpr64, Vgpr32}}});
1877
1878 // The operand is the "intrinsic lane mask"; its type was set to i32/i64 in LLVM IR.
// end_cf: no result; the uniform S32 or S64 mask is mapped to Sgpr32 / Sgpr64.
1879 addRulesForIOpcs(OpcList: {amdgcn_end_cf})
1880 .Any(Init: {.Predicate: {_, UniS32}, .OperandMapping: {{}, {IntrId, Sgpr32}}})
1881 .Any(Init: {.Predicate: {_, UniS64}, .OperandMapping: {{}, {IntrId, Sgpr64}}});
1882
// if_break: uniform S64 or S32 result in an Sgpr; sources are a Vcc operand
// followed by an Sgpr of the same width as the result.
1883 addRulesForIOpcs(OpcList: {amdgcn_if_break}, FastTypes: Standard)
1884 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {IntrId, Vcc, Sgpr64}})
1885 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {IntrId, Vcc, Sgpr32}});
1886
// amdgcn_exp: no result; two immediates followed by four S32 values in Vgpr32.
1887 addRulesForIOpcs(OpcList: {amdgcn_exp})
1888 .Any(Init: {.Predicate: {_, _, _, S32, S32, S32, S32},
1889 .OperandMapping: {{}, {IntrId, Imm, Imm, Vgpr32, Vgpr32, Vgpr32, Vgpr32}}});
1890
// amdgcn_exp_compr: no result; two immediates followed by two VgprV2S16
// values (the predicate only checks the first one for V2S16).
1891 addRulesForIOpcs(OpcList: {amdgcn_exp_compr})
1892 .Any(Init: {.Predicate: {_, _, _, V2S16}, .OperandMapping: {{}, {IntrId, Imm, Imm, VgprV2S16, VgprV2S16}}});
1893
// amdgcn_exp_row: like amdgcn_exp, plus a trailing immediate and an S32
// operand mapped with SgprB32_M0.
1894 addRulesForIOpcs(OpcList: {amdgcn_exp_row})
1895 .Any(Init: {.Predicate: {_, _, _, S32, S32, S32, S32, _, S32},
1896 .OperandMapping: {{},
1897 {IntrId, Imm, Imm, Vgpr32, Vgpr32, Vgpr32, Vgpr32, Imm,
1898 SgprB32_M0}}});
1899
// lds_direct_load: only a divergent B32 rule; the source uses SgprB32_M0.
1900 addRulesForIOpcs(OpcList: {amdgcn_lds_direct_load}, FastTypes: StandardB)
1901 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {IntrId, SgprB32_M0}});
1902
// lds_param_load: only a divergent S32 rule; two immediates, then SgprB32_M0.
1903 addRulesForIOpcs(OpcList: {amdgcn_lds_param_load}, FastTypes: Standard)
1904 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Imm, Imm, SgprB32_M0}});
1905
// mbcnt_lo / mbcnt_hi: only a divergent S32 rule is registered.
// NOTE(review): the dst list is empty and the first entry of the second list
// is Vgpr32 followed by None - the same layout as amdgcn_readfirstlane below.
// Presumably that first entry stands for the result and None skips the
// intrinsic ID; confirm against how these lists are applied.
1906 addRulesForIOpcs(OpcList: {amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, FastTypes: Standard)
1907 .Div(Ty: S32, RuleApplyIDs: {{}, {Vgpr32, None, Vgpr32, Vgpr32}});
1908
// readfirstlane: uniform B32 result from a divergent B32 source, plus a
// uniform-S32-from-uniform-S32 case described by the comment below.
1909 addRulesForIOpcs(OpcList: {amdgcn_readfirstlane})
1910 .Any(Init: {.Predicate: {UniB32, _, DivB32}, .OperandMapping: {{}, {SgprB32, None, VgprB32}}})
1911 // This case should not exist in the first place; it comes from call lowering,
1912 // which inserts a readfirstlane just in case the register is not in an sgpr.
1913 .Any(Init: {.Predicate: {UniS32, _, UniS32}, .OperandMapping: {{}, {Sgpr32, None, Vgpr32}}});
1914
// readlane: uniform B32 result; VgprB32 value, lane via SgprB32_ReadFirstLane.
1915 addRulesForIOpcs(OpcList: {amdgcn_readlane}, FastTypes: StandardB)
1916 .Uni(Ty: B32, RuleApplyIDs: {{SgprB32}, {IntrId, VgprB32, SgprB32_ReadFirstLane}});
1917
// s_quadmask / s_wqm: uniform B32 or B64 result in an Sgpr; the source uses
// the ReadFirstLane ID of matching width.
1918 addRulesForIOpcs(OpcList: {amdgcn_s_quadmask, amdgcn_s_wqm}, FastTypes: StandardB)
1919 .Uni(Ty: B32, RuleApplyIDs: {{SgprB32}, {IntrId, SgprB32_ReadFirstLane}})
1920 .Uni(Ty: B64, RuleApplyIDs: {{SgprB64}, {IntrId, SgprB64_ReadFirstLane}});
1921
// writelane: divergent B32 result; the first two sources use
// SgprB32_ReadFirstLane and the last one is VgprB32.
1922 addRulesForIOpcs(OpcList: {amdgcn_writelane}, FastTypes: StandardB)
1923 .Div(Ty: B32,
1924 RuleApplyIDs: {{VgprB32},
1925 {IntrId, SgprB32_ReadFirstLane, SgprB32_ReadFirstLane, VgprB32}});
1926
// add_max / add_min (i32/u32): S32 result from three Vgpr32 sources; the
// uniform case uses UniInVgprS32.
1927 addRulesForIOpcs(OpcList: {amdgcn_add_max_i32, amdgcn_add_max_u32, amdgcn_add_min_i32,
1928 amdgcn_add_min_u32},
1929 FastTypes: Standard)
1930 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1931 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1932
// Packed 16-bit variants: V2S16 result from three VgprV2S16 sources.
1933 addRulesForIOpcs(OpcList: {amdgcn_pk_add_max_i16, amdgcn_pk_add_max_u16,
1934 amdgcn_pk_add_min_i16, amdgcn_pk_add_min_u16},
1935 FastTypes: Standard)
1936 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {IntrId, VgprV2S16, VgprV2S16, VgprV2S16}})
1937 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {IntrId, VgprV2S16, VgprV2S16, VgprV2S16}});
1938
// permlane16 / permlanex16: divergent S32 only; two Vgpr32 sources followed by
// two operands mapped with SgprB32_ReadFirstLane.
1939 addRulesForIOpcs(OpcList: {amdgcn_permlane16, amdgcn_permlanex16}, FastTypes: Standard)
1940 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32},
1941 {IntrId, Vgpr32, Vgpr32, SgprB32_ReadFirstLane,
1942 SgprB32_ReadFirstLane}});
1943
// permlane bcast/up/down/xor: divergent B32 only; one VgprB32 source followed
// by two operands mapped with SgprB32_ReadFirstLane.
1944 addRulesForIOpcs(OpcList: {amdgcn_permlane_bcast, amdgcn_permlane_up,
1945 amdgcn_permlane_down, amdgcn_permlane_xor},
1946 FastTypes: StandardB)
1947 .Div(Ty: B32,
1948 RuleApplyIDs: {{VgprB32},
1949 {IntrId, VgprB32, SgprB32_ReadFirstLane, SgprB32_ReadFirstLane}});
1950
// permlane_idx_gen: divergent S32 only; Vgpr32 then SgprB32_ReadFirstLane.
1951 addRulesForIOpcs(OpcList: {amdgcn_permlane_idx_gen}, FastTypes: Standard)
1952 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, SgprB32_ReadFirstLane}});
1953
// perm: S32 result from three Vgpr32 sources (UniInVgprS32 when uniform).
1954 addRulesForIOpcs(OpcList: {amdgcn_perm}, FastTypes: Standard)
1955 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1956 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1957
// Wave reductions for S32 and S64 results. Uniform: Sgpr result and Sgpr
// source. Divergent: the result uses the Sgpr32ToVgprDst / Sgpr64ToVgprDst IDs
// and the source is VgprB32 / VgprB64.
1958 addRulesForIOpcs(
1959 OpcList: {amdgcn_wave_reduce_add, amdgcn_wave_reduce_and, amdgcn_wave_reduce_fadd,
1960 amdgcn_wave_reduce_fmax, amdgcn_wave_reduce_fmin,
1961 amdgcn_wave_reduce_fsub, amdgcn_wave_reduce_max, amdgcn_wave_reduce_min,
1962 amdgcn_wave_reduce_or, amdgcn_wave_reduce_sub, amdgcn_wave_reduce_umax,
1963 amdgcn_wave_reduce_umin, amdgcn_wave_reduce_xor},
1964 FastTypes: Standard)
1965 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {IntrId, Sgpr32}})
1966 .Div(Ty: S32, RuleApplyIDs: {{Sgpr32ToVgprDst}, {IntrId, VgprB32}})
1967 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {IntrId, Sgpr64}})
1968 .Div(Ty: S64, RuleApplyIDs: {{Sgpr64ToVgprDst}, {IntrId, VgprB64}});
1969
// wave_shuffle: S32 result from two Vgpr32 sources (UniInVgprS32 if uniform).
1970 addRulesForIOpcs(OpcList: {amdgcn_wave_shuffle}, FastTypes: Standard)
1971 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
1972 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}});
1973
// bitop3 / fmad_ftz: three-source ops for S16 and S32, all operands in Vgprs
// of the result width; uniform results use the UniInVgpr* IDs.
1974 addRulesForIOpcs(OpcList: {amdgcn_bitop3, amdgcn_fmad_ftz}, FastTypes: Standard)
1975 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
1976 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
1977 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1978 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1979
// Dot-product intrinsics: S32 result from three Vgpr32 register operands.
1980 addRulesForIOpcs(OpcList: {amdgcn_udot4, amdgcn_sdot4, amdgcn_udot8, amdgcn_sdot8,
1981 amdgcn_dot4_f32_bf8_bf8, amdgcn_dot4_f32_bf8_fp8,
1982 amdgcn_dot4_f32_fp8_fp8, amdgcn_dot4_f32_fp8_bf8},
1983 FastTypes: Standard)
1984 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1985 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1986
// rsq / rsq_clamp: uniform S16 and S32 use Sgpr operands when hasPST (a flag
// defined outside this excerpt), otherwise UniInVgpr* with a Vgpr source.
// S64 only has UniInVgprS64 / Vgpr64 rules.
1987 addRulesForIOpcs(OpcList: {amdgcn_rsq, amdgcn_rsq_clamp}, FastTypes: Standard)
1988 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {IntrId, Sgpr16}}, STPred: hasPST)
1989 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {IntrId, Vgpr16}}, STPred: !hasPST)
1990 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {IntrId, Vgpr16}})
1991 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {IntrId, Sgpr32}}, STPred: hasPST)
1992 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32}}, STPred: !hasPST)
1993 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32}})
1994 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {IntrId, Vgpr64}})
1995 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {IntrId, Vgpr64}});
1996
// mul_u24 / mul_i24: S32 or S64 result; in both cases the two sources are
// Vgpr32. Uniform results use the UniInVgpr* IDs.
1997 addRulesForIOpcs(OpcList: {amdgcn_mul_u24, amdgcn_mul_i24}, FastTypes: Standard)
1998 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
1999 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}})
2000 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {IntrId, Vgpr32, Vgpr32}})
2001 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {IntrId, Vgpr32, Vgpr32}});
2002
// Two-source S32 intrinsics with Vgpr32 operands only.
2003 addRulesForIOpcs(OpcList: {amdgcn_ds_bpermute, amdgcn_ds_bpermute_fi_b32,
2004 amdgcn_ds_permute, amdgcn_fmul_legacy, amdgcn_mulhi_i24,
2005 amdgcn_mulhi_u24},
2006 FastTypes: Standard)
2007 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
2008 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}});
2009
// fp8/bf8 conversions. Every table below keeps all register operands in
// Vgprs; uniform results, where a rule exists, use the UniInVgpr* IDs.

// f32 -> fp8/bf8 (sr and pk forms): S32 result, three Vgpr32 register sources.
2010 addRulesForIOpcs(OpcList: {amdgcn_cvt_sr_bf8_f32, amdgcn_cvt_sr_fp8_f32,
2011 amdgcn_cvt_sr_fp8_f32_e5m3, amdgcn_cvt_pk_bf8_f32,
2012 amdgcn_cvt_pk_fp8_f32, amdgcn_cvt_pk_fp8_f32_e5m3},
2013 FastTypes: Standard)
2014 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
2015 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
2016
// S32 result from a single Vgpr32 register source.
2017 addRulesForIOpcs(OpcList: {amdgcn_cvt_off_f32_i4, amdgcn_cvt_f32_bf8,
2018 amdgcn_cvt_f32_fp8, amdgcn_cvt_f32_fp8_e5m3},
2019 FastTypes: Standard)
2020 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32}})
2021 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32}});
2022
// V2S32 result from a single Vgpr32 register source.
2023 addRulesForIOpcs(OpcList: {amdgcn_cvt_pk_f32_bf8, amdgcn_cvt_pk_f32_fp8})
2024 .Any(Init: {.Predicate: {UniV2S32}, .OperandMapping: {{UniInVgprV2S32}, {IntrId, Vgpr32}}})
2025 .Any(Init: {.Predicate: {DivV2S32}, .OperandMapping: {{VgprV2S32}, {IntrId, Vgpr32}}});
2026
// S16 result from a Vgpr32 register source.
2027 addRulesForIOpcs(OpcList: {amdgcn_cvt_f16_bf8, amdgcn_cvt_f16_fp8}, FastTypes: Standard)
2028 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {IntrId, Vgpr32}})
2029 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {IntrId, Vgpr32}});
2030
// V2S16 result from a Vgpr16 register source.
2031 addRulesForIOpcs(OpcList: {amdgcn_cvt_pk_f16_bf8, amdgcn_cvt_pk_f16_fp8}, FastTypes: Standard)
2032 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {IntrId, Vgpr16}})
2033 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {IntrId, Vgpr16}});
2034
// S16 result from a VgprV2S16 register source.
2035 addRulesForIOpcs(OpcList: {amdgcn_cvt_pk_bf8_f16, amdgcn_cvt_pk_fp8_f16}, FastTypes: Standard)
2036 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {IntrId, VgprV2S16}})
2037 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {IntrId, VgprV2S16}});
2038
// S32 result from a Vgpr16 value followed by two Vgpr32 operands.
2039 addRulesForIOpcs(OpcList: {amdgcn_cvt_sr_bf8_f16, amdgcn_cvt_sr_fp8_f16}, FastTypes: Standard)
2040 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr16, Vgpr32, Vgpr32}})
2041 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr16, Vgpr32, Vgpr32}});
2042
// V2S16 result from three Vgpr32 operands.
2043 addRulesForIOpcs(OpcList: {amdgcn_cvt_sr_pk_f16_f32}, FastTypes: Standard)
2044 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
2045 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
2046
// scalef32_sr_fp8_f16: only a divergent S32 rule is registered here.
2047 addRulesForIOpcs(OpcList: {amdgcn_cvt_scalef32_sr_fp8_f16})
2048 .Any(Init: {.Predicate: {DivS32}, .OperandMapping: {{Vgpr32}, {IntrId, Vgpr32, Vgpr16, Vgpr32, Vgpr32}}});
2049
// scalef32_sr_fp8_f32: only a divergent S32 rule is registered here.
2050 addRulesForIOpcs(OpcList: {amdgcn_cvt_scalef32_sr_fp8_f32})
2051 .Any(Init: {.Predicate: {DivS32}, .OperandMapping: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32, Vgpr32}}});
2052
// Cube intrinsics and fma_legacy: S32 result from three Vgpr32 sources.
2053 addRulesForIOpcs(OpcList: {amdgcn_cubesc, amdgcn_cubetc, amdgcn_cubema, amdgcn_cubeid,
2054 amdgcn_fma_legacy},
2055 FastTypes: Standard)
2056 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
2057 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
2058
// frexp_mant / fract: unary, S16/S32/S64, Vgpr operands only.
2059 addRulesForIOpcs(OpcList: {amdgcn_frexp_mant, amdgcn_fract}, FastTypes: Standard)
2060 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {IntrId, Vgpr16}})
2061 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {IntrId, Vgpr16}})
2062 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32}})
2063 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32}})
2064 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {IntrId, Vgpr64}})
2065 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {IntrId, Vgpr64}});
2066
// prng_b32: S32 result from one Vgpr32 source.
2067 addRulesForIOpcs(OpcList: {amdgcn_prng_b32})
2068 .Any(Init: {.Predicate: {UniS32}, .OperandMapping: {{UniInVgprS32}, {IntrId, Vgpr32}}})
2069 .Any(Init: {.Predicate: {DivS32}, .OperandMapping: {{Vgpr32}, {IntrId, Vgpr32}}});
2070
// sffbh: S32; uniform stays in Sgpr32, divergent in Vgpr32.
2071 addRulesForIOpcs(OpcList: {amdgcn_sffbh}, FastTypes: Standard)
2072 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {IntrId, Sgpr32}})
2073 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32}});
2074
// ubfe / sbfe: value operand has the result width, the other two sources are
// 32-bit. Uniform rules use Sgprs with the S_BFE lowering; divergent S64 uses
// Vgprs with the V_BFE lowering; divergent S32 has no lowering ID.
2075 addRulesForIOpcs(OpcList: {amdgcn_ubfe, amdgcn_sbfe}, FastTypes: Standard)
2076 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
2077 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {IntrId, Sgpr32, Sgpr32, Sgpr32}, S_BFE})
2078 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {IntrId, Sgpr64, Sgpr32, Sgpr32}, S_BFE})
2079 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {IntrId, Vgpr64, Vgpr32, Vgpr32}, V_BFE});
2080
// cvt_pk / cvt_pknorm: V2S16 result from two Vgpr32 sources.
2081 addRulesForIOpcs(OpcList: {amdgcn_cvt_pk_i16, amdgcn_cvt_pk_u16, amdgcn_cvt_pknorm_i16,
2082 amdgcn_cvt_pknorm_u16},
2083 FastTypes: Standard)
2084 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {IntrId, Vgpr32, Vgpr32}})
2085 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {IntrId, Vgpr32, Vgpr32}});
2086
// cvt_pkrtz: as above, but the uniform case uses SgprV2S16 with Sgpr32
// sources when hasSALUFloat.
2087 addRulesForIOpcs(OpcList: {amdgcn_cvt_pkrtz}, FastTypes: Standard)
2088 .Uni(Ty: V2S16, RuleApplyIDs: {{SgprV2S16}, {IntrId, Sgpr32, Sgpr32}}, STPred: hasSALUFloat)
2089 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {IntrId, Vgpr32, Vgpr32}}, STPred: !hasSALUFloat)
2090 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {IntrId, Vgpr32, Vgpr32}});
2091
// scalef32 stochastic-rounding pk32 conversions from 16-bit elements: only a
// divergent V6S32 rule; VgprV32S16 source followed by two Vgpr32 operands.
2092 addRulesForIOpcs(OpcList: {amdgcn_cvt_scalef32_sr_pk32_bf6_f16,
2093 amdgcn_cvt_scalef32_sr_pk32_fp6_f16,
2094 amdgcn_cvt_scalef32_sr_pk32_bf6_bf16,
2095 amdgcn_cvt_scalef32_sr_pk32_fp6_bf16},
2096 FastTypes: Standard)
2097 .Any(Init: {.Predicate: {DivV6S32}, .OperandMapping: {{VgprV6S32}, {IntrId, VgprV32S16, Vgpr32, Vgpr32}}});
2098
// Same for f32 elements: VgprV32S32 source; only a divergent rule.
2099 addRulesForIOpcs(OpcList: {amdgcn_cvt_scalef32_sr_pk32_bf6_f32,
2100 amdgcn_cvt_scalef32_sr_pk32_fp6_f32},
2101 FastTypes: Standard)
2102 .Any(Init: {.Predicate: {DivV6S32}, .OperandMapping: {{VgprV6S32}, {IntrId, VgprV32S32, Vgpr32, Vgpr32}}});
2103
// sr_pk_fp4_f16: S32 result; sources Vgpr32, VgprV2S16, Vgpr32, Vgpr32.
2104 addRulesForIOpcs(OpcList: {amdgcn_cvt_scalef32_sr_pk_fp4_f16}, FastTypes: Standard)
2105 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, VgprV2S16, Vgpr32, Vgpr32}})
2106 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, VgprV2S16, Vgpr32, Vgpr32}});
2107
// sr_pk_fp4_f32: S32 result; sources Vgpr32, VgprV2S32, Vgpr32, Vgpr32.
2108 addRulesForIOpcs(OpcList: {amdgcn_cvt_scalef32_sr_pk_fp4_f32}, FastTypes: Standard)
2109 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, VgprV2S32, Vgpr32, Vgpr32}})
2110 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, VgprV2S32, Vgpr32, Vgpr32}});
2111
// 2xpk16 conversions: V6S32 result from two VgprV16S32 sources and a Vgpr32.
2112 addRulesForIOpcs(
2113 OpcList: {amdgcn_cvt_scalef32_2xpk16_fp6_f32, amdgcn_cvt_scalef32_2xpk16_bf6_f32})
2114 .Any(
2115 Init: {.Predicate: {DivV6S32}, .OperandMapping: {{VgprV6S32}, {IntrId, VgprV16S32, VgprV16S32, Vgpr32}}})
2116 .Any(Init: {.Predicate: {UniV6S32},
2117 .OperandMapping: {{UniInVgprV6S32}, {IntrId, VgprV16S32, VgprV16S32, Vgpr32}}});
2118
// scalef32 conversions (non-stochastic). All register operands are Vgprs;
// uniform results use the UniInVgpr* IDs.

// f16 from fp8/bf8: V2S16 result; sources VgprV2S16, Vgpr32, Vgpr32.
2119 addRulesForIOpcs(OpcList: {amdgcn_cvt_scalef32_f16_fp8, amdgcn_cvt_scalef32_f16_bf8},
2120 FastTypes: Standard)
2121 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {IntrId, VgprV2S16, Vgpr32, Vgpr32}})
2122 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {IntrId, VgprV2S16, Vgpr32, Vgpr32}});
2123
// f32 from fp8/bf8: S32 result; two Vgpr32 sources.
2124 addRulesForIOpcs(OpcList: {amdgcn_cvt_scalef32_f32_fp8, amdgcn_cvt_scalef32_f32_bf8},
2125 FastTypes: Standard)
2126 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}})
2127 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}});
2128
// pk16 bf6/fp6 from f16: V3S32 result; VgprV16S16 source and a Vgpr32.
2129 addRulesForIOpcs(
2130 OpcList: {amdgcn_cvt_scalef32_pk16_bf6_f16, amdgcn_cvt_scalef32_pk16_fp6_f16},
2131 FastTypes: Standard)
2132 .Any(Init: {.Predicate: {DivV3S32}, .OperandMapping: {{VgprV3S32}, {IntrId, VgprV16S16, Vgpr32}}})
2133 .Any(Init: {.Predicate: {UniV3S32}, .OperandMapping: {{UniInVgprV3S32}, {IntrId, VgprV16S16, Vgpr32}}});
2134
// pk16 bf6/fp6 from f32: V3S32 result; VgprV16S32 source and a Vgpr32.
2135 addRulesForIOpcs(
2136 OpcList: {amdgcn_cvt_scalef32_pk16_bf6_f32, amdgcn_cvt_scalef32_pk16_fp6_f32},
2137 FastTypes: Standard)
2138 .Any(Init: {.Predicate: {DivV3S32}, .OperandMapping: {{VgprV3S32}, {IntrId, VgprV16S32, Vgpr32}}})
2139 .Any(Init: {.Predicate: {UniV3S32}, .OperandMapping: {{UniInVgprV3S32}, {IntrId, VgprV16S32, Vgpr32}}});
2140
// pk8 bf8/fp8 from f16: V2S32 result; VgprV8S16 source and a Vgpr32.
2141 addRulesForIOpcs(
2142 OpcList: {amdgcn_cvt_scalef32_pk8_bf8_f16, amdgcn_cvt_scalef32_pk8_fp8_f16},
2143 FastTypes: Standard)
2144 .Any(Init: {.Predicate: {DivV2S32}, .OperandMapping: {{VgprV2S32}, {IntrId, VgprV8S16, Vgpr32}}})
2145 .Any(Init: {.Predicate: {UniV2S32}, .OperandMapping: {{UniInVgprV2S32}, {IntrId, VgprV8S16, Vgpr32}}});
2146
// pk8 bf8/fp8 from f32: V2S32 result; VgprV8S32 source and a Vgpr32.
2147 addRulesForIOpcs(
2148 OpcList: {amdgcn_cvt_scalef32_pk8_bf8_f32, amdgcn_cvt_scalef32_pk8_fp8_f32},
2149 FastTypes: Standard)
2150 .Any(Init: {.Predicate: {DivV2S32}, .OperandMapping: {{VgprV2S32}, {IntrId, VgprV8S32, Vgpr32}}})
2151 .Any(Init: {.Predicate: {UniV2S32}, .OperandMapping: {{UniInVgprV2S32}, {IntrId, VgprV8S32, Vgpr32}}});
2152
// pk8 fp4 from f16: S32 result; VgprV8S16 source and a Vgpr32.
2153 addRulesForIOpcs(OpcList: {amdgcn_cvt_scalef32_pk8_fp4_f16}, FastTypes: Standard)
2154 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, VgprV8S16, Vgpr32}})
2155 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, VgprV8S16, Vgpr32}});
2156
// pk8 fp4 from f32: S32 result; VgprV8S32 source and a Vgpr32.
2157 addRulesForIOpcs(OpcList: {amdgcn_cvt_scalef32_pk8_fp4_f32}, FastTypes: Standard)
2158 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, VgprV8S32, Vgpr32}})
2159 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, VgprV8S32, Vgpr32}});
2160
// Stochastic-rounding (sr) scalef32 pk16/pk8 conversions. Compared with the
// non-sr tables, each rule lists one more Vgpr32 source. All register operands
// are Vgprs; uniform results use the UniInVgpr* IDs.

// sr pk16 bf6/fp6 from f16: V3S32 result; VgprV16S16 plus two Vgpr32.
2161 addRulesForIOpcs(OpcList: {amdgcn_cvt_scalef32_sr_pk16_bf6_f16,
2162 amdgcn_cvt_scalef32_sr_pk16_fp6_f16},
2163 FastTypes: Standard)
2164 .Any(Init: {.Predicate: {DivV3S32}, .OperandMapping: {{VgprV3S32}, {IntrId, VgprV16S16, Vgpr32, Vgpr32}}})
2165 .Any(Init: {.Predicate: {UniV3S32},
2166 .OperandMapping: {{UniInVgprV3S32}, {IntrId, VgprV16S16, Vgpr32, Vgpr32}}});
2167
// sr pk16 bf6/fp6 from f32: V3S32 result; VgprV16S32 plus two Vgpr32.
2168 addRulesForIOpcs(OpcList: {amdgcn_cvt_scalef32_sr_pk16_bf6_f32,
2169 amdgcn_cvt_scalef32_sr_pk16_fp6_f32},
2170 FastTypes: Standard)
2171 .Any(Init: {.Predicate: {DivV3S32}, .OperandMapping: {{VgprV3S32}, {IntrId, VgprV16S32, Vgpr32, Vgpr32}}})
2172 .Any(Init: {.Predicate: {UniV3S32},
2173 .OperandMapping: {{UniInVgprV3S32}, {IntrId, VgprV16S32, Vgpr32, Vgpr32}}});
2174
// sr pk8 bf8/fp8 from f16: V2S32 result; VgprV8S16 plus two Vgpr32.
2175 addRulesForIOpcs(
2176 OpcList: {amdgcn_cvt_scalef32_sr_pk8_bf8_f16, amdgcn_cvt_scalef32_sr_pk8_fp8_f16},
2177 FastTypes: Standard)
2178 .Any(Init: {.Predicate: {DivV2S32}, .OperandMapping: {{VgprV2S32}, {IntrId, VgprV8S16, Vgpr32, Vgpr32}}})
2179 .Any(Init: {.Predicate: {UniV2S32},
2180 .OperandMapping: {{UniInVgprV2S32}, {IntrId, VgprV8S16, Vgpr32, Vgpr32}}});
2181
// sr pk8 bf8/fp8 from f32: V2S32 result; VgprV8S32 plus two Vgpr32.
2182 addRulesForIOpcs(
2183 OpcList: {amdgcn_cvt_scalef32_sr_pk8_bf8_f32, amdgcn_cvt_scalef32_sr_pk8_fp8_f32},
2184 FastTypes: Standard)
2185 .Any(Init: {.Predicate: {DivV2S32}, .OperandMapping: {{VgprV2S32}, {IntrId, VgprV8S32, Vgpr32, Vgpr32}}})
2186 .Any(Init: {.Predicate: {UniV2S32},
2187 .OperandMapping: {{UniInVgprV2S32}, {IntrId, VgprV8S32, Vgpr32, Vgpr32}}});
2188
// sr pk8 fp4 from f16: S32 result; VgprV8S16 plus two Vgpr32.
2189 addRulesForIOpcs(OpcList: {amdgcn_cvt_scalef32_sr_pk8_fp4_f16}, FastTypes: Standard)
2190 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, VgprV8S16, Vgpr32, Vgpr32}})
2191 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, VgprV8S16, Vgpr32, Vgpr32}});
2192
// sr pk8 fp4 from f32: S32 result; VgprV8S32 plus two Vgpr32.
2193 addRulesForIOpcs(OpcList: {amdgcn_cvt_scalef32_sr_pk8_fp4_f32}, FastTypes: Standard)
2194 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, VgprV8S32, Vgpr32, Vgpr32}})
2195 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, VgprV8S32, Vgpr32, Vgpr32}});
2196
// cvt_scale_pk* conversions and scalef32_pk32. All register operands are
// Vgprs; uniform results use the UniInVgpr* IDs.

// pk16 f16 from bf6/fp6: V16S16 result; VgprV3S32 source and a Vgpr32.
2197 addRulesForIOpcs(
2198 OpcList: {amdgcn_cvt_scale_pk16_f16_bf6, amdgcn_cvt_scale_pk16_f16_fp6}, FastTypes: Standard)
2199 .Any(Init: {.Predicate: {DivV16S16}, .OperandMapping: {{VgprV16S16}, {IntrId, VgprV3S32, Vgpr32}}})
2200 .Any(Init: {.Predicate: {UniV16S16}, .OperandMapping: {{UniInVgprV16S16}, {IntrId, VgprV3S32, Vgpr32}}});
2201
// pk16 f32 from bf6/fp6: V16S32 result; VgprV3S32 source and a Vgpr32.
2202 addRulesForIOpcs(
2203 OpcList: {amdgcn_cvt_scale_pk16_f32_bf6, amdgcn_cvt_scale_pk16_f32_fp6}, FastTypes: Standard)
2204 .Any(Init: {.Predicate: {DivV16S32}, .OperandMapping: {{VgprV16S32}, {IntrId, VgprV3S32, Vgpr32}}})
2205 .Any(Init: {.Predicate: {UniV16S32}, .OperandMapping: {{UniInVgprV16S32}, {IntrId, VgprV3S32, Vgpr32}}});
2206
// pk8 f16 from bf8/fp8: V8S16 result; VgprV2S32 source and a Vgpr32.
2207 addRulesForIOpcs(OpcList: {amdgcn_cvt_scale_pk8_f16_bf8, amdgcn_cvt_scale_pk8_f16_fp8},
2208 FastTypes: Standard)
2209 .Any(Init: {.Predicate: {DivV8S16}, .OperandMapping: {{VgprV8S16}, {IntrId, VgprV2S32, Vgpr32}}})
2210 .Any(Init: {.Predicate: {UniV8S16}, .OperandMapping: {{UniInVgprV8S16}, {IntrId, VgprV2S32, Vgpr32}}});
2211
// pk8 f16 from fp4: V8S16 result; two Vgpr32 sources.
2212 addRulesForIOpcs(OpcList: {amdgcn_cvt_scale_pk8_f16_fp4}, FastTypes: Standard)
2213 .Any(Init: {.Predicate: {DivV8S16}, .OperandMapping: {{VgprV8S16}, {IntrId, Vgpr32, Vgpr32}}})
2214 .Any(Init: {.Predicate: {UniV8S16}, .OperandMapping: {{UniInVgprV8S16}, {IntrId, Vgpr32, Vgpr32}}});
2215
// pk8 f32 from bf8/fp8: V8S32 result; VgprV2S32 source and a Vgpr32.
2216 addRulesForIOpcs(OpcList: {amdgcn_cvt_scale_pk8_f32_bf8, amdgcn_cvt_scale_pk8_f32_fp8},
2217 FastTypes: Standard)
2218 .Any(Init: {.Predicate: {DivV8S32}, .OperandMapping: {{VgprV8S32}, {IntrId, VgprV2S32, Vgpr32}}})
2219 .Any(Init: {.Predicate: {UniV8S32}, .OperandMapping: {{UniInVgprV8S32}, {IntrId, VgprV2S32, Vgpr32}}});
2220
// pk8 f32 from fp4: V8S32 result; two Vgpr32 sources.
2221 addRulesForIOpcs(OpcList: {amdgcn_cvt_scale_pk8_f32_fp4}, FastTypes: Standard)
2222 .Any(Init: {.Predicate: {DivV8S32}, .OperandMapping: {{VgprV8S32}, {IntrId, Vgpr32, Vgpr32}}})
2223 .Any(Init: {.Predicate: {UniV8S32}, .OperandMapping: {{UniInVgprV8S32}, {IntrId, Vgpr32, Vgpr32}}});
2224
// scalef32 pk32 bf6/fp6 from f16: V6S32 result; VgprV32S16 source and a Vgpr32.
2225 addRulesForIOpcs(
2226 OpcList: {amdgcn_cvt_scalef32_pk32_bf6_f16, amdgcn_cvt_scalef32_pk32_fp6_f16},
2227 FastTypes: Standard)
2228 .Any(Init: {.Predicate: {DivV6S32}, .OperandMapping: {{VgprV6S32}, {IntrId, VgprV32S16, Vgpr32}}})
2229 .Any(Init: {.Predicate: {UniV6S32}, .OperandMapping: {{UniInVgprV6S32}, {IntrId, VgprV32S16, Vgpr32}}});
2230
2231 addRulesForIOpcs(
2232 OpcList: {amdgcn_cvt_scalef32_pk_fp8_f32, amdgcn_cvt_scalef32_pk_bf8_f32},
2233 FastTypes: Standard)
2234 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {IntrId, VgprV2S16, Vgpr32, Vgpr32, Vgpr32}})
2235 .Uni(Ty: V2S16,
2236 RuleApplyIDs: {{UniInVgprV2S16}, {IntrId, VgprV2S16, Vgpr32, Vgpr32, Vgpr32}});
2237
2238 addRulesForIOpcs(
2239 OpcList: {amdgcn_cvt_scalef32_pk_f32_fp8, amdgcn_cvt_scalef32_pk_f32_bf8},
2240 FastTypes: Standard)
2241 .Any(Init: {.Predicate: {DivV2S32}, .OperandMapping: {{VgprV2S32}, {IntrId, Vgpr32, Vgpr32}}})
2242 .Any(Init: {.Predicate: {UniV2S32}, .OperandMapping: {{UniInVgprV2S32}, {IntrId, Vgpr32, Vgpr32}}});
2243
2244 addRulesForIOpcs(
2245 OpcList: {amdgcn_cvt_scalef32_pk_fp8_f16, amdgcn_cvt_scalef32_pk_bf8_f16},
2246 FastTypes: Standard)
2247 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {IntrId, VgprV2S16, VgprV2S16, Vgpr32}})
2248 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {IntrId, VgprV2S16, VgprV2S16, Vgpr32}});
2249
2250 addRulesForIOpcs(OpcList: {amdgcn_cvt_scalef32_pk_f32_fp4}, FastTypes: Standard)
2251 .Any(Init: {.Predicate: {DivV2S32}, .OperandMapping: {{VgprV2S32}, {IntrId, Vgpr32, Vgpr32}}})
2252 .Any(Init: {.Predicate: {UniV2S32}, .OperandMapping: {{UniInVgprV2S32}, {IntrId, Vgpr32, Vgpr32}}});
2253
2254 addRulesForIOpcs(OpcList: {amdgcn_cvt_scalef32_pk_fp4_f32}, FastTypes: Standard)
2255 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32, Vgpr32}})
2256 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32, Vgpr32}});
2257
2258 addRulesForIOpcs(OpcList: {amdgcn_cvt_scalef32_pk_f16_fp4,
2259 amdgcn_cvt_scalef32_pk_f16_fp8,
2260 amdgcn_cvt_scalef32_pk_f16_bf8},
2261 FastTypes: Standard)
2262 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {IntrId, Vgpr32, Vgpr32}})
2263 .Uni(Ty: V2S16, RuleApplyIDs: {{UniInVgprV2S16}, {IntrId, Vgpr32, Vgpr32}});
2264
2265 addRulesForIOpcs(
2266 OpcList: {amdgcn_cvt_scalef32_pk32_f32_fp6, amdgcn_cvt_scalef32_pk32_f32_bf6},
2267 FastTypes: Standard)
2268 .Any(Init: {.Predicate: {DivV32S32}, .OperandMapping: {{VgprV32S32}, {IntrId, VgprV6S32, Vgpr32}}})
2269 .Any(Init: {.Predicate: {UniV32S32}, .OperandMapping: {{UniInVgprV32S32}, {IntrId, VgprV6S32, Vgpr32}}});
2270
2271 addRulesForIOpcs(
2272 OpcList: {amdgcn_cvt_scalef32_pk32_f16_fp6, amdgcn_cvt_scalef32_pk32_f16_bf6},
2273 FastTypes: Standard)
2274 .Any(Init: {.Predicate: {DivV32S16}, .OperandMapping: {{VgprV32S16}, {IntrId, VgprV6S32, Vgpr32}}})
2275 .Any(Init: {.Predicate: {UniV32S16}, .OperandMapping: {{UniInVgprV32S16}, {IntrId, VgprV6S32, Vgpr32}}});
2276
2277 addRulesForIOpcs(OpcList: {amdgcn_cvt_scalef32_pk_fp4_f16}, FastTypes: Standard)
2278 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, VgprV2S16, Vgpr32}})
2279 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, VgprV2S16, Vgpr32}});
2280
2281 addRulesForIOpcs(OpcList: {amdgcn_global_load_tr_b64})
2282 .Any(Init: {.Predicate: {DivB64, _, UniP1}, .OperandMapping: {{VgprB64}, {IntrId, SgprP1}}})
2283 .Any(Init: {.Predicate: {DivB64, _, DivP1}, .OperandMapping: {{VgprB64}, {IntrId, VgprP1}}})
2284 .Any(Init: {.Predicate: {DivB32, _, UniP1}, .OperandMapping: {{VgprB32}, {IntrId, SgprP1}}})
2285 .Any(Init: {.Predicate: {DivB32, _, DivP1}, .OperandMapping: {{VgprB32}, {IntrId, VgprP1}}});
2286
2287 addRulesForIOpcs(OpcList: {amdgcn_global_load_tr_b128})
2288 .Any(Init: {.Predicate: {DivB64, _, UniP1}, .OperandMapping: {{VgprB64}, {IntrId, SgprP1}}})
2289 .Any(Init: {.Predicate: {DivB64, _, DivP1}, .OperandMapping: {{VgprB64}, {IntrId, VgprP1}}})
2290 .Any(Init: {.Predicate: {DivB128, _, UniP1}, .OperandMapping: {{VgprB128}, {IntrId, SgprP1}}})
2291 .Any(Init: {.Predicate: {DivB128, _, DivP1}, .OperandMapping: {{VgprB128}, {IntrId, VgprP1}}});
2292
2293 addRulesForIOpcs(OpcList: {amdgcn_global_load_tr4_b64})
2294 .Any(Init: {.Predicate: {DivV2S32, _, UniP1}, .OperandMapping: {{VgprV2S32}, {IntrId, SgprP1}}})
2295 .Any(Init: {.Predicate: {DivV2S32, _, DivP1}, .OperandMapping: {{VgprV2S32}, {IntrId, VgprP1}}});
2296
2297 addRulesForIOpcs(OpcList: {amdgcn_global_load_tr6_b96})
2298 .Any(Init: {.Predicate: {DivV3S32, _, UniP1}, .OperandMapping: {{VgprV3S32}, {IntrId, SgprP1}}})
2299 .Any(Init: {.Predicate: {DivV3S32, _, DivP1}, .OperandMapping: {{VgprV3S32}, {IntrId, VgprP1}}});
2300
2301 addRulesForIOpcs(OpcList: {amdgcn_ds_load_tr4_b64, amdgcn_ds_load_tr8_b64})
2302 .Any(Init: {.Predicate: {DivV2S32}, .OperandMapping: {{VgprV2S32}, {IntrId, VgprP3}}});
2303
2304 addRulesForIOpcs(OpcList: {amdgcn_ds_load_tr6_b96})
2305 .Any(Init: {.Predicate: {DivV3S32}, .OperandMapping: {{VgprV3S32}, {IntrId, VgprP3}}});
2306
2307 addRulesForIOpcs(OpcList: {amdgcn_ds_load_tr16_b128})
2308 .Any(Init: {.Predicate: {DivB128}, .OperandMapping: {{VgprB128}, {IntrId, VgprP3}}});
2309
2310 addRulesForIOpcs(OpcList: {amdgcn_global_atomic_ordered_add_b64})
2311 .Any(Init: {.Predicate: {DivS64}, .OperandMapping: {{Vgpr64}, {IntrId, VgprP1, Vgpr64}}});
2312
2313 addRulesForIOpcs(
2314 OpcList: {amdgcn_global_atomic_fmin_num, amdgcn_global_atomic_fmax_num}, FastTypes: Standard)
2315 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, VgprP1, Vgpr32}});
2316
2317 addRulesForIOpcs(OpcList: {amdgcn_flat_atomic_fmin_num, amdgcn_flat_atomic_fmax_num},
2318 FastTypes: Standard)
2319 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, VgprP0, Vgpr32}});
2320
2321 addRulesForIOpcs(OpcList: {amdgcn_raw_buffer_load_lds})
2322 .Any(Init: {.Predicate: {_}, .OperandMapping: {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Sgpr32}}});
2323
2324 addRulesForIOpcs(OpcList: {amdgcn_raw_buffer_load_async_lds})
2325 .Any(Init: {.Predicate: {_}, .OperandMapping: {{}, {IntrId, SgprV4S32, SgprB32_M0, Imm, Vgpr32, Sgpr32}}});
2326
2327 addRulesForIOpcs(OpcList: {amdgcn_struct_buffer_load_async_lds})
2328 .Any(
2329 Init: {.Predicate: {_},
2330 .OperandMapping: {{}, {IntrId, SgprV4S32, SgprB32_M0, Imm, Vgpr32, Vgpr32, Sgpr32}}});
2331
2332 addRulesForIOpcs(OpcList: {amdgcn_struct_buffer_load_lds})
2333 .Any(Init: {.Predicate: {_},
2334 .OperandMapping: {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Vgpr32, Sgpr32}}});
2335
2336 addRulesForIOpcs(OpcList: {amdgcn_raw_ptr_buffer_load_lds})
2337 .Any(Init: {.Predicate: {_}, .OperandMapping: {{}, {IntrId, SgprP8, SgprP3, Imm, Vgpr32, Sgpr32}}});
2338
2339 addRulesForIOpcs(OpcList: {amdgcn_raw_ptr_buffer_load_async_lds})
2340 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, SgprP8, SgprB32_M0, Imm, VgprB32, SgprB32}}});
2341
2342 addRulesForIOpcs(OpcList: {amdgcn_struct_ptr_buffer_load_async_lds})
2343 .Any(Init: {.Predicate: {_},
2344 .OperandMapping: {{}, {IntrId, SgprP8, SgprB32_M0, Imm, Vgpr32, Vgpr32, Sgpr32}}});
2345
2346 addRulesForIOpcs(OpcList: {amdgcn_struct_ptr_buffer_load_lds})
2347 .Any(Init: {.Predicate: {_}, .OperandMapping: {{}, {IntrId, SgprP8, SgprP3, Imm, Vgpr32, Vgpr32, Sgpr32}}});
2348
2349 addRulesForIOpcs(
2350 OpcList: {amdgcn_global_load_lds, amdgcn_load_to_lds, amdgcn_load_async_to_lds})
2351 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, VgprP1, SgprB32_M0}}});
2352
2353 addRulesForIOpcs(OpcList: {amdgcn_global_load_async_to_lds_b8,
2354 amdgcn_global_load_async_to_lds_b32,
2355 amdgcn_global_load_async_to_lds_b64,
2356 amdgcn_global_load_async_to_lds_b128,
2357 amdgcn_global_store_async_from_lds_b8,
2358 amdgcn_global_store_async_from_lds_b32,
2359 amdgcn_global_store_async_from_lds_b64,
2360 amdgcn_global_store_async_from_lds_b128})
2361 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, VgprP1, VgprP3}}});
2362
2363 addRulesForIOpcs(OpcList: {amdgcn_global_load_async_lds})
2364 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, VgprP1, SgprB32_M0}}});
2365
2366 addRulesForIOpcs(OpcList: {amdgcn_tensor_load_to_lds, amdgcn_tensor_store_from_lds})
2367 .Any(Init: {.Predicate: {},
2368 .OperandMapping: {{},
2369 {IntrId, SgprV4S32_ReadFirstLane, SgprV8S32_ReadFirstLane,
2370 SgprV4S32_ReadFirstLane, SgprV4S32_ReadFirstLane,
2371 SgprV8S32_ReadFirstLane}}});
2372
2373 addRulesForIOpcs(OpcList: {amdgcn_cluster_load_b32})
2374 .Any(Init: {.Predicate: {UniB32}, .OperandMapping: {{UniInVgprB32}, {IntrId, SgprP1, Imm, SgprB32_M0}}})
2375 .Any(Init: {.Predicate: {DivB32, _, UniP1}, .OperandMapping: {{VgprB32}, {IntrId, SgprP1, Imm, SgprB32_M0}}})
2376 .Any(
2377 Init: {.Predicate: {DivB32, _, DivP1}, .OperandMapping: {{VgprB32}, {IntrId, VgprP1, Imm, SgprB32_M0}}});
2378
2379 addRulesForIOpcs(OpcList: {amdgcn_cluster_load_b64})
2380 .Any(Init: {.Predicate: {UniB64}, .OperandMapping: {{UniInVgprB64}, {IntrId, SgprP1, Imm, SgprB32_M0}}})
2381 .Any(Init: {.Predicate: {DivB64, _, UniP1}, .OperandMapping: {{VgprB64}, {IntrId, SgprP1, Imm, SgprB32_M0}}})
2382 .Any(
2383 Init: {.Predicate: {DivB64, _, DivP1}, .OperandMapping: {{VgprB64}, {IntrId, VgprP1, Imm, SgprB32_M0}}});
2384
2385 addRulesForIOpcs(OpcList: {amdgcn_cluster_load_b128})
2386 .Any(Init: {.Predicate: {UniB128}, .OperandMapping: {{UniInVgprB128}, {IntrId, SgprP1, Imm, SgprB32_M0}}})
2387 .Any(Init: {.Predicate: {DivB128, _, UniP1},
2388 .OperandMapping: {{VgprB128}, {IntrId, SgprP1, Imm, SgprB32_M0}}})
2389 .Any(Init: {.Predicate: {DivB128, _, DivP1},
2390 .OperandMapping: {{VgprB128}, {IntrId, VgprP1, Imm, SgprB32_M0}}});
2391
2392 addRulesForIOpcs(OpcList: {amdgcn_cluster_load_async_to_lds_b8,
2393 amdgcn_cluster_load_async_to_lds_b32,
2394 amdgcn_cluster_load_async_to_lds_b64,
2395 amdgcn_cluster_load_async_to_lds_b128})
2396 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, VgprP1, VgprP3, Imm, Imm, SgprB32_M0}}});
2397
2398 addRulesForIOpcs(OpcList: {amdgcn_perm_pk16_b4_u4}, FastTypes: StandardB)
2399 .Uni(Ty: B64, RuleApplyIDs: {{UniInVgprB64}, {IntrId, Vgpr32, Vgpr32, VgprV2S32}})
2400 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {IntrId, Vgpr32, Vgpr32, VgprV2S32}});
2401
2402 addRulesForIOpcs(OpcList: {amdgcn_perm_pk16_b6_u4}, FastTypes: StandardB)
2403 .Uni(Ty: B96, RuleApplyIDs: {{UniInVgprB96}, {IntrId, Vgpr32, VgprB64, VgprV2S32}})
2404 .Div(Ty: B96, RuleApplyIDs: {{VgprB96}, {IntrId, Vgpr32, VgprB64, VgprV2S32}});
2405
2406 addRulesForIOpcs(OpcList: {amdgcn_perm_pk16_b8_u4}, FastTypes: StandardB)
2407 .Uni(Ty: B128, RuleApplyIDs: {{UniInVgprB128}, {IntrId, VgprB64, VgprB64, VgprV2S32}})
2408 .Div(Ty: B128, RuleApplyIDs: {{VgprB128}, {IntrId, VgprB64, VgprB64, VgprV2S32}});
2409
2410 addRulesForIOpcs(OpcList: {amdgcn_wwm, amdgcn_strict_wwm, amdgcn_wqm, amdgcn_softwqm,
2411 amdgcn_strict_wqm},
2412 FastTypes: StandardB)
2413 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {IntrId, VgprB32}})
2414 .Uni(Ty: B32, RuleApplyIDs: {{SgprB32}, {IntrId, SgprB32}})
2415 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {IntrId, VgprB64}})
2416 .Uni(Ty: B64, RuleApplyIDs: {{SgprB64}, {IntrId, SgprB64}})
2417 .Div(Ty: B96, RuleApplyIDs: {{VgprB96}, {IntrId, VgprB96}})
2418 .Uni(Ty: B96, RuleApplyIDs: {{SgprB96}, {IntrId, SgprB96}})
2419 .Div(Ty: B128, RuleApplyIDs: {{VgprB128}, {IntrId, VgprB128}})
2420 .Uni(Ty: B128, RuleApplyIDs: {{SgprB128}, {IntrId, SgprB128}})
2421 .Any(Init: {.Predicate: {UniB256}, .OperandMapping: {{SgprB256}, {IntrId, SgprB256}}})
2422 .Any(Init: {.Predicate: {DivB256}, .OperandMapping: {{VgprB256}, {IntrId, VgprB256}}})
2423 .Any(Init: {.Predicate: {UniB512}, .OperandMapping: {{SgprB512}, {IntrId, SgprB512}}})
2424 .Any(Init: {.Predicate: {DivB512}, .OperandMapping: {{VgprB512}, {IntrId, VgprB512}}});
2425
2426 addRulesForIOpcs(OpcList: {amdgcn_init_whole_wave}).Any(Init: {.Predicate: {DivS1}, .OperandMapping: {{Vcc}, {IntrId}}});
2427
2428 addRulesForIOpcs(OpcList: {amdgcn_kill, amdgcn_wqm_demote})
2429 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, Vcc}}});
2430
2431 addRulesForIOpcs(OpcList: {amdgcn_set_inactive}, FastTypes: StandardB)
2432 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {IntrId, VgprB32, VgprB32}});
2433
2434 addRulesForIOpcs(OpcList: {amdgcn_set_inactive_chain_arg}, FastTypes: Standard)
2435 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}});
2436
2437 addRulesForIOpcs(OpcList: {amdgcn_cvt_sr_bf16_f32, amdgcn_cvt_sr_f16_f32}, FastTypes: Standard)
2438 .Div(Ty: V2S16, RuleApplyIDs: {{VgprV2S16}, {IntrId, VgprV2S16, Vgpr32, Vgpr32}});
2439
2440 addRulesForIOpcs(OpcList: {amdgcn_ballot}, FastTypes: Standard)
2441 .Uni(Ty: S64, RuleApplyIDs: {{Sgpr64}, {IntrId, Vcc}})
2442 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {IntrId, Vcc}});
2443
2444 addRulesForIOpcs(OpcList: {amdgcn_inverse_ballot})
2445 .Any(Init: {.Predicate: {DivS1, _, S32}, .OperandMapping: {{Vcc}, {IntrId, SgprB32_ReadFirstLane}}})
2446 .Any(Init: {.Predicate: {DivS1, _, S64}, .OperandMapping: {{Vcc}, {IntrId, SgprB64_ReadFirstLane}}});
2447
2448 addRulesForIOpcs(OpcList: {amdgcn_live_mask, amdgcn_ps_live})
2449 .Any(Init: {.Predicate: {DivS1}, .OperandMapping: {{Vcc}, {}}});
2450
2451 addRulesForIOpcs(OpcList: {amdgcn_mov_dpp, amdgcn_mov_dpp8}, FastTypes: StandardB)
2452 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {IntrId, VgprB32}})
2453 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {IntrId, VgprB64}});
2454
2455 addRulesForIOpcs(OpcList: {amdgcn_update_dpp}, FastTypes: StandardB)
2456 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {IntrId, VgprB32, VgprB32}})
2457 .Div(Ty: B64, RuleApplyIDs: {{VgprB64}, {IntrId, VgprB64, VgprB64}});
2458
2459 addRulesForIOpcs(OpcList: {amdgcn_sin, amdgcn_cos}, FastTypes: Standard)
2460 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {IntrId, Vgpr16}})
2461 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {IntrId, Vgpr16}})
2462 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32}})
2463 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32}});
2464
2465 addRulesForIOpcs(OpcList: {amdgcn_trig_preop}, FastTypes: Standard)
2466 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {IntrId, Vgpr64, Vgpr32}})
2467 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {IntrId, Vgpr64, Vgpr32}});
2468
2469 addRulesForIOpcs(OpcList: {amdgcn_exp2}, FastTypes: Standard)
2470 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {IntrId, Vgpr16}})
2471 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {IntrId, Sgpr16}}, STPred: hasPST)
2472 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {IntrId, Vgpr16}}, STPred: !hasPST)
2473 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32}})
2474 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {IntrId, Sgpr32}}, STPred: hasPST)
2475 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32}}, STPred: !hasPST);
2476
2477 addRulesForIOpcs(OpcList: {amdgcn_rcp, amdgcn_sqrt}, FastTypes: Standard)
2478 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {IntrId, Vgpr16}})
2479 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {IntrId, Sgpr16}}, STPred: hasPST)
2480 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {IntrId, Vgpr16}}, STPred: !hasPST)
2481 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32}})
2482 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {IntrId, Sgpr32}}, STPred: hasPST)
2483 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32}}, STPred: !hasPST)
2484 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {IntrId, Vgpr64}})
2485 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {IntrId, Vgpr64}});
2486
2487 addRulesForIOpcs(OpcList: {amdgcn_log}, FastTypes: Standard)
2488 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {IntrId, Vgpr16}})
2489 .Uni(Ty: S16, RuleApplyIDs: {{Sgpr16}, {IntrId, Sgpr16}}, STPred: hasPST)
2490 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {IntrId, Vgpr16}}, STPred: !hasPST)
2491 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32}})
2492 .Uni(Ty: S32, RuleApplyIDs: {{Sgpr32}, {IntrId, Sgpr32}}, STPred: hasPST)
2493 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32}}, STPred: !hasPST);
2494
2495 addRulesForIOpcs(OpcList: {amdgcn_ds_atomic_async_barrier_arrive_b64})
2496 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, VgprP3}}});
2497
2498 addRulesForIOpcs(OpcList: {amdgcn_ds_atomic_barrier_arrive_rtn_b64}, FastTypes: Standard)
2499 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {IntrId, VgprP3, Vgpr64}});
2500
2501 addRulesForIOpcs(OpcList: {amdgcn_ds_add_gs_reg_rtn, amdgcn_ds_sub_gs_reg_rtn},
2502 FastTypes: Standard)
2503 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32}})
2504 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {IntrId, Vgpr32}});
2505
2506 addRulesForIOpcs(OpcList: {amdgcn_ds_append, amdgcn_ds_consume}, FastTypes: Standard)
2507 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, SgprB32_M0}})
2508 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, SgprB32_M0}});
2509
2510 addRulesForIOpcs(
2511 OpcList: {amdgcn_ds_bvh_stack_rtn, amdgcn_ds_bvh_stack_push4_pop1_rtn}, FastTypes: Standard)
2512 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV4S32}});
2513
2514 addRulesForIOpcs(OpcList: {amdgcn_ds_bvh_stack_push8_pop1_rtn}, FastTypes: Standard)
2515 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV8S32}});
2516
2517 addRulesForIOpcs(OpcList: {amdgcn_ds_bvh_stack_push8_pop2_rtn}, FastTypes: Standard)
2518 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64, Vgpr32}, {IntrId, Vgpr32, Vgpr32, VgprV8S32}});
2519
2520 addRulesForIOpcs(OpcList: {amdgcn_ds_gws_sema_p, amdgcn_ds_gws_sema_v,
2521 amdgcn_ds_gws_sema_release_all})
2522 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, SgprB32_M0}}});
2523
2524 addRulesForIOpcs(
2525 OpcList: {amdgcn_ds_gws_barrier, amdgcn_ds_gws_init, amdgcn_ds_gws_sema_br})
2526 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {IntrId, Vgpr32, SgprB32_M0}}});
2527
2528 addRulesForIOpcs(OpcList: {amdgcn_ds_ordered_add, amdgcn_ds_ordered_swap}, FastTypes: Standard)
2529 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, SgprB32_M0, Vgpr32}});
2530
2531 addRulesForIOpcs(OpcList: {amdgcn_ds_swizzle}, FastTypes: Standard)
2532 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32}})
2533 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32}});
2534
2535 addRulesForIOpcs(OpcList: {amdgcn_permlane16_var, amdgcn_permlanex16_var}, FastTypes: Standard)
2536 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
2537
2538 addRulesForIOpcs(OpcList: {amdgcn_permlane16_swap, amdgcn_permlane32_swap}, FastTypes: Standard)
2539 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32, Vgpr32}, {IntrId, Vgpr32, Vgpr32}});
2540
2541 addRulesForIOpcs(OpcList: {amdgcn_permlane64}, FastTypes: StandardB)
2542 .Div(Ty: B32, RuleApplyIDs: {{VgprB32}, {IntrId, VgprB32}});
2543
2544 addRulesForIOpcs(OpcList: {amdgcn_ds_read_tr4_b64, amdgcn_ds_read_tr8_b64})
2545 .Any(Init: {.Predicate: {DivV2S32}, .OperandMapping: {{VgprV2S32}, {IntrId, VgprP3}}});
2546
2547 addRulesForIOpcs(OpcList: {amdgcn_ds_read_tr6_b96})
2548 .Any(Init: {.Predicate: {DivV3S32}, .OperandMapping: {{VgprV3S32}, {IntrId, VgprP3}}});
2549
2550 addRulesForIOpcs(OpcList: {amdgcn_ds_read_tr16_b64})
2551 .Any(Init: {.Predicate: {DivV4S16}, .OperandMapping: {{VgprV4S16}, {IntrId, VgprP3}}});
2552
2553 addRulesForIOpcs(OpcList: {amdgcn_interp_p1}, FastTypes: Standard)
2554 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Imm, Imm, SgprB32_M0}});
2555
2556 addRulesForIOpcs(OpcList: {amdgcn_interp_p1_f16}, FastTypes: Standard)
2557 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Imm, Imm, Imm, SgprB32_M0}});
2558
2559 addRulesForIOpcs(OpcList: {amdgcn_interp_p2}, FastTypes: Standard)
2560 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Imm, Imm, SgprB32_M0}});
2561
2562 addRulesForIOpcs(OpcList: {amdgcn_interp_p2_f16}, FastTypes: Standard)
2563 .Div(Ty: S16,
2564 RuleApplyIDs: {{Vgpr16}, {IntrId, Vgpr32, Vgpr32, Imm, Imm, Imm, SgprB32_M0}});
2565
2566 addRulesForIOpcs(OpcList: {amdgcn_interp_mov}, FastTypes: Standard)
2567 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Imm, Imm, Imm, SgprB32_M0}});
2568
2569 addRulesForIOpcs(OpcList: {amdgcn_interp_inreg_p10, amdgcn_interp_inreg_p2,
2570 amdgcn_interp_inreg_p10_f16, amdgcn_interp_p10_rtz_f16},
2571 FastTypes: Standard)
2572 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
2573 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
2574
2575 addRulesForIOpcs(OpcList: {amdgcn_interp_inreg_p2_f16, amdgcn_interp_p2_rtz_f16},
2576 FastTypes: Standard)
2577 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
2578 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
2579
2580 addRulesForIOpcs(OpcList: {amdgcn_frexp_exp})
2581 .Any(Init: {.Predicate: {UniS16}, .OperandMapping: {{UniInVgprS16}, {IntrId, Vgpr16}}})
2582 .Any(Init: {.Predicate: {DivS16}, .OperandMapping: {{Vgpr16}, {IntrId, Vgpr16}}})
2583 .Any(Init: {.Predicate: {UniS32, _, S32}, .OperandMapping: {{UniInVgprS32}, {IntrId, Vgpr32}}})
2584 .Any(Init: {.Predicate: {DivS32, _, S32}, .OperandMapping: {{Vgpr32}, {IntrId, Vgpr32}}})
2585 .Any(Init: {.Predicate: {UniS32, _, S64}, .OperandMapping: {{UniInVgprS32}, {IntrId, Vgpr64}}})
2586 .Any(Init: {.Predicate: {DivS32, _, S64}, .OperandMapping: {{Vgpr32}, {IntrId, Vgpr64}}});
2587
2588 addRulesForIOpcs(OpcList: {amdgcn_div_fmas}, FastTypes: Standard)
2589 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32, Vcc}})
2590 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32, Vcc}})
2591 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {IntrId, Vgpr64, Vgpr64, Vgpr64, Vcc}})
2592 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {IntrId, Vgpr64, Vgpr64, Vgpr64, Vcc}});
2593
2594 addRulesForIOpcs(OpcList: {amdgcn_div_fixup}, FastTypes: Standard)
2595 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
2596 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
2597 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
2598 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
2599 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64}, {IntrId, Vgpr64, Vgpr64, Vgpr64}})
2600 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64}, {IntrId, Vgpr64, Vgpr64, Vgpr64}});
2601
2602 addRulesForIOpcs(OpcList: {amdgcn_div_scale}, FastTypes: Standard)
2603 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32, Vcc}, {IntrId, Vgpr32, Vgpr32}})
2604 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32, UniInVcc}, {IntrId, Vgpr32, Vgpr32}})
2605 .Div(Ty: S64, RuleApplyIDs: {{Vgpr64, Vcc}, {IntrId, Vgpr64, Vgpr64}})
2606 .Uni(Ty: S64, RuleApplyIDs: {{UniInVgprS64, UniInVcc}, {IntrId, Vgpr64, Vgpr64}});
2607
2608 addRulesForIOpcs(OpcList: {amdgcn_fdot2, amdgcn_sdot2, amdgcn_udot2}, FastTypes: Standard)
2609 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, VgprV2S16, VgprV2S16, Vgpr32}})
2610 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, VgprV2S16, VgprV2S16, Vgpr32}});
2611
2612 addRulesForIOpcs(OpcList: {amdgcn_fdot2_f16_f16}, FastTypes: Standard)
2613 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {IntrId, VgprV2S16, VgprV2S16, Vgpr16}})
2614 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {IntrId, VgprV2S16, VgprV2S16, Vgpr16}});
2615
2616 addRulesForIOpcs(OpcList: {amdgcn_sudot4, amdgcn_sudot8}, FastTypes: Standard)
2617 .Uni(Ty: S32, RuleApplyIDs: {{UniInVgprS32}, {IntrId, Imm, Vgpr32, Imm, Vgpr32, Vgpr32}})
2618 .Div(Ty: S32, RuleApplyIDs: {{Vgpr32}, {IntrId, Imm, Vgpr32, Imm, Vgpr32, Vgpr32}});
2619
2620 addRulesForIOpcs(OpcList: {amdgcn_s_alloc_vgpr})
2621 .Any(Init: {.Predicate: {UniS1}, .OperandMapping: {{Sgpr32Trunc}, {IntrId, SgprB32_ReadFirstLane}}});
2622
2623 addRulesForIOpcs(OpcList: {amdgcn_sat_pk4_i4_i8, amdgcn_sat_pk4_u4_u8}, FastTypes: Standard)
2624 .Uni(Ty: S16, RuleApplyIDs: {{UniInVgprS16}, {IntrId, Vgpr32}})
2625 .Div(Ty: S16, RuleApplyIDs: {{Vgpr16}, {IntrId, Vgpr32}});
2626
2627 bool HasGFX90AInsts = ST->hasGFX90AInsts();
2628
2629 // On gfx90a+ both AGPR-form and VGPR-form exist
2630 addRulesForIOpcs(OpcList: {amdgcn_mfma_f32_32x32x1f32, amdgcn_mfma_f32_16x16x1f32,
2631 amdgcn_mfma_f32_4x4x1f32, amdgcn_mfma_f32_32x32x2f32,
2632 amdgcn_mfma_f32_16x16x4f32, amdgcn_mfma_f32_32x32x4f16,
2633 amdgcn_mfma_f32_16x16x4f16, amdgcn_mfma_f32_4x4x4f16,
2634 amdgcn_mfma_f32_32x32x8f16, amdgcn_mfma_f32_16x16x16f16,
2635 amdgcn_mfma_i32_32x32x4i8, amdgcn_mfma_i32_16x16x4i8,
2636 amdgcn_mfma_i32_4x4x4i8, amdgcn_mfma_i32_32x32x8i8,
2637 amdgcn_mfma_i32_16x16x16i8, amdgcn_mfma_f32_32x32x2bf16,
2638 amdgcn_mfma_f32_16x16x2bf16, amdgcn_mfma_f32_4x4x2bf16,
2639 amdgcn_mfma_f32_32x32x4bf16, amdgcn_mfma_f32_16x16x8bf16})
2640 .Any(Init: {.Predicate: {DivAnyTy},
2641 .OperandMapping: {{AgprAnyTy}, {IntrId, VgprAnyTy, VgprAnyTy, AgprAnyTy}}},
2642 STPred: !HasGFX90AInsts)
2643 .Any(Init: {.Predicate: {DivAnyTy},
2644 .OperandMapping: {{VgprOrAgprAnyTy},
2645 {IntrId, VgprAnyTy, VgprAnyTy, VgprOrAgprAnyTy}}},
2646 STPred: HasGFX90AInsts);
2647
2648 // gfx90a+ only MFMAs
2649 addRulesForIOpcs(
2650 OpcList: {
2651 amdgcn_mfma_f32_32x32x4bf16_1k,
2652 amdgcn_mfma_f32_16x16x4bf16_1k,
2653 amdgcn_mfma_f32_4x4x4bf16_1k,
2654 amdgcn_mfma_f32_32x32x8bf16_1k,
2655 amdgcn_mfma_f32_16x16x16bf16_1k,
2656 amdgcn_mfma_f64_16x16x4f64,
2657 amdgcn_mfma_f64_4x4x4f64,
2658 amdgcn_mfma_i32_16x16x32_i8,
2659 amdgcn_mfma_i32_32x32x16_i8,
2660 amdgcn_mfma_f32_16x16x8_xf32,
2661 amdgcn_mfma_f32_32x32x4_xf32,
2662 amdgcn_mfma_f32_16x16x32_bf8_bf8,
2663 amdgcn_mfma_f32_16x16x32_bf8_fp8,
2664 amdgcn_mfma_f32_16x16x32_fp8_bf8,
2665 amdgcn_mfma_f32_16x16x32_fp8_fp8,
2666 amdgcn_mfma_f32_32x32x16_bf8_bf8,
2667 amdgcn_mfma_f32_32x32x16_bf8_fp8,
2668 amdgcn_mfma_f32_32x32x16_fp8_bf8,
2669 amdgcn_mfma_f32_32x32x16_fp8_fp8,
2670 // gfx950
2671 amdgcn_mfma_f32_16x16x32_f16,
2672 amdgcn_mfma_f32_32x32x16_f16,
2673 amdgcn_mfma_i32_16x16x64_i8,
2674 amdgcn_mfma_i32_32x32x32_i8,
2675 // TODO: bf16 variants fail in IRTranslator.
2676 // amdgcn_mfma_f32_16x16x32_bf16, amdgcn_mfma_f32_32x32x16_bf16,
2677 })
2678 .Any(Init: {.Predicate: {DivAnyTy},
2679 .OperandMapping: {{VgprOrAgprAnyTy},
2680 {IntrId, VgprAnyTy, VgprAnyTy, VgprOrAgprAnyTy}}});
2681
2682 addRulesForIOpcs(
2683 OpcList: {// gfx942+
2684 amdgcn_smfmac_f32_16x16x32_f16, amdgcn_smfmac_f32_32x32x16_f16,
2685 amdgcn_smfmac_f32_16x16x32_bf16, amdgcn_smfmac_f32_32x32x16_bf16,
2686 amdgcn_smfmac_i32_16x16x64_i8, amdgcn_smfmac_i32_32x32x32_i8,
2687 amdgcn_smfmac_f32_16x16x64_bf8_bf8, amdgcn_smfmac_f32_16x16x64_bf8_fp8,
2688 amdgcn_smfmac_f32_16x16x64_fp8_bf8, amdgcn_smfmac_f32_16x16x64_fp8_fp8,
2689 amdgcn_smfmac_f32_32x32x32_bf8_bf8, amdgcn_smfmac_f32_32x32x32_bf8_fp8,
2690 amdgcn_smfmac_f32_32x32x32_fp8_bf8, amdgcn_smfmac_f32_32x32x32_fp8_fp8,
2691 // gfx950+
2692 amdgcn_smfmac_f32_16x16x64_f16, amdgcn_smfmac_f32_32x32x32_f16,
2693 amdgcn_smfmac_i32_16x16x128_i8, amdgcn_smfmac_i32_32x32x64_i8,
2694 amdgcn_smfmac_f32_16x16x128_bf8_bf8, amdgcn_smfmac_f32_16x16x128_bf8_fp8,
2695 amdgcn_smfmac_f32_16x16x128_fp8_bf8, amdgcn_smfmac_f32_16x16x128_fp8_fp8,
2696 amdgcn_smfmac_f32_32x32x64_bf8_bf8, amdgcn_smfmac_f32_32x32x64_bf8_fp8,
2697 amdgcn_smfmac_f32_32x32x64_fp8_bf8, amdgcn_smfmac_f32_32x32x64_fp8_fp8})
2698 .Any(Init: {.Predicate: {DivAnyTy},
2699 .OperandMapping: {{VgprOrAgprAnyTy},
2700 {IntrId, VgprAnyTy, VgprAnyTy, VgprOrAgprAnyTy, VgprAnyTy}}});
2701
2702 addRulesForIOpcs(OpcList: {amdgcn_mfma_scale_f32_32x32x64_f8f6f4,
2703 amdgcn_mfma_scale_f32_16x16x128_f8f6f4})
2704 .Any(Init: {.Predicate: {DivAnyTy},
2705 .OperandMapping: {{VgprOrAgprAnyTy},
2706 {IntrId, VgprAnyTy, VgprAnyTy, VgprOrAgprAnyTy, Imm, Imm, Imm,
2707 Vgpr32, Imm, Vgpr32}}});
2708
2709 // WMMA/SWMMAC intrinsics: all register operands map to VGPR.
2710 addRulesForIOpcs(
2711 OpcList: {// WMMA GFX11+
2712 amdgcn_wmma_f32_16x16x16_f16, amdgcn_wmma_f32_16x16x16_bf16,
2713 amdgcn_wmma_f16_16x16x16_f16, amdgcn_wmma_bf16_16x16x16_bf16,
2714 amdgcn_wmma_f16_16x16x16_f16_tied, amdgcn_wmma_bf16_16x16x16_bf16_tied,
2715 amdgcn_wmma_i32_16x16x16_iu8, amdgcn_wmma_i32_16x16x16_iu4,
2716 // WMMA GFX12
2717 amdgcn_wmma_f32_16x16x16_fp8_fp8, amdgcn_wmma_f32_16x16x16_fp8_bf8,
2718 amdgcn_wmma_f32_16x16x16_bf8_fp8, amdgcn_wmma_f32_16x16x16_bf8_bf8,
2719 amdgcn_wmma_i32_16x16x32_iu4,
2720 // WMMA GFX1250
2721 amdgcn_wmma_f32_16x16x4_f32, amdgcn_wmma_f32_16x16x32_bf16,
2722 amdgcn_wmma_f32_16x16x32_f16, amdgcn_wmma_f16_16x16x32_f16,
2723 amdgcn_wmma_bf16_16x16x32_bf16, amdgcn_wmma_bf16f32_16x16x32_bf16,
2724 amdgcn_wmma_f32_16x16x64_fp8_fp8, amdgcn_wmma_f32_16x16x64_fp8_bf8,
2725 amdgcn_wmma_f32_16x16x64_bf8_fp8, amdgcn_wmma_f32_16x16x64_bf8_bf8,
2726 amdgcn_wmma_f16_16x16x64_fp8_fp8, amdgcn_wmma_f16_16x16x64_fp8_bf8,
2727 amdgcn_wmma_f16_16x16x64_bf8_fp8, amdgcn_wmma_f16_16x16x64_bf8_bf8,
2728 amdgcn_wmma_f16_16x16x128_fp8_fp8, amdgcn_wmma_f16_16x16x128_fp8_bf8,
2729 amdgcn_wmma_f16_16x16x128_bf8_fp8, amdgcn_wmma_f16_16x16x128_bf8_bf8,
2730 amdgcn_wmma_f32_16x16x128_fp8_fp8, amdgcn_wmma_f32_16x16x128_fp8_bf8,
2731 amdgcn_wmma_f32_16x16x128_bf8_fp8, amdgcn_wmma_f32_16x16x128_bf8_bf8,
2732 amdgcn_wmma_i32_16x16x64_iu8, amdgcn_wmma_f32_16x16x128_f8f6f4,
2733 amdgcn_wmma_scale_f32_16x16x128_f8f6f4,
2734 amdgcn_wmma_scale16_f32_16x16x128_f8f6f4, amdgcn_wmma_f32_32x16x128_f4,
2735 amdgcn_wmma_scale_f32_32x16x128_f4, amdgcn_wmma_scale16_f32_32x16x128_f4,
2736 // WMMA GFX1251
2737 amdgcn_wmma_f64_16x16x4_f64,
2738 // SWMMAC GFX12
2739 amdgcn_swmmac_f32_16x16x32_f16, amdgcn_swmmac_f32_16x16x32_bf16,
2740 amdgcn_swmmac_f16_16x16x32_f16, amdgcn_swmmac_bf16_16x16x32_bf16,
2741 amdgcn_swmmac_i32_16x16x32_iu8, amdgcn_swmmac_i32_16x16x32_iu4,
2742 amdgcn_swmmac_i32_16x16x64_iu4, amdgcn_swmmac_f32_16x16x32_fp8_fp8,
2743 amdgcn_swmmac_f32_16x16x32_fp8_bf8, amdgcn_swmmac_f32_16x16x32_bf8_fp8,
2744 amdgcn_swmmac_f32_16x16x32_bf8_bf8,
2745 // SWMMAC GFX1250
2746 amdgcn_swmmac_f32_16x16x64_f16, amdgcn_swmmac_f32_16x16x64_bf16,
2747 amdgcn_swmmac_f16_16x16x64_f16, amdgcn_swmmac_bf16_16x16x64_bf16,
2748 amdgcn_swmmac_bf16f32_16x16x64_bf16, amdgcn_swmmac_f32_16x16x128_fp8_fp8,
2749 amdgcn_swmmac_f32_16x16x128_fp8_bf8, amdgcn_swmmac_f32_16x16x128_bf8_fp8,
2750 amdgcn_swmmac_f32_16x16x128_bf8_bf8, amdgcn_swmmac_f16_16x16x128_fp8_fp8,
2751 amdgcn_swmmac_f16_16x16x128_fp8_bf8, amdgcn_swmmac_f16_16x16x128_bf8_fp8,
2752 amdgcn_swmmac_f16_16x16x128_bf8_bf8, amdgcn_swmmac_i32_16x16x128_iu8})
2753 .Any(Init: {.Predicate: {}, .OperandMapping: {{}, {}, ApplyAllVgpr}});
2754
2755} // end initialize rules
2756