//===-- AMDGPURegBankLegalizeRules.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Definitions of RegBankLegalize Rules for all opcodes.
/// Implementation of the container that holds all the Rules and of the rule
/// search. The fast search covers the most common case: a Rule whose Predicate
/// only checks the LLT and uniformity of register operand 0.
//
//===----------------------------------------------------------------------===//

#include "AMDGPURegBankLegalizeRules.h"
#include "AMDGPUInstrInfo.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/AMDGPUAddrSpace.h"

#define DEBUG_TYPE "amdgpu-regbanklegalize"

using namespace llvm;
using namespace AMDGPU;

bool AMDGPU::isAnyPtr(LLT Ty, unsigned Width) {
  return Ty.isPointer() && Ty.getSizeInBits() == Width;
}

RegBankLLTMapping::RegBankLLTMapping(
    std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
    std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
    LoweringMethodID LoweringMethod)
    : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
      LoweringMethod(LoweringMethod) {}

PredicateMapping::PredicateMapping(
    std::initializer_list<UniformityLLTOpPredicateID> OpList,
    std::function<bool(const MachineInstr &)> TestFunc)
    : OpUniformityAndTypes(OpList), TestFunc(TestFunc) {}

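// Check whether Reg matches a single UniformityLLTOpPredicateID. The IDs come
// in four families: exact scalars/vectors (S*, V*), exact pointers (P*), any
// pointer of a given width (Ptr*), and any type of a given total bit width
// (B*). A Uni/Div prefix additionally requires the register to be uniform or
// divergent; IDs without a prefix check only the type.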
bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
                           const MachineUniformityInfo &MUI,
                           const MachineRegisterInfo &MRI) {
  switch (UniID) {
  case S1:
    return MRI.getType(Reg) == LLT::scalar(1);
  case S16:
    return MRI.getType(Reg) == LLT::scalar(16);
  case S32:
    return MRI.getType(Reg) == LLT::scalar(32);
  case S64:
    return MRI.getType(Reg) == LLT::scalar(64);
  case S128:
    return MRI.getType(Reg) == LLT::scalar(128);
  case P0:
    return MRI.getType(Reg) == LLT::pointer(0, 64);
  case P1:
    return MRI.getType(Reg) == LLT::pointer(1, 64);
  case P3:
    return MRI.getType(Reg) == LLT::pointer(3, 32);
  case P4:
    return MRI.getType(Reg) == LLT::pointer(4, 64);
  case P5:
    return MRI.getType(Reg) == LLT::pointer(5, 32);
  case Ptr32:
    return isAnyPtr(MRI.getType(Reg), 32);
  case Ptr64:
    return isAnyPtr(MRI.getType(Reg), 64);
  case Ptr128:
    return isAnyPtr(MRI.getType(Reg), 128);
  case V2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
  case V4S32:
    return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
  case B32:
    return MRI.getType(Reg).getSizeInBits() == 32;
  case B64:
    return MRI.getType(Reg).getSizeInBits() == 64;
  case B96:
    return MRI.getType(Reg).getSizeInBits() == 96;
  case B128:
    return MRI.getType(Reg).getSizeInBits() == 128;
  case B256:
    return MRI.getType(Reg).getSizeInBits() == 256;
  case B512:
    return MRI.getType(Reg).getSizeInBits() == 512;
  case UniS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isUniform(Reg);
  case UniS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isUniform(Reg);
  case UniS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniform(Reg);
  case UniS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniform(Reg);
  case UniS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isUniform(Reg);
  case UniP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isUniform(Reg);
  case UniP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniform(Reg);
  case UniP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isUniform(Reg);
  case UniP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
  case UniP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
  case UniPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniform(Reg);
  case UniPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isUniform(Reg);
  case UniPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniform(Reg);
  case UniV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg);
  case UniB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
  case UniB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniform(Reg);
  case UniB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniform(Reg);
  case UniB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniform(Reg);
  case UniB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniform(Reg);
  case UniB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg);
  case DivS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg);
  case DivS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isDivergent(Reg);
  case DivS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg);
  case DivS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergent(Reg);
  case DivS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isDivergent(Reg);
  case DivP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isDivergent(Reg);
  case DivP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergent(Reg);
  case DivP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isDivergent(Reg);
  case DivP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergent(Reg);
  case DivP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergent(Reg);
  case DivPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isDivergent(Reg);
  case DivPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isDivergent(Reg);
  case DivPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergent(Reg);
  case DivV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isDivergent(Reg);
  case DivB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
  case DivB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergent(Reg);
  case DivB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergent(Reg);
  case DivB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergent(Reg);
  case DivB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergent(Reg);
  case DivB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(Reg);
  case _:
    return true;
  default:
    llvm_unreachable("missing matchUniformityAndLLT");
  }
}

bool PredicateMapping::match(const MachineInstr &MI,
                             const MachineUniformityInfo &MUI,
                             const MachineRegisterInfo &MRI) const {
  // Check LLT signature.
  for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
    if (OpUniformityAndTypes[i] == _) {
      if (MI.getOperand(i).isReg())
        return false;
      continue;
    }

    // Remaining IDs check registers.
    if (!MI.getOperand(i).isReg())
      return false;

    if (!matchUniformityAndLLT(MI.getOperand(i).getReg(),
                               OpUniformityAndTypes[i], MUI, MRI))
      return false;
  }

  // More complex check.
  if (TestFunc)
    return TestFunc(MI);

  return true;
}

SetOfRulesForOpcode::SetOfRulesForOpcode() {}

SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
    : FastTypes(FastTypes) {}

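// Map an exact LLT to the UniformityLLTOpPredicateID used to select a slot in
// the fast-rules tables (Standard and Vector variants).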
UniformityLLTOpPredicateID LLTToId(LLT Ty) {
  if (Ty == LLT::scalar(16))
    return S16;
  if (Ty == LLT::scalar(32))
    return S32;
  if (Ty == LLT::scalar(64))
    return S64;
  if (Ty == LLT::fixed_vector(2, 16))
    return V2S16;
  if (Ty == LLT::fixed_vector(2, 32))
    return V2S32;
  if (Ty == LLT::fixed_vector(3, 32))
    return V3S32;
  if (Ty == LLT::fixed_vector(4, 32))
    return V4S32;
  return _;
}

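// B-type IDs bucket all LLTs of the same total bit width together, so a single
// StandardB fast-rule slot covers, e.g., s32, v2s16 and 32-bit pointers alike.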
UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
  if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
      isAnyPtr(Ty, 32))
    return B32;
  if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
      Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64))
    return B64;
  if (Ty == LLT::fixed_vector(3, 32))
    return B96;
  if (Ty == LLT::fixed_vector(4, 32) || isAnyPtr(Ty, 128))
    return B128;
  return _;
}

const RegBankLLTMapping &
SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI,
                                      const MachineRegisterInfo &MRI,
                                      const MachineUniformityInfo &MUI) const {
  // Search in "fast rules".
  // Note: if fast rules are enabled, a RegBankLLTMapping must be added to each
  // slot that could match the fast predicate. Otherwise InvalidMapping is
  // returned, which results in failure; the "slow rules" are not searched.
  if (FastTypes != NoFastRules) {
    Register Reg = MI.getOperand(0).getReg();
    int Slot;
    if (FastTypes == StandardB)
      Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg)));
    else
      Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));

    if (Slot != -1)
      return MUI.isUniform(Reg) ? Uni[Slot] : Div[Slot];
  }

  // Slow search for more complex rules.
  for (const RegBankLegalizeRule &Rule : Rules) {
    if (Rule.Predicate.match(MI, MUI, MRI))
      return Rule.OperandMapping;
  }

  LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
  llvm_unreachable("None of the rules defined for MI's opcode matched MI");
}

void SetOfRulesForOpcode::addRule(RegBankLegalizeRule Rule) {
  Rules.push_back(Rule);
}

void SetOfRulesForOpcode::addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
                                               RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Div[Slot] = RuleApplyIDs;
}

void SetOfRulesForOpcode::addFastRuleUniform(UniformityLLTOpPredicateID Ty,
                                             RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Uni[Slot] = RuleApplyIDs;
}

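// Map Ty to one of the four fast-rule slots of the active FastRulesTypes
// variant, or -1 if Ty has no fast slot. Uni[] and Div[] are indexed by the
// returned slot.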
int SetOfRulesForOpcode::getFastPredicateSlot(
    UniformityLLTOpPredicateID Ty) const {
  switch (FastTypes) {
  case Standard: {
    switch (Ty) {
    case S32:
      return 0;
    case S16:
      return 1;
    case S64:
      return 2;
    case V2S16:
      return 3;
    default:
      return -1;
    }
  }
  case StandardB: {
    switch (Ty) {
    case B32:
      return 0;
    case B64:
      return 1;
    case B96:
      return 2;
    case B128:
      return 3;
    default:
      return -1;
    }
  }
  case Vector: {
    switch (Ty) {
    case S32:
      return 0;
    case V2S32:
      return 1;
    case V3S32:
      return 2;
    case V4S32:
      return 3;
    default:
      return -1;
    }
  }
  default:
    return -1;
  }
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
}

const SetOfRulesForOpcode &
RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
      Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
      Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
    unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
    auto IRAIt = IRulesAlias.find(IntrID);
    if (IRAIt == IRulesAlias.end()) {
      LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
      llvm_unreachable("No rules defined for intrinsic opcode");
    }
    return IRules.at(IRAIt->second);
  }

  auto GRAIt = GRulesAlias.find(Opc);
  if (GRAIt == GRulesAlias.end()) {
    LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
    llvm_unreachable("No rules defined for generic opcode");
  }
  return GRules.at(GRAIt->second);
}

// Syntactic sugar wrapper for predicate lambda that enables '&&', '||' and '!'.
class Predicate {
private:
  struct Elt {
    // Save formula composed of Pred, '&&', '||' and '!' as a jump table.
    // Sink ! to Pred. For example !((A && !B) || C) -> (!A || B) && !C
    // Sequences of && and || will be represented by jumps, for example:
    // (A && B && ... X) or (A && B && ... X) || Y
    //   A == true jump to B
    //   A == false jump to end or Y, result is A(false) or Y
    // (A || B || ... X) or (A || B || ... X) && Y
    //   A == true jump to end or Y, result is A(true) or Y
    //   A == false jump to B
    // Notice that when negating an expression, we simply flip Neg on each Pred
    // and swap TJumpOffset and FJumpOffset (&& becomes ||, || becomes &&).
    std::function<bool(const MachineInstr &)> Pred;
    bool Neg; // Neg of Pred is calculated before jump
    unsigned TJumpOffset;
    unsigned FJumpOffset;
  };

  SmallVector<Elt, 8> Expression;

  Predicate(SmallVectorImpl<Elt> &&Expr) { Expression.swap(Expr); };

public:
  Predicate(std::function<bool(const MachineInstr &)> Pred) {
    Expression.push_back({Pred, false, 1, 1});
  };

  bool operator()(const MachineInstr &MI) const {
    unsigned Idx = 0;
    unsigned ResultIdx = Expression.size();
    bool Result;
    do {
      Result = Expression[Idx].Pred(MI);
      Result = Expression[Idx].Neg ? !Result : Result;
      if (Result) {
        Idx += Expression[Idx].TJumpOffset;
      } else {
        Idx += Expression[Idx].FJumpOffset;
      }
    } while (Idx != ResultIdx);

    return Result;
  };

  Predicate operator!() const {
    SmallVector<Elt, 8> NegExpression;
    for (const Elt &ExprElt : Expression) {
      NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset,
                               ExprElt.TJumpOffset});
    }
    return Predicate(std::move(NegExpression));
  };

  Predicate operator&&(const Predicate &RHS) const {
    SmallVector<Elt, 8> AndExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in false, whole expression results in false.
      if (i + AndExpression[i].FJumpOffset == ResultIdx)
        AndExpression[i].FJumpOffset += RHSSize;
    }

    AndExpression.append(RHS.Expression);

    return Predicate(std::move(AndExpression));
  }

  Predicate operator||(const Predicate &RHS) const {
    SmallVector<Elt, 8> OrExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // LHS results in true, whole expression results in true.
      if (i + OrExpression[i].TJumpOffset == ResultIdx)
        OrExpression[i].TJumpOffset += RHSSize;
    }

    OrExpression.append(RHS.Expression);

    return Predicate(std::move(OrExpression));
  }
};
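
// Predicates compose with '&&', '||' and '!' before being attached to a rule;
// the operators splice the jump tables together instead of nesting
// std::function calls (see the isUL combination used for loads below).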

// Initialize rules
RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
                                           MachineRegisterInfo &_MRI)
    : ST(&_ST), MRI(&_MRI) {

  addRulesForGOpcs({G_ADD, G_SUB}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_MUL}, Standard).Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
      .Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}})
      .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr16, Sgpr16}}})
      .Any({{DivS16}, {{Vgpr16}, {Vgpr16, Vgpr16}}})
      .Uni(B32, {{SgprB32}, {SgprB32, SgprB32}})
      .Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
      .Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
      .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});

  addRulesForGOpcs({G_SHL}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_LSHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_ASHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});

  addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});

  // Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT
  // and G_FREEZE here; the rest is trivially regbankselected earlier.
  addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
  addRulesForGOpcs({G_CONSTANT})
      .Any({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}});
  addRulesForGOpcs({G_FREEZE}).Any({{DivS1}, {{Vcc}, {Vcc}}});

  addRulesForGOpcs({G_ICMP})
      .Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
      .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}});

  addRulesForGOpcs({G_FCMP})
      .Any({{UniS1, _, S32}, {{UniInVcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}});

  addRulesForGOpcs({G_BRCOND})
      .Any({{UniS1}, {{}, {Sgpr32AExtBoolInReg}}})
      .Any({{DivS1}, {{}, {Vcc}}});

  addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});

  addRulesForGOpcs({G_SELECT}, StandardB)
      .Any({{DivS16}, {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr32AExtBoolInReg, Sgpr16, Sgpr16}}})
      .Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}})
      .Uni(B32, {{SgprB32}, {Sgpr32AExtBoolInReg, SgprB32, SgprB32}})
      .Div(B64, {{VgprB64}, {Vcc, VgprB64, VgprB64}, SplitTo32Select})
      .Uni(B64, {{SgprB64}, {Sgpr32AExtBoolInReg, SgprB64, SgprB64}});

  addRulesForGOpcs({G_ANYEXT})
      .Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS64, S1}, {{None}, {None}}}) // should be combined away
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  // In GlobalISel, G_TRUNC in-reg is treated as a no-op and instruction
  // selected into a COPY. It is up to the user to deal with the truncated
  // bits.
  addRulesForGOpcs({G_TRUNC})
      .Any({{UniS1, UniS16}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS32}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS64}, {{None}, {None}}}) // should be combined away
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}})
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
      .Any({{UniV2S16, V2S32}, {{SgprV2S16}, {SgprV2S32}}})
      .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
      // This is non-trivial. VgprToVccCopy is done using a compare instruction.
      .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}})
      .Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}})
      .Any({{DivS1, DivS64}, {{Vcc}, {Vgpr64}, VgprToVccCopy}});

  addRulesForGOpcs({G_ZEXT})
      .Any({{UniS16, S1}, {{Sgpr32Trunc}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS32, S1}, {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS64, S1}, {{Sgpr64}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // Not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32ZExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32ZExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT})
      .Any({{UniS16, S1}, {{Sgpr32Trunc}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS32, S1}, {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS64, S1}, {{Sgpr64}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // Not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32SExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32SExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT_INREG})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}})
      .Any({{DivS64, S64}, {{Vgpr64}, {Vgpr64}, SplitTo32SExtInReg}});

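  // Subtarget checks used by the load rules below: GFX12+ relaxes the
  // alignment requirements for scalar loads, and hasScalarSubwordLoads() means
  // SMEM can load i8 and i16 directly.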
  bool hasUnalignedLoads = ST->getGeneration() >= AMDGPUSubtarget::GFX12;
  bool hasSMRDSmall = ST->hasScalarSubwordLoads();

  Predicate isAlign16([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(16);
  });

  Predicate isAlign4([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(4);
  });

  Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isAtomic();
  });

  Predicate isUniMMO([](const MachineInstr &MI) -> bool {
    return AMDGPU::isUniformMMO(*MI.memoperands_begin());
  });

  Predicate isConst([](const MachineInstr &MI) -> bool {
    // The address space in the MMO may differ from the address space of the
    // pointer operand.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned AS = MMO->getAddrSpace();
    return AS == AMDGPUAS::CONSTANT_ADDRESS ||
           AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
  });

  Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isVolatile();
  });

  Predicate isInvMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isInvariant();
  });

  Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getFlags() & MONoClobber;
  });

  Predicate isNaturalAlignedSmall([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned MemSize = 8 * MMO->getSize().getValue();
    return (MemSize == 16 && MMO->getAlign() >= Align(2)) ||
           (MemSize == 8 && MMO->getAlign() >= Align(1));
  });

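  // A load may take the scalar (SMEM) path only when it is "uniform legal"
  // (isUL): not atomic, with a uniform MMO, and with memory known not to
  // change under the load (constant address space, or otherwise non-volatile
  // and either invariant or marked MONoClobber).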
  auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
              (isConst || isInvMMO || isNoClobberMMO);

  // clang-format off
  addRulesForGOpcs({G_LOAD})
      .Any({{DivB32, DivP0}, {{VgprB32}, {VgprP0}}})

      .Any({{DivB32, DivP1}, {{VgprB32}, {VgprP1}}})
      .Any({{{UniB256, UniP1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
      .Any({{{UniB512, UniP1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})
      .Any({{{UniB32, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}})
      .Any({{{UniB256, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {VgprP1}, SplitLoad}})
      .Any({{{UniB512, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {VgprP1}, SplitLoad}})

      .Any({{DivB32, UniP3}, {{VgprB32}, {VgprP3}}})
      .Any({{{UniB32, UniP3}, isAlign4 && isUL}, {{SgprB32}, {SgprP3}}})
      .Any({{{UniB32, UniP3}, !isAlign4 || !isUL}, {{UniInVgprB32}, {VgprP3}}})

      .Any({{{DivB256, DivP4}}, {{VgprB256}, {VgprP4}, SplitLoad}})
      .Any({{{UniB32, UniP4}, isNaturalAlignedSmall && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall) // i8 and i16 load
      .Any({{{UniB32, UniP4}, isAlign4 && isUL}, {{SgprB32}, {SgprP4}}})
      .Any({{{UniB96, UniP4}, isAlign16 && isUL}, {{SgprB96}, {SgprP4}, WidenLoad}}, !hasUnalignedLoads)
      .Any({{{UniB96, UniP4}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP4}, SplitLoad}}, !hasUnalignedLoads)
      .Any({{{UniB96, UniP4}, isAlign4 && isUL}, {{SgprB96}, {SgprP4}}}, hasUnalignedLoads)
      .Any({{{UniB256, UniP4}, isAlign4 && isUL}, {{SgprB256}, {SgprP4}}})
      .Any({{{UniB512, UniP4}, isAlign4 && isUL}, {{SgprB512}, {SgprP4}}})
      .Any({{{UniB32, UniP4}, !isNaturalAlignedSmall || !isUL}, {{UniInVgprB32}, {VgprP4}}}, hasSMRDSmall) // i8 and i16 load
      .Any({{{UniB32, UniP4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {VgprP4}}})
      .Any({{{UniB256, UniP4}, !isAlign4 || !isUL}, {{UniInVgprB256}, {VgprP4}, SplitLoad}})
      .Any({{{UniB512, UniP4}, !isAlign4 || !isUL}, {{UniInVgprB512}, {VgprP4}, SplitLoad}})

      .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}});

  addRulesForGOpcs({G_ZEXTLOAD}) // i8 and i16 zero-extending loads
      .Any({{{UniB32, UniP3}, !isAlign4 || !isUL}, {{UniInVgprB32}, {VgprP3}}})
      .Any({{{UniB32, UniP4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {VgprP4}}});
  // clang-format on

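  // Buffer loads always write their result to VGPRs; for uniform results the
  // UniInVgpr* mappings copy the loaded value back to SGPRs afterwards.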
  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD}, Vector)
      .Div(S32, {{Vgpr32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
      .Uni(S32, {{UniInVgprS32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
      .Div(V4S32, {{VgprV4S32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
      .Uni(V4S32, {{UniInVgprV4S32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}});

  addRulesForGOpcs({G_STORE})
      .Any({{S32, P0}, {{}, {Vgpr32, VgprP0}}})
      .Any({{S32, P1}, {{}, {Vgpr32, VgprP1}}})
      .Any({{S64, P1}, {{}, {Vgpr64, VgprP1}}})
      .Any({{V4S32, P1}, {{}, {VgprV4S32, VgprP1}}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_STORE})
      .Any({{S32}, {{}, {Vgpr32, SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}});

  addRulesForGOpcs({G_PTR_ADD})
      .Any({{UniP1}, {{SgprP1}, {SgprP1, Sgpr64}}})
      .Any({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}})
      .Any({{DivP0}, {{VgprP0}, {VgprP0, Vgpr64}}});

  addRulesForGOpcs({G_INTTOPTR})
      .Any({{UniPtr32}, {{SgprPtr32}, {Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {Vgpr64}}})
      .Any({{UniPtr128}, {{SgprPtr128}, {Sgpr128}}})
      .Any({{DivPtr128}, {{VgprPtr128}, {Vgpr128}}});

  addRulesForGOpcs({G_PTRTOINT})
      .Any({{UniS32}, {{Sgpr32}, {SgprPtr32}}})
      .Any({{DivS32}, {{Vgpr32}, {VgprPtr32}}})
      .Any({{UniS64}, {{Sgpr64}, {SgprPtr64}}})
      .Any({{DivS64}, {{Vgpr64}, {VgprPtr64}}})
      .Any({{UniS128}, {{Sgpr128}, {SgprPtr128}}})
      .Any({{DivS128}, {{Vgpr128}, {VgprPtr128}}});

  addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});

  bool hasSALUFloat = ST->hasSALUFloatInsts();

  addRulesForGOpcs({G_FADD}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_FPTOUI})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);

  addRulesForGOpcs({G_UITOFP})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);

  using namespace Intrinsic;

  addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});

  // This is an "intrinsic lane mask"; it was set to i32/i64 in LLVM IR.
  addRulesForIOpcs({amdgcn_end_cf}).Any({{_, S32}, {{}, {None, Sgpr32}}});

  addRulesForIOpcs({amdgcn_if_break}, Standard)
      .Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});

  addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
      .Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});

  addRulesForIOpcs({amdgcn_readfirstlane})
      .Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}})
      // This should not exist in the first place; it comes from call lowering,
      // which readfirstlanes just in case the register is not an SGPR.
      .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});

} // end initialize rules
