1//===- AMDGPURegBankLegalizeRules --------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZERULES_H
10#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZERULES_H
11
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include <cassert>
#include <functional>
#include <initializer_list>
#include <utility>
15
16namespace llvm {
17
// Forward declarations. This header only names these types in declarations
// (references, pointers, and parameters of functions declared here), so it
// does not pull in their defining headers.
class LLT;
class MachineRegisterInfo;
class MachineInstr;
class GCNSubtarget;
class MachineFunction;
// The uniformity templates are declared here and the MachineFunction-based
// aliases are spelled out by hand for use in the declarations below.
template <typename T> class GenericUniformityInfo;
template <typename T> class GenericSSAContext;
using MachineSSAContext = GenericSSAContext<MachineFunction>;
using MachineUniformityInfo = GenericUniformityInfo<MachineSSAContext>;
27
28namespace AMDGPU {
29
/// \returns true if \p Ty is a pointer type with size \p Width.
/// Declared here only; the definition lives outside this header.
/// NOTE(review): \p Width is presumably a size in bits — confirm against the
/// definition.
bool isAnyPtr(LLT Ty, unsigned Width);
32
// IDs used to build predicate for RegBankLegalizeRule. Predicate can have one
// or more IDs and each represents a check for 'uniform or divergent' + LLT or
// just LLT on register operand.
// Most often checking one operand is enough to decide which RegBankLLTMapping
// to apply (see Fast Rules), IDs are useful when two or more operands need to
// be checked.
//
// Naming scheme, as far as it can be read off the enumerators: an unprefixed
// ID stands for the 'just LLT' check, while the 'Uni' and 'Div' prefixes stand
// for the 'uniform' and 'divergent' flavours of the same check.
// The enumerators carry no explicit values, so their numeric values follow
// declaration order. Not every type has all three flavours (for example there
// is a UniP8 but no DivP8).
enum UniformityLLTOpPredicateID {
  // Represents non-register and physical register operands.
  _,
  // scalars
  S1,
  S16,
  S32,
  S64,
  S128,

  UniS1,
  UniS16,
  UniS32,
  UniS64,
  UniS128,

  DivS1,
  DivS16,
  DivS32,
  DivS64,
  DivS128,

  // pointers
  // NOTE(review): P<N> presumably names a pointer in address space N and
  // Ptr<W> any pointer that is W wide (compare isAnyPtr) — confirm against the
  // predicate matcher.
  P0,
  P1,
  P2,
  P3,
  P4,
  P5,
  P8,
  Ptr32,
  Ptr64,
  Ptr128,

  UniP0,
  UniP1,
  UniP2,
  UniP3,
  UniP4,
  UniP5,
  UniP8,
  UniPtr32,
  UniPtr64,
  UniPtr128,

  DivP0,
  DivP1,
  DivP2,
  DivP3,
  DivP4,
  DivP5,
  DivPtr32,
  DivPtr64,
  DivPtr128,

  // vectors
  V2S16,
  V2S32,
  V2S64,
  V3S32,
  V4S32,

  UniV2S16,
  UniV2S32,
  UniV2S64,

  DivV2S16,
  DivV2S32,
  DivV2S64,
  DivV3S32,
  DivV4S16,

  // B types
  // NOTE(review): B<N> presumably matches on total size N rather than on one
  // exact LLT — confirm against the predicate matcher. The BRC IDs exist only
  // in Uni/Div form; their meaning is not evident from this header.
  B32,
  B64,
  B96,
  B128,
  B160,
  B256,
  B512,

  UniB32,
  UniB64,
  UniB96,
  UniB128,
  UniB160,
  UniB256,
  UniB512,
  UniBRC,

  DivB32,
  DivB64,
  DivB96,
  DivB128,
  DivB160,
  DivB256,
  DivB512,
  DivBRC
};
138
// How to apply register bank on register operand.
// In most cases, this serves as a LLT and register bank assert.
// Can change operands and insert copies, extends, truncs, and read-any-lanes.
// Anything more complicated requires LoweringMethod.
//
// One ID is given per destination and per source operand of an instruction
// (see RegBankLLTMapping). The enumerators carry no explicit values, so their
// numeric values follow declaration order.
enum RegBankLLTMappingApplyID {
  // Placeholder used to fill fast-rule slots that have no mapping assigned
  // (see SetOfRulesForOpcode).
  InvalidMapping,
  // NOTE(review): None / IntrId / Imm presumably cover operands that need no
  // bank, the intrinsic-ID operand and immediate operands respectively —
  // confirm against the code that applies the mapping.
  None,
  IntrId,
  Imm,
  Vcc,

  // sgpr scalars, pointers, vectors and B-types
  Sgpr16,
  Sgpr32,
  Sgpr64,
  Sgpr128,
  SgprP0,
  SgprP1,
  SgprP2,
  SgprP3,
  SgprP4,
  SgprP5,
  SgprP8,
  SgprPtr32,
  SgprPtr64,
  SgprPtr128,
  SgprV2S16,
  SgprV4S32,
  SgprV2S32,
  SgprB32,
  SgprB64,
  SgprB96,
  SgprB128,
  SgprB256,
  SgprB512,
  SgprBRC,

  // vgpr scalars, pointers, vectors and B-types
  Vgpr16,
  Vgpr32,
  Vgpr64,
  Vgpr128,
  VgprP0,
  VgprP1,
  VgprP2,
  VgprP3,
  VgprP4,
  VgprP5,
  VgprPtr32,
  VgprPtr64,
  VgprPtr128,
  VgprV2S16,
  VgprV2S32,
  VgprV3S32,
  VgprB32,
  VgprB64,
  VgprB96,
  VgprB128,
  VgprB160,
  VgprB256,
  VgprB512,
  VgprBRC,
  VgprV4S16,
  VgprV4S32,
  VgprV8S32,
  VgprV2S64,

  // Dst only modifiers: read-any-lane and truncs
  UniInVcc,
  UniInVgprS16,
  UniInVgprS32,
  UniInVgprS64,
  UniInVgprV2S16,
  UniInVgprV2S32,
  UniInVgprV4S32,
  UniInVgprV2S64,
  UniInVgprB32,
  UniInVgprB64,
  UniInVgprB96,
  UniInVgprB128,
  UniInVgprB160,
  UniInVgprB256,
  UniInVgprB512,

  Sgpr32Trunc,

  // Src only modifiers: execute in waterfall loop if divergent
  Sgpr32_WF,
  SgprV4S32_WF,

  // Src only modifiers: execute in waterfall loop for calls
  SgprP0Call_WF,
  SgprP4Call_WF,

  // Src only modifiers: for operands that must end up in M0. If divergent,
  // readfirstlane to SGPR. The result can then be copied to M0 in ISel.
  SgprB32_M0,

  // Src only modifiers: extends
  Sgpr32AExt,
  Sgpr32AExtBoolInReg,
  Sgpr32SExt,
  Sgpr32ZExt,
  Vgpr32AExt,
  Vgpr32SExt,
  Vgpr32ZExt,
};
246
// Instruction needs to be replaced with a sequence of instructions. Lowering
// was not done by the legalizer since the instruction is available in either
// sgpr or vgpr.
// For example S64 AND is available on sgpr, for that reason S64 AND is legal in
// context of Legalizer that only checks LLT. But S64 AND is not available on
// vgpr. Lower it to two S32 vgpr ANDs.
//
// DoNotLower is the default LoweringMethod of a RegBankLLTMapping, i.e. the
// operand mappings alone are applied. The enumerators carry no explicit
// values, so their numeric values follow declaration order.
enum LoweringMethodID {
  DoNotLower,
  VccExtToSel,
  UniExtToSel,
  UnpackBitShift,
  UnpackMinMax,
  S_BFE,
  V_BFE,
  VgprToVccCopy,
  UniMAD64,
  UniMul64,
  DivSMulToMAD,
  SplitTo32,
  SplitTo32Mul,
  ScalarizeToS16,
  SplitTo32Select,
  SplitTo32SExtInReg,
  Ext32To64,
  UniCstExt,
  SplitLoad,
  WidenLoad,
  WidenMMOToS32,
  UnpackAExt,
  VerifyAllSgpr,
  ApplyAllVgpr,
  UnmergeToShiftTrunc,
  AextToS32InIncomingBlockGPHI,
  VerifyAllSgprGPHI,
  VerifyAllSgprOrVgprGPHI,
  ApplyINTRIN_IMAGE,
  SplitBitCount64To32
};
284
// Selects which family of types the "Fast Rules" slots of a
// SetOfRulesForOpcode stand for. Each family lists four types, matching the
// four-entry Uni/Div arrays in SetOfRulesForOpcode; NoFastRules is the default
// and means only the slow rules are available.
enum FastRulesTypes {
  NoFastRules,
  Standard,  // S16, S32, S64, V2S16
  StandardB, // B32, B64, B96, B128
  Vector,    // S32, V2S32, V3S32, V4S32
};
291
// Describes what to do with one instruction once a rule has matched: one
// RegBankLLTMappingApplyID per destination operand, one per source operand,
// and an optional LoweringMethodID for cases the per-operand IDs cannot
// express.
struct RegBankLLTMapping {
  // Apply IDs for the destination (def) operands.
  SmallVector<RegBankLLTMappingApplyID, 2> DstOpMapping;
  // Apply IDs for the source (use) operands.
  SmallVector<RegBankLLTMappingApplyID, 4> SrcOpMapping;
  // Lowering to perform; DoNotLower unless the constructor is told otherwise.
  LoweringMethodID LoweringMethod;
  // Takes the destination list, the source list and the lowering method, which
  // defaults to DoNotLower. Defined out of line.
  RegBankLLTMapping(
      std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
      std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
      LoweringMethodID LoweringMethod = DoNotLower);
};
301
// Predicate half of a RegBankLegalizeRule: a list of per-operand
// uniformity/LLT IDs plus an optional arbitrary test on the instruction.
struct PredicateMapping {
  // IDs to check against the instruction's operands.
  // NOTE(review): presumably positional, one entry per operand starting at
  // operand 0 — confirm against match().
  SmallVector<UniformityLLTOpPredicateID, 4> OpUniformityAndTypes;
  // Optional extra test on the instruction; may be empty (the constructor
  // default is nullptr).
  std::function<bool(const MachineInstr &)> TestFunc;
  // Takes the operand ID list and the optional test function. Defined out of
  // line.
  PredicateMapping(
      std::initializer_list<UniformityLLTOpPredicateID> OpList,
      std::function<bool(const MachineInstr &)> TestFunc = nullptr);

  // Tests this predicate against \p MI using uniformity info \p MUI and
  // register info \p MRI. Defined out of line.
  // NOTE(review): presumably returns true when every ID and TestFunc (if set)
  // accept MI — confirm against the definition.
  bool match(const MachineInstr &MI, const MachineUniformityInfo &MUI,
             const MachineRegisterInfo &MRI) const;
};
312
// A single "slow" rule: a predicate paired with the operand mapping to use
// for instructions it applies to.
struct RegBankLegalizeRule {
  // Condition under which this rule applies.
  PredicateMapping Predicate;
  // Mapping (and optional lowering) used when the rule applies.
  RegBankLLTMapping OperandMapping;
};
317
// All rules registered for one key opcode: a list of "slow" rules that are
// tested one by one, plus optional "fast" rules that are looked up directly
// by slot.
class SetOfRulesForOpcode {
  // "Slow Rules". More complex 'Rules[i].Predicate', check them one by one.
  SmallVector<RegBankLegalizeRule, 4> Rules;

  // "Fast Rules"
  // Instead of testing each 'Rules[i].Predicate' we do direct access to
  // RegBankLLTMapping using getFastPredicateSlot. For example if:
  // - FastTypes == Standard Uni[0] holds Mapping in case Op 0 is uniform S32
  // - FastTypes == Vector Div[3] holds Mapping in case Op 0 is divergent V4S32
  FastRulesTypes FastTypes = NoFastRules;
  // Every fast slot starts out as an InvalidMapping placeholder.
  // NOTE(review): InvMapping is a preprocessor macro and is not #undef'd in
  // this header, so it remains defined in every file that includes it.
#define InvMapping RegBankLLTMapping({InvalidMapping}, {InvalidMapping})
  RegBankLLTMapping Uni[4] = {InvMapping, InvMapping, InvMapping, InvMapping};
  RegBankLLTMapping Div[4] = {InvMapping, InvMapping, InvMapping, InvMapping};

public:
  SetOfRulesForOpcode();
  // Creates a rule set whose fast slots use the type family \p FastTypes.
  // NOTE(review): not 'explicit', so a FastRulesTypes value converts
  // implicitly to a SetOfRulesForOpcode.
  SetOfRulesForOpcode(FastRulesTypes FastTypes);

  // Looks up the mapping to use for \p MI. Defined out of line.
  // NOTE(review): presumably returns nullptr when no rule applies — confirm
  // against the definition.
  const RegBankLLTMapping *
  findMappingForMI(const MachineInstr &MI, const MachineRegisterInfo &MRI,
                   const MachineUniformityInfo &MUI) const;

  // Registers a slow rule.
  void addRule(RegBankLegalizeRule Rule);

  // Register a fast rule for type \p Ty in the divergent / uniform slot
  // arrays respectively.
  void addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
                            RegBankLLTMapping RuleApplyIDs);
  void addFastRuleUniform(UniformityLLTOpPredicateID Ty,
                          RegBankLLTMapping RuleApplyIDs);

private:
  // Maps \p Ty to an index into Uni/Div.
  // NOTE(review): the int return presumably uses a negative value for "no
  // slot" — confirm against the definition.
  int getFastPredicateSlot(UniformityLLTOpPredicateID Ty) const;
};
350
351// Essentially 'map<Opcode(or intrinsic_opcode), SetOfRulesForOpcode>' but a
352// little more efficient.
353class RegBankLegalizeRules {
354 const GCNSubtarget *ST;
355 MachineRegisterInfo *MRI;
356 // Separate maps for G-opcodes and intrinsics since they are in different
357 // enums. Multiple opcodes can share same set of rules.
358 // RulesAlias = map<Opcode, KeyOpcode>
359 // Rules = map<KeyOpcode, SetOfRulesForOpcode>
360 SmallDenseMap<unsigned, unsigned, 256> GRulesAlias;
361 SmallDenseMap<unsigned, SetOfRulesForOpcode, 128> GRules;
362 SmallDenseMap<unsigned, unsigned, 128> IRulesAlias;
363 SmallDenseMap<unsigned, SetOfRulesForOpcode, 64> IRules;
364 class RuleSetInitializer {
365 SetOfRulesForOpcode *RuleSet;
366
367 public:
368 // Used for clang-format line breaks and to force writing all rules for
369 // opcode in same place.
370 template <class AliasMap, class RulesMap>
371 RuleSetInitializer(std::initializer_list<unsigned> OpcList,
372 AliasMap &RulesAlias, RulesMap &Rules,
373 FastRulesTypes FastTypes = NoFastRules) {
374 unsigned KeyOpcode = *OpcList.begin();
375 for (unsigned Opc : OpcList) {
376 [[maybe_unused]] auto [_, NewInput] =
377 RulesAlias.try_emplace(Opc, KeyOpcode);
378 assert(NewInput && "Can't redefine existing Rules");
379 }
380
381 auto [DenseMapIter, NewInput] = Rules.try_emplace(KeyOpcode, FastTypes);
382 assert(NewInput && "Can't redefine existing Rules");
383
384 RuleSet = &DenseMapIter->second;
385 }
386
387 RuleSetInitializer(const RuleSetInitializer &) = delete;
388 RuleSetInitializer &operator=(const RuleSetInitializer &) = delete;
389 RuleSetInitializer(RuleSetInitializer &&) = delete;
390 RuleSetInitializer &operator=(RuleSetInitializer &&) = delete;
391 ~RuleSetInitializer() = default;
392
393 RuleSetInitializer &Div(UniformityLLTOpPredicateID Ty,
394 RegBankLLTMapping RuleApplyIDs,
395 bool STPred = true) {
396 if (STPred)
397 RuleSet->addFastRuleDivergent(Ty, RuleApplyIDs);
398 return *this;
399 }
400
401 RuleSetInitializer &Uni(UniformityLLTOpPredicateID Ty,
402 RegBankLLTMapping RuleApplyIDs,
403 bool STPred = true) {
404 if (STPred)
405 RuleSet->addFastRuleUniform(Ty, RuleApplyIDs);
406 return *this;
407 }
408
409 RuleSetInitializer &Any(RegBankLegalizeRule Init, bool STPred = true) {
410 if (STPred)
411 RuleSet->addRule(Rule: Init);
412 return *this;
413 }
414 };
415
416 RuleSetInitializer addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
417 FastRulesTypes FastTypes = NoFastRules);
418
419 RuleSetInitializer addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
420 FastRulesTypes FastTypes = NoFastRules);
421
422public:
423 // Initialize rules for all opcodes.
424 RegBankLegalizeRules(const GCNSubtarget &ST, MachineRegisterInfo &MRI);
425
426 // In case we don't want to regenerate same rules, we can use already
427 // generated rules but need to refresh references to objects that are
428 // created for this run.
429 void refreshRefs(const GCNSubtarget &_ST, MachineRegisterInfo &_MRI) {
430 ST = &_ST;
431 MRI = &_MRI;
432 };
433
434 const SetOfRulesForOpcode *getRulesForOpc(MachineInstr &MI) const;
435};
436
437} // end namespace AMDGPU
438} // end namespace llvm
439
440#endif
441