1//===- AMDGPURegBankLegalizeRules --------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZERULES_H
10#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZERULES_H
11
12#include "llvm/ADT/DenseMap.h"
13#include "llvm/ADT/SmallVector.h"
14#include <functional>
15
16namespace llvm {
17
18class LLT;
19class MachineRegisterInfo;
20class MachineInstr;
21class GCNSubtarget;
22class MachineFunction;
23template <typename T> class GenericUniformityInfo;
24template <typename T> class GenericSSAContext;
25using MachineSSAContext = GenericSSAContext<MachineFunction>;
26using MachineUniformityInfo = GenericUniformityInfo<MachineSSAContext>;
27
28namespace AMDGPU {
29
30/// \returns true if \p Ty is a pointer type with size \p Width.
31bool isAnyPtr(LLT Ty, unsigned Width);
32
// Building blocks for the predicate of a RegBankLegalizeRule. A predicate is a
// list of one or more of these IDs, and every ID describes a check on a single
// register operand: either its LLT alone, or 'uniform or divergent' combined
// with its LLT.
// Checking one operand is usually sufficient to pick the RegBankLLTMapping to
// apply (see Fast Rules); the IDs are useful when two or more operands need to
// be checked.
//
// Enumerator values are positional, so the order below must not change.
// clang-format off
enum UniformityLLTOpPredicateID {
  // NOTE(review): presumably a placeholder for an operand that is not
  // checked - confirm against PredicateMapping::match.
  _,

  // Scalars: LLT only, uniform + LLT, divergent + LLT.
  S1,    S16,    S32,    S64,    S128,
  UniS1, UniS16, UniS32, UniS64, UniS128,
  DivS1, DivS16, DivS32, DivS64, DivS128,

  // Pointers: LLT only, uniform + LLT, divergent + LLT.
  P0,    P1,    P2,    P3,    P4,    P5,    P8,
  Ptr32, Ptr64, Ptr128,
  UniP0, UniP1, UniP2, UniP3, UniP4, UniP5, UniP8,
  UniPtr32, UniPtr64, UniPtr128,
  DivP0, DivP1, DivP2, DivP3, DivP4, DivP5,
  DivPtr32, DivPtr64, DivPtr128,

  // Vectors: LLT only, uniform + LLT, divergent + LLT.
  V2S16,    V2S32,    V2S64,    V3S32, V4S32,
  UniV2S16, UniV2S32, UniV2S64,
  DivV2S16, DivV2S32, DivV2S64,

  // B types: LLT only, uniform + LLT, divergent + LLT.
  B32,    B64,    B96,    B128,    B256,    B512,
  UniB32, UniB64, UniB96, UniB128, UniB256, UniB512, UniBRC,
  DivB32, DivB64, DivB96, DivB128, DivB256, DivB512, DivBRC
};
// clang-format on
132
// Describes how a register bank is applied to a single register operand.
// In most cases an ID simply acts as an assert on the operand's LLT and
// register bank. Some IDs may also change the operand and insert copies,
// extends, truncs and read-any-lanes. Anything more complicated than that
// requires a LoweringMethod.
//
// Enumerator values are positional, so the order below must not change.
// clang-format off
enum RegBankLLTMappingApplyID {
  InvalidMapping,
  None,
  IntrId,
  Imm,
  Vcc,

  // sgpr scalars, pointers, vectors and B-types
  Sgpr16, Sgpr32, Sgpr64, Sgpr128,
  SgprP0, SgprP1, SgprP2, SgprP3, SgprP4, SgprP5, SgprP8,
  SgprPtr32, SgprPtr64, SgprPtr128,
  SgprV2S16, SgprV4S32, SgprV2S32,
  SgprB32, SgprB64, SgprB96, SgprB128, SgprB256, SgprB512,

  // vgpr scalars, pointers, vectors and B-types
  Vgpr16, Vgpr32, Vgpr64, Vgpr128,
  VgprP0, VgprP1, VgprP2, VgprP3, VgprP4, VgprP5,
  VgprPtr32, VgprPtr64, VgprPtr128,
  VgprV2S16, VgprV2S32, VgprV3S32,
  VgprB32, VgprB64, VgprB96, VgprB128, VgprB256, VgprB512,
  VgprV4S32, VgprV2S64,

  // Dst only modifiers: read-any-lane and truncs
  UniInVcc,
  UniInVgprS16, UniInVgprS32, UniInVgprS64,
  UniInVgprV2S16, UniInVgprV2S32, UniInVgprV4S32, UniInVgprV2S64,
  UniInVgprB32, UniInVgprB64, UniInVgprB96,
  UniInVgprB128, UniInVgprB256, UniInVgprB512,

  Sgpr32Trunc,

  // Src only modifiers: execute in waterfall loop if divergent
  Sgpr32_WF, SgprV4S32_WF,

  // Src only modifiers: extends
  Sgpr32AExt, Sgpr32AExtBoolInReg, Sgpr32SExt, Sgpr32ZExt,
  Vgpr32AExt, Vgpr32SExt, Vgpr32ZExt,
};
// clang-format on
226
// Identifies how an instruction is replaced with a sequence of instructions.
// The legalizer did not do this lowering because the instruction is available
// on either sgpr or vgpr. For example, S64 AND is available on sgpr, so it is
// legal from the point of view of the Legalizer, which only checks the LLT.
// S64 AND is not available on vgpr, however, so there it is lowered to two
// S32 vgpr ANDs.
//
// Enumerator values are positional, so the order below must not change.
// clang-format off
enum LoweringMethodID {
  DoNotLower,
  VccExtToSel,   UniExtToSel,
  UnpackBitShift, UnpackMinMax,
  S_BFE,         V_BFE,
  VgprToVccCopy,
  UniMAD64,      UniMul64,
  DivSMulToMAD,
  SplitTo32,     SplitTo32Mul,
  ScalarizeToS16,
  SplitTo32Select, SplitTo32SExtInReg,
  Ext32To64,
  UniCstExt,
  SplitLoad,     WidenLoad,
  WidenMMOToS32,
  UnpackAExt,
  VerifyAllSgpr, ApplyAllVgpr,
  UnmergeToShiftTrunc
};
// clang-format on
259
// Selects which group of types, if any, the "Fast Rules" of a
// SetOfRulesForOpcode cover.
enum FastRulesTypes {
  // No fast rules.
  NoFastRules,
  // S16, S32, S64, V2S16
  Standard,
  // B32, B64, B96, B128
  StandardB,
  // S32, V2S32, V3S32, V4S32
  Vector,
};
266
267struct RegBankLLTMapping {
268 SmallVector<RegBankLLTMappingApplyID, 2> DstOpMapping;
269 SmallVector<RegBankLLTMappingApplyID, 4> SrcOpMapping;
270 LoweringMethodID LoweringMethod;
271 RegBankLLTMapping(
272 std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
273 std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
274 LoweringMethodID LoweringMethod = DoNotLower);
275};
276
277struct PredicateMapping {
278 SmallVector<UniformityLLTOpPredicateID, 4> OpUniformityAndTypes;
279 std::function<bool(const MachineInstr &)> TestFunc;
280 PredicateMapping(
281 std::initializer_list<UniformityLLTOpPredicateID> OpList,
282 std::function<bool(const MachineInstr &)> TestFunc = nullptr);
283
284 bool match(const MachineInstr &MI, const MachineUniformityInfo &MUI,
285 const MachineRegisterInfo &MRI) const;
286};
287
288struct RegBankLegalizeRule {
289 PredicateMapping Predicate;
290 RegBankLLTMapping OperandMapping;
291};
292
293class SetOfRulesForOpcode {
294 // "Slow Rules". More complex 'Rules[i].Predicate', check them one by one.
295 SmallVector<RegBankLegalizeRule, 4> Rules;
296
297 // "Fast Rules"
298 // Instead of testing each 'Rules[i].Predicate' we do direct access to
299 // RegBankLLTMapping using getFastPredicateSlot. For example if:
300 // - FastTypes == Standard Uni[0] holds Mapping in case Op 0 is uniform S32
301 // - FastTypes == Vector Div[3] holds Mapping in case Op 0 is divergent V4S32
302 FastRulesTypes FastTypes = NoFastRules;
303#define InvMapping RegBankLLTMapping({InvalidMapping}, {InvalidMapping})
304 RegBankLLTMapping Uni[4] = {InvMapping, InvMapping, InvMapping, InvMapping};
305 RegBankLLTMapping Div[4] = {InvMapping, InvMapping, InvMapping, InvMapping};
306
307public:
308 SetOfRulesForOpcode();
309 SetOfRulesForOpcode(FastRulesTypes FastTypes);
310
311 const RegBankLLTMapping *
312 findMappingForMI(const MachineInstr &MI, const MachineRegisterInfo &MRI,
313 const MachineUniformityInfo &MUI) const;
314
315 void addRule(RegBankLegalizeRule Rule);
316
317 void addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
318 RegBankLLTMapping RuleApplyIDs);
319 void addFastRuleUniform(UniformityLLTOpPredicateID Ty,
320 RegBankLLTMapping RuleApplyIDs);
321
322private:
323 int getFastPredicateSlot(UniformityLLTOpPredicateID Ty) const;
324};
325
326// Essentially 'map<Opcode(or intrinsic_opcode), SetOfRulesForOpcode>' but a
327// little more efficient.
328class RegBankLegalizeRules {
329 const GCNSubtarget *ST;
330 MachineRegisterInfo *MRI;
331 // Separate maps for G-opcodes and intrinsics since they are in different
332 // enums. Multiple opcodes can share same set of rules.
333 // RulesAlias = map<Opcode, KeyOpcode>
334 // Rules = map<KeyOpcode, SetOfRulesForOpcode>
335 SmallDenseMap<unsigned, unsigned, 256> GRulesAlias;
336 SmallDenseMap<unsigned, SetOfRulesForOpcode, 128> GRules;
337 SmallDenseMap<unsigned, unsigned, 128> IRulesAlias;
338 SmallDenseMap<unsigned, SetOfRulesForOpcode, 64> IRules;
339 class RuleSetInitializer {
340 SetOfRulesForOpcode *RuleSet;
341
342 public:
343 // Used for clang-format line breaks and to force writing all rules for
344 // opcode in same place.
345 template <class AliasMap, class RulesMap>
346 RuleSetInitializer(std::initializer_list<unsigned> OpcList,
347 AliasMap &RulesAlias, RulesMap &Rules,
348 FastRulesTypes FastTypes = NoFastRules) {
349 unsigned KeyOpcode = *OpcList.begin();
350 for (unsigned Opc : OpcList) {
351 [[maybe_unused]] auto [_, NewInput] =
352 RulesAlias.try_emplace(Opc, KeyOpcode);
353 assert(NewInput && "Can't redefine existing Rules");
354 }
355
356 auto [DenseMapIter, NewInput] = Rules.try_emplace(KeyOpcode, FastTypes);
357 assert(NewInput && "Can't redefine existing Rules");
358
359 RuleSet = &DenseMapIter->second;
360 }
361
362 RuleSetInitializer(const RuleSetInitializer &) = delete;
363 RuleSetInitializer &operator=(const RuleSetInitializer &) = delete;
364 RuleSetInitializer(RuleSetInitializer &&) = delete;
365 RuleSetInitializer &operator=(RuleSetInitializer &&) = delete;
366 ~RuleSetInitializer() = default;
367
368 RuleSetInitializer &Div(UniformityLLTOpPredicateID Ty,
369 RegBankLLTMapping RuleApplyIDs,
370 bool STPred = true) {
371 if (STPred)
372 RuleSet->addFastRuleDivergent(Ty, RuleApplyIDs);
373 return *this;
374 }
375
376 RuleSetInitializer &Uni(UniformityLLTOpPredicateID Ty,
377 RegBankLLTMapping RuleApplyIDs,
378 bool STPred = true) {
379 if (STPred)
380 RuleSet->addFastRuleUniform(Ty, RuleApplyIDs);
381 return *this;
382 }
383
384 RuleSetInitializer &Any(RegBankLegalizeRule Init, bool STPred = true) {
385 if (STPred)
386 RuleSet->addRule(Rule: Init);
387 return *this;
388 }
389 };
390
391 RuleSetInitializer addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
392 FastRulesTypes FastTypes = NoFastRules);
393
394 RuleSetInitializer addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
395 FastRulesTypes FastTypes = NoFastRules);
396
397public:
398 // Initialize rules for all opcodes.
399 RegBankLegalizeRules(const GCNSubtarget &ST, MachineRegisterInfo &MRI);
400
401 // In case we don't want to regenerate same rules, we can use already
402 // generated rules but need to refresh references to objects that are
403 // created for this run.
404 void refreshRefs(const GCNSubtarget &_ST, MachineRegisterInfo &_MRI) {
405 ST = &_ST;
406 MRI = &_MRI;
407 };
408
409 const SetOfRulesForOpcode *getRulesForOpc(MachineInstr &MI) const;
410};
411
412} // end namespace AMDGPU
413} // end namespace llvm
414
415#endif
416