1//===- AMDGPURegBankLegalizeRules --------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZERULES_H
10#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZERULES_H
11
12#include "llvm/ADT/DenseMap.h"
13#include "llvm/ADT/SmallVector.h"
14#include <functional>
15
16namespace llvm {
17
// Forward declarations. This header only names these types in declarations
// (by value in a function declaration, or through pointers and references),
// so their full definitions are not needed here.
class LLT;
class MachineRegisterInfo;
class MachineInstr;
class GCNSubtarget;
class MachineFunction;
template <typename T> class GenericUniformityInfo;
template <typename T> class GenericSSAContext;
// Uniformity analysis types instantiated for machine functions.
using MachineSSAContext = GenericSSAContext<MachineFunction>;
using MachineUniformityInfo = GenericUniformityInfo<MachineSSAContext>;
27
28namespace AMDGPU {
29
/// \returns true if \p Ty is a pointer type with size \p Width.
///
/// NOTE(review): the definition is not in this header. \p Width is presumably
/// a size in bits and the address space is presumably not checked ("any"
/// pointer) - confirm against the implementation.
bool isAnyPtr(LLT Ty, unsigned Width);
32
// IDs used to build predicate for RegBankLegalizeRule. Predicate can have one
// or more IDs and each represents a check for 'uniform or divergent' + LLT or
// just LLT on register operand.
// Most often checking one operand is enough to decide which RegBankLLTMapping
// to apply (see Fast Rules), IDs are useful when two or more operands need to
// be checked.
//
// Naming scheme, as far as this header shows: an unprefixed ID (S32, P1, B64)
// is the 'just LLT' form, while the Uni*/Div* variants add the 'uniform' or
// 'divergent' requirement to the same type.
//
// The enumerators rely on their implicit numbering; do not reorder them
// without checking every user of this enum.
enum UniformityLLTOpPredicateID {
  // NOTE(review): presumably a placeholder meaning "no check for this
  // operand" - confirm against PredicateMapping::match.
  _,
  // scalars
  S1,
  S16,
  S32,
  S64,
  S128,

  UniS1,
  UniS16,
  UniS32,
  UniS64,
  UniS128,

  DivS1,
  DivS16,
  DivS32,
  DivS64,
  DivS128,

  // pointers
  // NOTE(review): P<N> presumably names address space N and Ptr<W> any
  // pointer of width W (see isAnyPtr) - confirm against the implementation.
  P0,
  P1,
  P3,
  P4,
  P5,
  Ptr32,
  Ptr64,
  Ptr128,

  UniP0,
  UniP1,
  UniP3,
  UniP4,
  UniP5,
  UniPtr32,
  UniPtr64,
  UniPtr128,

  DivP0,
  DivP1,
  DivP3,
  DivP4,
  DivP5,
  DivPtr32,
  DivPtr64,
  DivPtr128,

  // vectors
  V2S16,
  V2S32,
  V3S32,
  V4S32,

  // Only V2S16 has uniform/divergent variants among the vector IDs.
  UniV2S16,

  DivV2S16,

  // B types
  // NOTE(review): presumably "any type of this bit width" - confirm against
  // the implementation.
  B32,
  B64,
  B96,
  B128,
  B256,
  B512,

  UniB32,
  UniB64,
  UniB96,
  UniB128,
  UniB256,
  UniB512,

  DivB32,
  DivB64,
  DivB96,
  DivB128,
  DivB256,
  DivB512,
};
120
// How to apply register bank on register operand.
// In most cases, this serves as a LLT and register bank assert.
// Can change operands and insert copies, extends, truncs, and read-any-lanes.
// Anything more complicated requires LoweringMethod.
//
// One ID is given per operand: RegBankLLTMapping stores a list of these for
// the destination operands and another for the source operands.
//
// The enumerators rely on their implicit numbering; do not reorder them
// without checking every user of this enum.
enum RegBankLLTMappingApplyID {
  // Marks a mapping that has not been filled in; SetOfRulesForOpcode uses it
  // as the initial value of every fast-rule slot.
  InvalidMapping,
  // NOTE(review): the next four presumably describe operands that are not
  // plain sgpr/vgpr registers (no operand handling, intrinsic ID, immediate,
  // lane mask) - confirm against the code that applies the mapping.
  None,
  IntrId,
  Imm,
  Vcc,

  // sgpr scalars, pointers, vectors and B-types
  Sgpr16,
  Sgpr32,
  Sgpr64,
  Sgpr128,
  SgprP1,
  SgprP3,
  SgprP4,
  SgprP5,
  SgprPtr32,
  SgprPtr64,
  SgprPtr128,
  SgprV2S16,
  SgprV4S32,
  SgprV2S32,
  SgprB32,
  SgprB64,
  SgprB96,
  SgprB128,
  SgprB256,
  SgprB512,

  // vgpr scalars, pointers, vectors and B-types
  Vgpr16,
  Vgpr32,
  Vgpr64,
  Vgpr128,
  VgprP0,
  VgprP1,
  VgprP3,
  VgprP4,
  VgprP5,
  VgprPtr32,
  VgprPtr64,
  VgprPtr128,
  VgprV2S16,
  VgprV2S32,
  VgprB32,
  VgprB64,
  VgprB96,
  VgprB128,
  VgprB256,
  VgprB512,
  VgprV4S32,

  // Dst only modifiers: read-any-lane and truncs
  UniInVcc,
  UniInVgprS32,
  UniInVgprV2S16,
  UniInVgprV4S32,
  UniInVgprB32,
  UniInVgprB64,
  UniInVgprB96,
  UniInVgprB128,
  UniInVgprB256,
  UniInVgprB512,

  Sgpr32Trunc,

  // Src only modifiers: waterfalls, extends
  Sgpr32AExt,
  Sgpr32AExtBoolInReg,
  Sgpr32SExt,
  Sgpr32ZExt,
  Vgpr32SExt,
  Vgpr32ZExt,
};
199
// Instruction needs to be replaced with a sequence of instructions. Lowering
// was not done by the legalizer since the instruction is available in either
// sgpr or vgpr.
// For example S64 AND is available on sgpr, for that reason S64 AND is legal
// in the context of the Legalizer, which only checks LLT. But S64 AND is not
// available on vgpr. Lower it to two S32 vgpr ANDs.
//
// DoNotLower is the default LoweringMethod of a RegBankLLTMapping, i.e. the
// instruction is kept and only its operands are handled.
enum LoweringMethodID {
  DoNotLower,
  VccExtToSel,
  UniExtToSel,
  UnpackBitShift,
  S_BFE,
  V_BFE,
  VgprToVccCopy,
  SplitTo32,
  SplitTo32Select,
  SplitTo32SExtInReg,
  Ext32To64,
  UniCstExt,
  SplitLoad,
  WidenLoad,
};
221
// Selects which group of types the "Fast Rules" slots of SetOfRulesForOpcode
// cover. Each group lists four types, matching the four Uni/Div slots held by
// SetOfRulesForOpcode.
// NOTE(review): the order in the trailing comments is not necessarily the slot
// order; see getFastPredicateSlot for the actual type-to-slot assignment.
enum FastRulesTypes {
  NoFastRules, // Only the "Slow Rules" list is used.
  Standard,    // S16, S32, S64, V2S16
  StandardB,   // B32, B64, B96, B128
  Vector,      // S32, V2S32, V3S32, V4S32
};
228
// Describes what to do with one instruction once a rule has matched: one
// RegBankLLTMappingApplyID per destination operand, one per source operand,
// and the lowering to perform on the instruction itself.
struct RegBankLLTMapping {
  // Apply IDs for the destination (def) operands.
  SmallVector<RegBankLLTMappingApplyID, 2> DstOpMapping;
  // Apply IDs for the source (use) operands.
  SmallVector<RegBankLLTMappingApplyID, 4> SrcOpMapping;
  // How the instruction is rewritten; DoNotLower unless specified.
  LoweringMethodID LoweringMethod;
  // Builds a mapping from brace-enclosed lists, e.g. ({Sgpr32}, {Sgpr32,
  // Sgpr32}). Defined out of line.
  RegBankLLTMapping(
      std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
      std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
      LoweringMethodID LoweringMethod = DoNotLower);
};
238
// Predicate half of a RegBankLegalizeRule: a list of per-operand
// uniformity/type IDs plus an optional custom test on the instruction.
struct PredicateMapping {
  // One UniformityLLTOpPredicateID per checked operand.
  SmallVector<UniformityLLTOpPredicateID, 4> OpUniformityAndTypes;
  // Optional extra check on the instruction; empty when no TestFunc was given
  // to the constructor.
  std::function<bool(const MachineInstr &)> TestFunc;
  // Defined out of line. TestFunc defaults to an empty std::function.
  PredicateMapping(
      std::initializer_list<UniformityLLTOpPredicateID> OpList,
      std::function<bool(const MachineInstr &)> TestFunc = nullptr);

  // Tests this predicate against MI using uniformity info MUI and register
  // info MRI. Defined out of line.
  // NOTE(review): presumably returns true only when every listed operand ID
  // matches and TestFunc (if set) accepts MI - confirm against the definition.
  bool match(const MachineInstr &MI, const MachineUniformityInfo &MUI,
             const MachineRegisterInfo &MRI) const;
};
249
// A single rule: the operand mapping to use when the predicate holds.
// Plain aggregate, so it can be written as {Predicate, OperandMapping}; the
// member order is therefore part of how rules are spelled.
struct RegBankLegalizeRule {
  // Condition under which this rule is selected.
  PredicateMapping Predicate;
  // What to apply to the instruction's operands when it is selected.
  RegBankLLTMapping OperandMapping;
};
254
// All rules registered for one opcode (or for a group of opcodes that share
// rules): a list of general "Slow Rules" plus optional directly indexed
// "Fast Rules".
class SetOfRulesForOpcode {
  // "Slow Rules". More complex 'Rules[i].Predicate', check them one by one.
  SmallVector<RegBankLegalizeRule, 4> Rules;

  // "Fast Rules"
  // Instead of testing each 'Rules[i].Predicate' we do direct access to
  // RegBankLLTMapping using getFastPredicateSlot. For example if:
  // - FastTypes == Standard Uni[0] holds Mapping in case Op 0 is uniform S32
  // - FastTypes == Vector Div[3] holds Mapping in case Op 0 is divergent V4S32
  FastRulesTypes FastTypes = NoFastRules;
  // Every fast slot starts out as an InvalidMapping placeholder.
  // NOTE(review): InvMapping is not #undef'd anywhere in this header, so the
  // macro stays visible in every file that includes it - confirm whether that
  // is intended before relying on it or removing it.
#define InvMapping RegBankLLTMapping({InvalidMapping}, {InvalidMapping})
  RegBankLLTMapping Uni[4] = {InvMapping, InvMapping, InvMapping, InvMapping};
  RegBankLLTMapping Div[4] = {InvMapping, InvMapping, InvMapping, InvMapping};

public:
  // Both constructors are defined out of line; the second selects which type
  // group the fast slots cover.
  SetOfRulesForOpcode();
  SetOfRulesForOpcode(FastRulesTypes FastTypes);

  // Looks up the mapping to apply to MI. Defined out of line.
  // NOTE(review): presumably consults the fast slots first and then the slow
  // rules - confirm against the definition, including what is returned when
  // nothing matches.
  const RegBankLLTMapping &
  findMappingForMI(const MachineInstr &MI, const MachineRegisterInfo &MRI,
                   const MachineUniformityInfo &MUI) const;

  // Registers a "Slow Rule".
  void addRule(RegBankLegalizeRule Rule);

  // Register a "Fast Rule" for type Ty, for a divergent or a uniform operand
  // respectively.
  void addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
                            RegBankLLTMapping RuleApplyIDs);
  void addFastRuleUniform(UniformityLLTOpPredicateID Ty,
                          RegBankLLTMapping RuleApplyIDs);

private:
  // Maps Ty to an index into Uni/Div for the current FastTypes.
  // NOTE(review): the return type is int rather than unsigned, presumably so
  // a negative value can signal "no slot" - confirm against the definition.
  int getFastPredicateSlot(UniformityLLTOpPredicateID Ty) const;
};
287
288// Essentially 'map<Opcode(or intrinsic_opcode), SetOfRulesForOpcode>' but a
289// little more efficient.
290class RegBankLegalizeRules {
291 const GCNSubtarget *ST;
292 MachineRegisterInfo *MRI;
293 // Separate maps for G-opcodes and instrinsics since they are in different
294 // enums. Multiple opcodes can share same set of rules.
295 // RulesAlias = map<Opcode, KeyOpcode>
296 // Rules = map<KeyOpcode, SetOfRulesForOpcode>
297 SmallDenseMap<unsigned, unsigned, 256> GRulesAlias;
298 SmallDenseMap<unsigned, SetOfRulesForOpcode, 128> GRules;
299 SmallDenseMap<unsigned, unsigned, 128> IRulesAlias;
300 SmallDenseMap<unsigned, SetOfRulesForOpcode, 64> IRules;
301 class RuleSetInitializer {
302 SetOfRulesForOpcode *RuleSet;
303
304 public:
305 // Used for clang-format line breaks and to force writing all rules for
306 // opcode in same place.
307 template <class AliasMap, class RulesMap>
308 RuleSetInitializer(std::initializer_list<unsigned> OpcList,
309 AliasMap &RulesAlias, RulesMap &Rules,
310 FastRulesTypes FastTypes = NoFastRules) {
311 unsigned KeyOpcode = *OpcList.begin();
312 for (unsigned Opc : OpcList) {
313 [[maybe_unused]] auto [_, NewInput] =
314 RulesAlias.try_emplace(Opc, KeyOpcode);
315 assert(NewInput && "Can't redefine existing Rules");
316 }
317
318 auto [DenseMapIter, NewInput] = Rules.try_emplace(KeyOpcode, FastTypes);
319 assert(NewInput && "Can't redefine existing Rules");
320
321 RuleSet = &DenseMapIter->second;
322 }
323
324 RuleSetInitializer(const RuleSetInitializer &) = delete;
325 RuleSetInitializer &operator=(const RuleSetInitializer &) = delete;
326 RuleSetInitializer(RuleSetInitializer &&) = delete;
327 RuleSetInitializer &operator=(RuleSetInitializer &&) = delete;
328 ~RuleSetInitializer() = default;
329
330 RuleSetInitializer &Div(UniformityLLTOpPredicateID Ty,
331 RegBankLLTMapping RuleApplyIDs,
332 bool STPred = true) {
333 if (STPred)
334 RuleSet->addFastRuleDivergent(Ty, RuleApplyIDs);
335 return *this;
336 }
337
338 RuleSetInitializer &Uni(UniformityLLTOpPredicateID Ty,
339 RegBankLLTMapping RuleApplyIDs,
340 bool STPred = true) {
341 if (STPred)
342 RuleSet->addFastRuleUniform(Ty, RuleApplyIDs);
343 return *this;
344 }
345
346 RuleSetInitializer &Any(RegBankLegalizeRule Init, bool STPred = true) {
347 if (STPred)
348 RuleSet->addRule(Rule: Init);
349 return *this;
350 }
351 };
352
353 RuleSetInitializer addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
354 FastRulesTypes FastTypes = NoFastRules);
355
356 RuleSetInitializer addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
357 FastRulesTypes FastTypes = NoFastRules);
358
359public:
360 // Initialize rules for all opcodes.
361 RegBankLegalizeRules(const GCNSubtarget &ST, MachineRegisterInfo &MRI);
362
363 // In case we don't want to regenerate same rules, we can use already
364 // generated rules but need to refresh references to objects that are
365 // created for this run.
366 void refreshRefs(const GCNSubtarget &_ST, MachineRegisterInfo &_MRI) {
367 ST = &_ST;
368 MRI = &_MRI;
369 };
370
371 const SetOfRulesForOpcode &getRulesForOpc(MachineInstr &MI) const;
372};
373
374} // end namespace AMDGPU
375} // end namespace llvm
376
377#endif
378