1//===- AMDGPURegBankLegalizeRules --------------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZERULES_H
10#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZERULES_H
11
12#include "llvm/ADT/DenseMap.h"
13#include "llvm/ADT/SmallVector.h"
14#include <functional>
15
16namespace llvm {
17
18class LLT;
19class MachineRegisterInfo;
20class MachineInstr;
21class GCNSubtarget;
22class MachineFunction;
23template <typename T> class GenericUniformityInfo;
24template <typename T> class GenericSSAContext;
25using MachineSSAContext = GenericSSAContext<MachineFunction>;
26using MachineUniformityInfo = GenericUniformityInfo<MachineSSAContext>;
27
28namespace AMDGPU {
29
/// Pointer-type query; only declared here, the definition is not part of this
/// header.
/// \returns true if \p Ty is a pointer type with size \p Width.
/// NOTE(review): \p Width is presumably measured in bits and any address space
/// is accepted (hence "Any") - confirm against the definition.
bool isAnyPtr(LLT Ty, unsigned Width);
32
// Building blocks for the predicate of a RegBankLegalizeRule. A predicate is
// made of one or more of these IDs; each ID stands for a check on a register
// operand: either its LLT alone, or 'uniform or divergent' together with its
// LLT.
// Looking at a single operand is usually enough to pick the RegBankLLTMapping
// to apply (see Fast Rules); these IDs are what makes it possible to inspect
// two or more operands.
//
// The numeric values follow declaration order, so new IDs should be added
// with care if anything depends on them.
enum UniformityLLTOpPredicateID {
  // Placeholder for operands that are not registers, and for physical
  // registers.
  _ = 0,

  // Scalars: LLT-only checks.
  S1,
  S16,
  S32,
  S64,
  S128,

  // Scalars: uniform + LLT.
  UniS1,
  UniS16,
  UniS32,
  UniS64,
  UniS128,

  // Scalars: divergent + LLT.
  DivS1,
  DivS16,
  DivS32,
  DivS64,
  DivS128,

  // Pointers: LLT-only checks.
  P0,
  P1,
  P2,
  P3,
  P4,
  P5,
  P8,
  Ptr32,
  Ptr64,
  Ptr128,

  // Pointers: uniform + LLT.
  UniP0,
  UniP1,
  UniP2,
  UniP3,
  UniP4,
  UniP5,
  UniP8,
  UniPtr32,
  UniPtr64,
  UniPtr128,

  // Pointers: divergent + LLT. Note that there is no DivP8 entry.
  DivP0,
  DivP1,
  DivP2,
  DivP3,
  DivP4,
  DivP5,
  DivPtr32,
  DivPtr64,
  DivPtr128,

  // Vectors: LLT-only checks.
  V2S16,
  V2S32,
  V2S64,
  V3S32,
  V4S32,

  // Vectors: uniform + LLT.
  UniV2S16,
  UniV2S32,
  UniV2S64,

  // Vectors: divergent + LLT.
  DivV2S16,
  DivV2S32,
  DivV2S64,

  // B types.
  // NOTE(review): presumably "any type of this many bits" - confirm against
  // the code that evaluates these IDs.
  B32,
  B64,
  B96,
  B128,
  B160,
  B256,
  B512,

  // B types: uniform.
  UniB32,
  UniB64,
  UniB96,
  UniB128,
  UniB160,
  UniB256,
  UniB512,
  UniBRC,

  // B types: divergent.
  DivB32,
  DivB64,
  DivB96,
  DivB128,
  DivB160,
  DivB256,
  DivB512,
  DivBRC,
};
136
// Describes how a register bank gets applied to a single register operand.
// Most of the time an ID only acts as an assert on the operand's LLT and
// register bank. Some IDs may also change the operand and insert copies,
// extends, truncs or read-any-lanes. Whatever goes beyond that has to be
// expressed through a LoweringMethod.
//
// The numeric values follow declaration order.
enum RegBankLLTMappingApplyID {
  InvalidMapping = 0,
  None,
  IntrId,
  Imm,
  Vcc,

  // sgpr: scalars, pointers, vectors and B-types.
  Sgpr16,
  Sgpr32,
  Sgpr64,
  Sgpr128,
  SgprP0,
  SgprP1,
  SgprP2,
  SgprP3,
  SgprP4,
  SgprP5,
  SgprP8,
  SgprPtr32,
  SgprPtr64,
  SgprPtr128,
  SgprV2S16,
  SgprV4S32,
  SgprV2S32,
  SgprB32,
  SgprB64,
  SgprB96,
  SgprB128,
  SgprB256,
  SgprB512,
  SgprBRC,

  // vgpr: scalars, pointers, vectors and B-types.
  Vgpr16,
  Vgpr32,
  Vgpr64,
  Vgpr128,
  VgprP0,
  VgprP1,
  VgprP2,
  VgprP3,
  VgprP4,
  VgprP5,
  VgprPtr32,
  VgprPtr64,
  VgprPtr128,
  VgprV2S16,
  VgprV2S32,
  VgprV3S32,
  VgprB32,
  VgprB64,
  VgprB96,
  VgprB128,
  VgprB160,
  VgprB256,
  VgprB512,
  VgprBRC,
  VgprV4S32,
  VgprV2S64,

  // Modifiers valid on Dst operands only: read-any-lane ...
  UniInVcc,
  UniInVgprS16,
  UniInVgprS32,
  UniInVgprS64,
  UniInVgprV2S16,
  UniInVgprV2S32,
  UniInVgprV4S32,
  UniInVgprV2S64,
  UniInVgprB32,
  UniInVgprB64,
  UniInVgprB96,
  UniInVgprB128,
  UniInVgprB160,
  UniInVgprB256,
  UniInVgprB512,

  // ... and truncs.
  Sgpr32Trunc,

  // Modifiers valid on Src operands only: run in a waterfall loop when the
  // operand is divergent.
  Sgpr32_WF,
  SgprV4S32_WF,

  // Modifiers valid on Src operands only: run in a waterfall loop, for calls.
  SgprP0Call_WF,
  SgprP4Call_WF,

  // Modifiers valid on Src operands only: extends.
  Sgpr32AExt,
  Sgpr32AExtBoolInReg,
  Sgpr32SExt,
  Sgpr32ZExt,
  Vgpr32AExt,
  Vgpr32SExt,
  Vgpr32ZExt,
};
238
// Names the way an instruction is replaced by a sequence of instructions.
// The legalizer did not perform this lowering because the instruction is
// available on either sgpr or vgpr. Example: S64 AND exists on sgpr, so the
// Legalizer, which only looks at the LLT, considers S64 AND legal. It does
// not exist on vgpr though, where it is lowered to two S32 vgpr ANDs.
//
// The numeric values follow declaration order.
enum LoweringMethodID {
  // No lowering requested; also the default of RegBankLLTMapping's
  // constructor.
  DoNotLower = 0,
  VccExtToSel,
  UniExtToSel,
  UnpackBitShift,
  UnpackMinMax,
  S_BFE,
  V_BFE,
  VgprToVccCopy,
  UniMAD64,
  UniMul64,
  DivSMulToMAD,
  SplitTo32,
  SplitTo32Mul,
  ScalarizeToS16,
  SplitTo32Select,
  SplitTo32SExtInReg,
  Ext32To64,
  UniCstExt,
  SplitLoad,
  WidenLoad,
  WidenMMOToS32,
  UnpackAExt,
  VerifyAllSgpr,
  ApplyAllVgpr,
  UnmergeToShiftTrunc,
  ApplyINTRIN_IMAGE,
};
272
// Selects the group of types that the "Fast Rules" of a SetOfRulesForOpcode
// are keyed on. Each group lists four types.
enum FastRulesTypes {
  // Fast Rules are not used.
  NoFastRules = 0,
  // S16, S32, S64, V2S16
  Standard,
  // B32, B64, B96, B128
  StandardB,
  // S32, V2S32, V3S32, V4S32
  Vector,
};
279
280struct RegBankLLTMapping {
281 SmallVector<RegBankLLTMappingApplyID, 2> DstOpMapping;
282 SmallVector<RegBankLLTMappingApplyID, 4> SrcOpMapping;
283 LoweringMethodID LoweringMethod;
284 RegBankLLTMapping(
285 std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
286 std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
287 LoweringMethodID LoweringMethod = DoNotLower);
288};
289
290struct PredicateMapping {
291 SmallVector<UniformityLLTOpPredicateID, 4> OpUniformityAndTypes;
292 std::function<bool(const MachineInstr &)> TestFunc;
293 PredicateMapping(
294 std::initializer_list<UniformityLLTOpPredicateID> OpList,
295 std::function<bool(const MachineInstr &)> TestFunc = nullptr);
296
297 bool match(const MachineInstr &MI, const MachineUniformityInfo &MUI,
298 const MachineRegisterInfo &MRI) const;
299};
300
301struct RegBankLegalizeRule {
302 PredicateMapping Predicate;
303 RegBankLLTMapping OperandMapping;
304};
305
306class SetOfRulesForOpcode {
307 // "Slow Rules". More complex 'Rules[i].Predicate', check them one by one.
308 SmallVector<RegBankLegalizeRule, 4> Rules;
309
310 // "Fast Rules"
311 // Instead of testing each 'Rules[i].Predicate' we do direct access to
312 // RegBankLLTMapping using getFastPredicateSlot. For example if:
313 // - FastTypes == Standard Uni[0] holds Mapping in case Op 0 is uniform S32
314 // - FastTypes == Vector Div[3] holds Mapping in case Op 0 is divergent V4S32
315 FastRulesTypes FastTypes = NoFastRules;
316#define InvMapping RegBankLLTMapping({InvalidMapping}, {InvalidMapping})
317 RegBankLLTMapping Uni[4] = {InvMapping, InvMapping, InvMapping, InvMapping};
318 RegBankLLTMapping Div[4] = {InvMapping, InvMapping, InvMapping, InvMapping};
319
320public:
321 SetOfRulesForOpcode();
322 SetOfRulesForOpcode(FastRulesTypes FastTypes);
323
324 const RegBankLLTMapping *
325 findMappingForMI(const MachineInstr &MI, const MachineRegisterInfo &MRI,
326 const MachineUniformityInfo &MUI) const;
327
328 void addRule(RegBankLegalizeRule Rule);
329
330 void addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
331 RegBankLLTMapping RuleApplyIDs);
332 void addFastRuleUniform(UniformityLLTOpPredicateID Ty,
333 RegBankLLTMapping RuleApplyIDs);
334
335private:
336 int getFastPredicateSlot(UniformityLLTOpPredicateID Ty) const;
337};
338
339// Essentially 'map<Opcode(or intrinsic_opcode), SetOfRulesForOpcode>' but a
340// little more efficient.
341class RegBankLegalizeRules {
342 const GCNSubtarget *ST;
343 MachineRegisterInfo *MRI;
344 // Separate maps for G-opcodes and intrinsics since they are in different
345 // enums. Multiple opcodes can share same set of rules.
346 // RulesAlias = map<Opcode, KeyOpcode>
347 // Rules = map<KeyOpcode, SetOfRulesForOpcode>
348 SmallDenseMap<unsigned, unsigned, 256> GRulesAlias;
349 SmallDenseMap<unsigned, SetOfRulesForOpcode, 128> GRules;
350 SmallDenseMap<unsigned, unsigned, 128> IRulesAlias;
351 SmallDenseMap<unsigned, SetOfRulesForOpcode, 64> IRules;
352 class RuleSetInitializer {
353 SetOfRulesForOpcode *RuleSet;
354
355 public:
356 // Used for clang-format line breaks and to force writing all rules for
357 // opcode in same place.
358 template <class AliasMap, class RulesMap>
359 RuleSetInitializer(std::initializer_list<unsigned> OpcList,
360 AliasMap &RulesAlias, RulesMap &Rules,
361 FastRulesTypes FastTypes = NoFastRules) {
362 unsigned KeyOpcode = *OpcList.begin();
363 for (unsigned Opc : OpcList) {
364 [[maybe_unused]] auto [_, NewInput] =
365 RulesAlias.try_emplace(Opc, KeyOpcode);
366 assert(NewInput && "Can't redefine existing Rules");
367 }
368
369 auto [DenseMapIter, NewInput] = Rules.try_emplace(KeyOpcode, FastTypes);
370 assert(NewInput && "Can't redefine existing Rules");
371
372 RuleSet = &DenseMapIter->second;
373 }
374
375 RuleSetInitializer(const RuleSetInitializer &) = delete;
376 RuleSetInitializer &operator=(const RuleSetInitializer &) = delete;
377 RuleSetInitializer(RuleSetInitializer &&) = delete;
378 RuleSetInitializer &operator=(RuleSetInitializer &&) = delete;
379 ~RuleSetInitializer() = default;
380
381 RuleSetInitializer &Div(UniformityLLTOpPredicateID Ty,
382 RegBankLLTMapping RuleApplyIDs,
383 bool STPred = true) {
384 if (STPred)
385 RuleSet->addFastRuleDivergent(Ty, RuleApplyIDs);
386 return *this;
387 }
388
389 RuleSetInitializer &Uni(UniformityLLTOpPredicateID Ty,
390 RegBankLLTMapping RuleApplyIDs,
391 bool STPred = true) {
392 if (STPred)
393 RuleSet->addFastRuleUniform(Ty, RuleApplyIDs);
394 return *this;
395 }
396
397 RuleSetInitializer &Any(RegBankLegalizeRule Init, bool STPred = true) {
398 if (STPred)
399 RuleSet->addRule(Rule: Init);
400 return *this;
401 }
402 };
403
404 RuleSetInitializer addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
405 FastRulesTypes FastTypes = NoFastRules);
406
407 RuleSetInitializer addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
408 FastRulesTypes FastTypes = NoFastRules);
409
410public:
411 // Initialize rules for all opcodes.
412 RegBankLegalizeRules(const GCNSubtarget &ST, MachineRegisterInfo &MRI);
413
414 // In case we don't want to regenerate same rules, we can use already
415 // generated rules but need to refresh references to objects that are
416 // created for this run.
417 void refreshRefs(const GCNSubtarget &_ST, MachineRegisterInfo &_MRI) {
418 ST = &_ST;
419 MRI = &_MRI;
420 };
421
422 const SetOfRulesForOpcode *getRulesForOpc(MachineInstr &MI) const;
423};
424
425} // end namespace AMDGPU
426} // end namespace llvm
427
428#endif
429