1 | //===- AMDGPURegBankLegalizeRules --------------------------------*- C++ -*-==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZERULES_H |
10 | #define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZERULES_H |
11 | |
12 | #include "llvm/ADT/DenseMap.h" |
13 | #include "llvm/ADT/SmallVector.h" |
14 | #include <functional> |
15 | |
16 | namespace llvm { |
17 | |
// Forward declarations of the types this header refers to without including
// their definitions.
class LLT;
class MachineRegisterInfo;
class MachineInstr;
class GCNSubtarget;
class MachineFunction;
template <typename T> class GenericUniformityInfo;
template <typename T> class GenericSSAContext;
// MachineFunction instantiations of the two generic templates declared above.
using MachineSSAContext = GenericSSAContext<MachineFunction>;
using MachineUniformityInfo = GenericUniformityInfo<MachineSSAContext>;
27 | |
28 | namespace AMDGPU { |
29 | |
/// \returns true if \p Ty is a pointer type with size \p Width.
/// Only declared here; the definition is not part of this header.
/// NOTE(review): \p Width is presumably a size in bits - confirm against the
/// definition.
bool isAnyPtr(LLT Ty, unsigned Width);
32 | |
// IDs used to build the predicate for a RegBankLegalizeRule. A predicate can
// have one or more IDs, and each ID represents a check on a register operand:
// either 'uniform or divergent' + LLT, or just LLT.
// Most often checking one operand is enough to decide which RegBankLLTMapping
// to apply (see Fast Rules); IDs are useful when two or more operands need to
// be checked.
//
// Naming scheme, as the names suggest: an unprefixed ID checks only the type,
// a 'Uni' prefix additionally asks for a uniform operand and a 'Div' prefix
// for a divergent one.
//
// This is an unscoped enum, so the enumerator order below defines the numeric
// values; the order is kept exactly as is.
enum UniformityLLTOpPredicateID {
  // NOTE(review): presumably a placeholder meaning "no check for this
  // operand" - confirm against PredicateMapping::match.
  _,
  // scalars
  S1,
  S16,
  S32,
  S64,
  S128,

  UniS1,
  UniS16,
  UniS32,
  UniS64,
  UniS128,

  DivS1,
  DivS16,
  DivS32,
  DivS64,
  DivS128,

  // pointers
  // NOTE(review): P<N> presumably names a pointer in address space N and
  // Ptr<W> any pointer of size W (cf. isAnyPtr) - confirm.
  P0,
  P1,
  P3,
  P4,
  P5,
  Ptr32,
  Ptr64,
  Ptr128,

  UniP0,
  UniP1,
  UniP3,
  UniP4,
  UniP5,
  UniPtr32,
  UniPtr64,
  UniPtr128,

  DivP0,
  DivP1,
  DivP3,
  DivP4,
  DivP5,
  DivPtr32,
  DivPtr64,
  DivPtr128,

  // vectors
  // Only V2S16 has Uni/Div variants in this enum.
  V2S16,
  V2S32,
  V3S32,
  V4S32,

  UniV2S16,

  DivV2S16,

  // B types
  // NOTE(review): presumably "any type with this many bits" - confirm.
  B32,
  B64,
  B96,
  B128,
  B256,
  B512,

  UniB32,
  UniB64,
  UniB96,
  UniB128,
  UniB256,
  UniB512,

  DivB32,
  DivB64,
  DivB96,
  DivB128,
  DivB256,
  DivB512,
};
120 | |
// How to apply a register bank on a register operand.
// In most cases, this serves as an LLT and register bank assert.
// Can change operands and insert copies, extends, truncs, and read-any-lanes.
// Anything more complicated requires a LoweringMethod.
//
// This is an unscoped enum, so the enumerator order below defines the numeric
// values; the order is kept exactly as is (including the vector entries that
// are not sorted by size).
enum RegBankLLTMappingApplyID {
  // Used in this header to fill the fast-rule slots that have no rule yet
  // (see SetOfRulesForOpcode).
  InvalidMapping,
  None,
  IntrId,
  Imm,
  Vcc,

  // sgpr scalars, pointers, vectors and B-types
  Sgpr16,
  Sgpr32,
  Sgpr64,
  Sgpr128,
  SgprP1,
  SgprP3,
  SgprP4,
  SgprP5,
  SgprPtr32,
  SgprPtr64,
  SgprPtr128,
  SgprV2S16,
  SgprV4S32,
  SgprV2S32,
  SgprB32,
  SgprB64,
  SgprB96,
  SgprB128,
  SgprB256,
  SgprB512,

  // vgpr scalars, pointers, vectors and B-types
  Vgpr16,
  Vgpr32,
  Vgpr64,
  Vgpr128,
  VgprP0,
  VgprP1,
  VgprP3,
  VgprP4,
  VgprP5,
  VgprPtr32,
  VgprPtr64,
  VgprPtr128,
  VgprV2S16,
  VgprV2S32,
  VgprB32,
  VgprB64,
  VgprB96,
  VgprB128,
  VgprB256,
  VgprB512,
  VgprV4S32,

  // Dst only modifiers: read-any-lane and truncs
  UniInVcc,
  UniInVgprS32,
  UniInVgprV2S16,
  UniInVgprV4S32,
  UniInVgprB32,
  UniInVgprB64,
  UniInVgprB96,
  UniInVgprB128,
  UniInVgprB256,
  UniInVgprB512,

  Sgpr32Trunc,

  // Src only modifiers: waterfalls, extends
  Sgpr32AExt,
  Sgpr32AExtBoolInReg,
  Sgpr32SExt,
  Sgpr32ZExt,
  Vgpr32SExt,
  Vgpr32ZExt,
};
199 | |
// The instruction needs to be replaced with a sequence of instructions.
// Lowering was not done by the legalizer because the instruction is available
// in either sgpr or vgpr form.
// For example, S64 AND is available on sgpr; for that reason S64 AND is legal
// in the context of the Legalizer, which only checks the LLT. But S64 AND is
// not available on vgpr, so it is lowered to two S32 vgpr ANDs.
//
// DoNotLower is the default LoweringMethod of RegBankLLTMapping. The
// enumerator order defines the numeric values and is kept exactly as is.
enum LoweringMethodID {
  DoNotLower,
  VccExtToSel,
  UniExtToSel,
  UnpackBitShift,
  S_BFE,
  V_BFE,
  VgprToVccCopy,
  SplitTo32,
  SplitTo32Select,
  SplitTo32SExtInReg,
  Ext32To64,
  UniCstExt,
  SplitLoad,
  WidenLoad,
};
221 | |
// Selects which group of types the "Fast Rules" of a SetOfRulesForOpcode are
// indexed by. Each group lists four types, matching the four Uni/Div slots
// that SetOfRulesForOpcode holds.
// NOTE(review): the order of the types in the trailing comments is not
// necessarily the slot order (the SetOfRulesForOpcode comment places uniform
// S32 in Uni[0] for Standard) - see getFastPredicateSlot for the real mapping.
enum FastRulesTypes {
  NoFastRules, // default: only the one-by-one "Slow Rules" are available
  Standard,    // S16, S32, S64, V2S16
  StandardB,   // B32, B64, B96, B128
  Vector,      // S32, V2S32, V3S32, V4S32
};
228 | |
// Describes how to apply register banks to one instruction: one
// RegBankLLTMappingApplyID per destination operand, one per source operand,
// and the lowering to perform, if any.
struct RegBankLLTMapping {
  // Apply IDs for the destination operands.
  SmallVector<RegBankLLTMappingApplyID, 2> DstOpMapping;
  // Apply IDs for the source operands.
  SmallVector<RegBankLLTMappingApplyID, 4> SrcOpMapping;
  // Lowering to perform; the constructor defaults this to DoNotLower.
  LoweringMethodID LoweringMethod;
  // Declared here, defined out of line. Takes the destination and source
  // apply IDs as brace-enclosed lists plus an optional lowering method.
  RegBankLLTMapping(
      std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
      std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
      LoweringMethodID LoweringMethod = DoNotLower);
};
238 | |
// Predicate of a RegBankLegalizeRule: a list of per-operand
// UniformityLLTOpPredicateIDs plus an optional extra test on the instruction.
struct PredicateMapping {
  // One predicate ID per checked operand.
  SmallVector<UniformityLLTOpPredicateID, 4> OpUniformityAndTypes;
  // Optional additional test on the instruction; the constructor defaults it
  // to nullptr (an empty std::function).
  std::function<bool(const MachineInstr &)> TestFunc;
  // Declared here, defined out of line.
  PredicateMapping(
      std::initializer_list<UniformityLLTOpPredicateID> OpList,
      std::function<bool(const MachineInstr &)> TestFunc = nullptr);

  // Declared here, defined out of line.
  // NOTE(review): presumably returns true when \p MI satisfies every ID in
  // OpUniformityAndTypes and TestFunc (if set) - confirm against the
  // definition.
  bool match(const MachineInstr &MI, const MachineUniformityInfo &MUI,
             const MachineRegisterInfo &MRI) const;
};
249 | |
// A single rule: a predicate paired with the operand mapping that belongs to
// it. Plain aggregate; the member order matters for aggregate initialization.
struct RegBankLegalizeRule {
  PredicateMapping Predicate;
  RegBankLLTMapping OperandMapping;
};
254 | |
// All rules registered for one opcode (or one group of aliased opcodes): a
// list of "Slow Rules" checked one by one, plus optional "Fast Rules" that are
// looked up directly by slot.
class SetOfRulesForOpcode {
  // "Slow Rules". More complex 'Rules[i].Predicate', check them one by one.
  SmallVector<RegBankLegalizeRule, 4> Rules;

  // "Fast Rules"
  // Instead of testing each 'Rules[i].Predicate' we do direct access to
  // RegBankLLTMapping using getFastPredicateSlot. For example if:
  // - FastTypes == Standard, Uni[0] holds Mapping in case Op 0 is uniform S32
  // - FastTypes == Vector, Div[3] holds Mapping in case Op 0 is divergent
  //   V4S32
  FastRulesTypes FastTypes = NoFastRules;
  // Placeholder mapping used to initialize every fast-rule slot until a rule
  // is added for it.
  // NOTE(review): this macro is never #undef'd in this header, so it stays
  // visible in every file that includes it.
#define InvMapping RegBankLLTMapping({InvalidMapping}, {InvalidMapping})
  RegBankLLTMapping Uni[4] = {InvMapping, InvMapping, InvMapping, InvMapping};
  RegBankLLTMapping Div[4] = {InvMapping, InvMapping, InvMapping, InvMapping};

public:
  SetOfRulesForOpcode();
  // Not marked explicit, so a FastRulesTypes value converts implicitly.
  SetOfRulesForOpcode(FastRulesTypes FastTypes);

  // Declared here, defined out of line. Returns a reference to a mapping, so
  // the result must not outlive the object it refers to.
  const RegBankLLTMapping &
  findMappingForMI(const MachineInstr &MI, const MachineRegisterInfo &MRI,
                   const MachineUniformityInfo &MUI) const;

  // Adds a rule to this set ("Slow Rules", per the comment on Rules above).
  void addRule(RegBankLegalizeRule Rule);

  // Add a "Fast Rule" for a divergent / uniform operand of type \p Ty.
  void addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
                            RegBankLLTMapping RuleApplyIDs);
  void addFastRuleUniform(UniformityLLTOpPredicateID Ty,
                          RegBankLLTMapping RuleApplyIDs);

private:
  // Maps \p Ty to an index into Uni/Div.
  // NOTE(review): the signed return type suggests a negative value for "no
  // slot" - confirm against the definition.
  int getFastPredicateSlot(UniformityLLTOpPredicateID Ty) const;
};
287 | |
288 | // Essentially 'map<Opcode(or intrinsic_opcode), SetOfRulesForOpcode>' but a |
289 | // little more efficient. |
290 | class RegBankLegalizeRules { |
291 | const GCNSubtarget *ST; |
292 | MachineRegisterInfo *MRI; |
293 | // Separate maps for G-opcodes and instrinsics since they are in different |
294 | // enums. Multiple opcodes can share same set of rules. |
295 | // RulesAlias = map<Opcode, KeyOpcode> |
296 | // Rules = map<KeyOpcode, SetOfRulesForOpcode> |
297 | SmallDenseMap<unsigned, unsigned, 256> GRulesAlias; |
298 | SmallDenseMap<unsigned, SetOfRulesForOpcode, 128> GRules; |
299 | SmallDenseMap<unsigned, unsigned, 128> IRulesAlias; |
300 | SmallDenseMap<unsigned, SetOfRulesForOpcode, 64> IRules; |
301 | class RuleSetInitializer { |
302 | SetOfRulesForOpcode *RuleSet; |
303 | |
304 | public: |
305 | // Used for clang-format line breaks and to force writing all rules for |
306 | // opcode in same place. |
307 | template <class AliasMap, class RulesMap> |
308 | RuleSetInitializer(std::initializer_list<unsigned> OpcList, |
309 | AliasMap &RulesAlias, RulesMap &Rules, |
310 | FastRulesTypes FastTypes = NoFastRules) { |
311 | unsigned KeyOpcode = *OpcList.begin(); |
312 | for (unsigned Opc : OpcList) { |
313 | [[maybe_unused]] auto [_, NewInput] = |
314 | RulesAlias.try_emplace(Opc, KeyOpcode); |
315 | assert(NewInput && "Can't redefine existing Rules" ); |
316 | } |
317 | |
318 | auto [DenseMapIter, NewInput] = Rules.try_emplace(KeyOpcode, FastTypes); |
319 | assert(NewInput && "Can't redefine existing Rules" ); |
320 | |
321 | RuleSet = &DenseMapIter->second; |
322 | } |
323 | |
324 | RuleSetInitializer(const RuleSetInitializer &) = delete; |
325 | RuleSetInitializer &operator=(const RuleSetInitializer &) = delete; |
326 | RuleSetInitializer(RuleSetInitializer &&) = delete; |
327 | RuleSetInitializer &operator=(RuleSetInitializer &&) = delete; |
328 | ~RuleSetInitializer() = default; |
329 | |
330 | RuleSetInitializer &Div(UniformityLLTOpPredicateID Ty, |
331 | RegBankLLTMapping RuleApplyIDs, |
332 | bool STPred = true) { |
333 | if (STPred) |
334 | RuleSet->addFastRuleDivergent(Ty, RuleApplyIDs); |
335 | return *this; |
336 | } |
337 | |
338 | RuleSetInitializer &Uni(UniformityLLTOpPredicateID Ty, |
339 | RegBankLLTMapping RuleApplyIDs, |
340 | bool STPred = true) { |
341 | if (STPred) |
342 | RuleSet->addFastRuleUniform(Ty, RuleApplyIDs); |
343 | return *this; |
344 | } |
345 | |
346 | RuleSetInitializer &Any(RegBankLegalizeRule Init, bool STPred = true) { |
347 | if (STPred) |
348 | RuleSet->addRule(Rule: Init); |
349 | return *this; |
350 | } |
351 | }; |
352 | |
353 | RuleSetInitializer addRulesForGOpcs(std::initializer_list<unsigned> OpcList, |
354 | FastRulesTypes FastTypes = NoFastRules); |
355 | |
356 | RuleSetInitializer addRulesForIOpcs(std::initializer_list<unsigned> OpcList, |
357 | FastRulesTypes FastTypes = NoFastRules); |
358 | |
359 | public: |
360 | // Initialize rules for all opcodes. |
361 | RegBankLegalizeRules(const GCNSubtarget &ST, MachineRegisterInfo &MRI); |
362 | |
363 | // In case we don't want to regenerate same rules, we can use already |
364 | // generated rules but need to refresh references to objects that are |
365 | // created for this run. |
366 | void refreshRefs(const GCNSubtarget &_ST, MachineRegisterInfo &_MRI) { |
367 | ST = &_ST; |
368 | MRI = &_MRI; |
369 | }; |
370 | |
371 | const SetOfRulesForOpcode &getRulesForOpc(MachineInstr &MI) const; |
372 | }; |
373 | |
374 | } // end namespace AMDGPU |
375 | } // end namespace llvm |
376 | |
377 | #endif |
378 | |