| 1 | //===- AMDGPURegBankLegalizeRules --------------------------------*- C++ -*-==// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZERULES_H |
| 10 | #define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZERULES_H |
| 11 | |
| 12 | #include "llvm/ADT/DenseMap.h" |
| 13 | #include "llvm/ADT/SmallVector.h" |
| 14 | #include <functional> |
| 15 | |
| 16 | namespace llvm { |
| 17 | |
| 18 | class LLT; |
| 19 | class MachineRegisterInfo; |
| 20 | class MachineInstr; |
| 21 | class GCNSubtarget; |
| 22 | class MachineFunction; |
| 23 | template <typename T> class GenericUniformityInfo; |
| 24 | template <typename T> class GenericSSAContext; |
| 25 | using MachineSSAContext = GenericSSAContext<MachineFunction>; |
| 26 | using MachineUniformityInfo = GenericUniformityInfo<MachineSSAContext>; |
| 27 | |
| 28 | namespace AMDGPU { |
| 29 | |
| 30 | /// \returns true if \p Ty is a pointer type with size \p Width. |
| 31 | bool isAnyPtr(LLT Ty, unsigned Width); |
| 32 | |
// Identifiers from which a RegBankLegalizeRule predicate is assembled. A
// predicate consists of one or more IDs; every ID describes a check on a
// register operand: either 'uniform or divergent' + LLT, or LLT alone.
// Checking a single operand is usually enough to pick the RegBankLLTMapping
// (see Fast Rules); IDs are meant for the cases where two or more operands
// have to be inspected.
//
// Only the first enumerator has an explicit value, so all numeric values
// follow declaration order.
enum UniformityLLTOpPredicateID {
  // NOTE(review): presumably a placeholder for an operand that is not
  // checked - confirm against PredicateMapping::match.
  _ = 0,

  // Scalars, LLT only.
  S1,
  S16,
  S32,
  S64,
  S128,

  // Scalars, uniform.
  UniS1,
  UniS16,
  UniS32,
  UniS64,
  UniS128,

  // Scalars, divergent.
  DivS1,
  DivS16,
  DivS32,
  DivS64,
  DivS128,

  // Pointers, LLT only.
  P0,
  P1,
  P3,
  P4,
  P5,
  Ptr32,
  Ptr64,
  Ptr128,

  // Pointers, uniform.
  UniP0,
  UniP1,
  UniP3,
  UniP4,
  UniP5,
  UniPtr32,
  UniPtr64,
  UniPtr128,

  // Pointers, divergent.
  DivP0,
  DivP1,
  DivP3,
  DivP4,
  DivP5,
  DivPtr32,
  DivPtr64,
  DivPtr128,

  // Vectors, LLT only.
  V2S16,
  V2S32,
  V3S32,
  V4S32,

  // Vectors, uniform.
  UniV2S16,

  // Vectors, divergent.
  DivV2S16,

  // B types, LLT only.
  B32,
  B64,
  B96,
  B128,
  B256,
  B512,

  // B types, uniform.
  UniB32,
  UniB64,
  UniB96,
  UniB128,
  UniB256,
  UniB512,

  // B types, divergent.
  DivB32,
  DivB64,
  DivB96,
  DivB128,
  DivB256,
  DivB512,
};
| 120 | |
// Describes how a register bank is applied to a register operand.
// Most of the time an ID acts as an assert on LLT and register bank. An ID may
// also change operands and insert copies, extends, truncs, and
// read-any-lanes. Anything more involved has to go through a LoweringMethod.
//
// Only the first enumerator has an explicit value, so all numeric values
// follow declaration order.
enum RegBankLLTMappingApplyID {
  InvalidMapping = 0,
  None,
  IntrId,
  Imm,
  Vcc,

  // Sgpr: scalars.
  Sgpr16,
  Sgpr32,
  Sgpr64,
  Sgpr128,
  // Sgpr: pointers.
  SgprP1,
  SgprP3,
  SgprP4,
  SgprP5,
  SgprPtr32,
  SgprPtr64,
  SgprPtr128,
  // Sgpr: vectors.
  SgprV2S16,
  SgprV4S32,
  SgprV2S32,
  // Sgpr: B-types.
  SgprB32,
  SgprB64,
  SgprB96,
  SgprB128,
  SgprB256,
  SgprB512,

  // Vgpr: scalars.
  Vgpr16,
  Vgpr32,
  Vgpr64,
  Vgpr128,
  // Vgpr: pointers.
  VgprP0,
  VgprP1,
  VgprP3,
  VgprP4,
  VgprP5,
  VgprPtr32,
  VgprPtr64,
  VgprPtr128,
  // Vgpr: vectors and B-types.
  VgprV2S16,
  VgprV2S32,
  VgprB32,
  VgprB64,
  VgprB96,
  VgprB128,
  VgprB256,
  VgprB512,
  VgprV4S32,

  // Modifiers valid on destinations only: read-any-lane and truncs.
  UniInVcc,
  UniInVgprS32,
  UniInVgprV2S16,
  UniInVgprV4S32,
  UniInVgprB32,
  UniInVgprB64,
  UniInVgprB96,
  UniInVgprB128,
  UniInVgprB256,
  UniInVgprB512,

  Sgpr32Trunc,

  // Modifiers valid on sources only: waterfalls, extends.
  Sgpr32AExt,
  Sgpr32AExtBoolInReg,
  Sgpr32SExt,
  Sgpr32ZExt,
  Vgpr32SExt,
  Vgpr32ZExt,
};
| 199 | |
// Selects how an instruction is replaced with a sequence of instructions.
// The legalizer did not lower such an instruction because it is available in
// either sgpr or vgpr. Example: S64 AND exists on sgpr, so it is legal for the
// Legalizer, which only checks LLT. S64 AND does not exist on vgpr, so there
// it is lowered to two S32 vgpr ANDs.
//
// Only the first enumerator has an explicit value, so all numeric values
// follow declaration order.
enum LoweringMethodID {
  // No lowering; this is the default LoweringMethod of RegBankLLTMapping.
  DoNotLower = 0,
  VccExtToSel,
  UniExtToSel,
  UnpackBitShift,
  S_BFE,
  V_BFE,
  VgprToVccCopy,
  SplitTo32,
  SplitTo32Select,
  SplitTo32SExtInReg,
  Ext32To64,
  UniCstExt,
  SplitLoad,
  WidenLoad,
};
| 221 | |
// Selects which group of four types an opcode's "Fast Rules" cover. The slot
// used for a given type is obtained through
// SetOfRulesForOpcode::getFastPredicateSlot.
enum FastRulesTypes {
  // Fast rules are not used.
  NoFastRules = 0,
  // Covers S16, S32, S64, V2S16.
  Standard,
  // Covers B32, B64, B96, B128.
  StandardB,
  // Covers S32, V2S32, V3S32, V4S32.
  Vector,
};
| 228 | |
| 229 | struct RegBankLLTMapping { |
| 230 | SmallVector<RegBankLLTMappingApplyID, 2> DstOpMapping; |
| 231 | SmallVector<RegBankLLTMappingApplyID, 4> SrcOpMapping; |
| 232 | LoweringMethodID LoweringMethod; |
| 233 | RegBankLLTMapping( |
| 234 | std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList, |
| 235 | std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList, |
| 236 | LoweringMethodID LoweringMethod = DoNotLower); |
| 237 | }; |
| 238 | |
| 239 | struct PredicateMapping { |
| 240 | SmallVector<UniformityLLTOpPredicateID, 4> OpUniformityAndTypes; |
| 241 | std::function<bool(const MachineInstr &)> TestFunc; |
| 242 | PredicateMapping( |
| 243 | std::initializer_list<UniformityLLTOpPredicateID> OpList, |
| 244 | std::function<bool(const MachineInstr &)> TestFunc = nullptr); |
| 245 | |
| 246 | bool match(const MachineInstr &MI, const MachineUniformityInfo &MUI, |
| 247 | const MachineRegisterInfo &MRI) const; |
| 248 | }; |
| 249 | |
| 250 | struct RegBankLegalizeRule { |
| 251 | PredicateMapping Predicate; |
| 252 | RegBankLLTMapping OperandMapping; |
| 253 | }; |
| 254 | |
| 255 | class SetOfRulesForOpcode { |
| 256 | // "Slow Rules". More complex 'Rules[i].Predicate', check them one by one. |
| 257 | SmallVector<RegBankLegalizeRule, 4> Rules; |
| 258 | |
| 259 | // "Fast Rules" |
| 260 | // Instead of testing each 'Rules[i].Predicate' we do direct access to |
| 261 | // RegBankLLTMapping using getFastPredicateSlot. For example if: |
| 262 | // - FastTypes == Standard Uni[0] holds Mapping in case Op 0 is uniform S32 |
| 263 | // - FastTypes == Vector Div[3] holds Mapping in case Op 0 is divergent V4S32 |
| 264 | FastRulesTypes FastTypes = NoFastRules; |
| 265 | #define InvMapping RegBankLLTMapping({InvalidMapping}, {InvalidMapping}) |
| 266 | RegBankLLTMapping Uni[4] = {InvMapping, InvMapping, InvMapping, InvMapping}; |
| 267 | RegBankLLTMapping Div[4] = {InvMapping, InvMapping, InvMapping, InvMapping}; |
| 268 | |
| 269 | public: |
| 270 | SetOfRulesForOpcode(); |
| 271 | SetOfRulesForOpcode(FastRulesTypes FastTypes); |
| 272 | |
| 273 | const RegBankLLTMapping & |
| 274 | findMappingForMI(const MachineInstr &MI, const MachineRegisterInfo &MRI, |
| 275 | const MachineUniformityInfo &MUI) const; |
| 276 | |
| 277 | void addRule(RegBankLegalizeRule Rule); |
| 278 | |
| 279 | void addFastRuleDivergent(UniformityLLTOpPredicateID Ty, |
| 280 | RegBankLLTMapping RuleApplyIDs); |
| 281 | void addFastRuleUniform(UniformityLLTOpPredicateID Ty, |
| 282 | RegBankLLTMapping RuleApplyIDs); |
| 283 | |
| 284 | private: |
| 285 | int getFastPredicateSlot(UniformityLLTOpPredicateID Ty) const; |
| 286 | }; |
| 287 | |
| 288 | // Essentially 'map<Opcode(or intrinsic_opcode), SetOfRulesForOpcode>' but a |
| 289 | // little more efficient. |
| 290 | class RegBankLegalizeRules { |
| 291 | const GCNSubtarget *ST; |
| 292 | MachineRegisterInfo *MRI; |
| 293 | // Separate maps for G-opcodes and instrinsics since they are in different |
| 294 | // enums. Multiple opcodes can share same set of rules. |
| 295 | // RulesAlias = map<Opcode, KeyOpcode> |
| 296 | // Rules = map<KeyOpcode, SetOfRulesForOpcode> |
| 297 | SmallDenseMap<unsigned, unsigned, 256> GRulesAlias; |
| 298 | SmallDenseMap<unsigned, SetOfRulesForOpcode, 128> GRules; |
| 299 | SmallDenseMap<unsigned, unsigned, 128> IRulesAlias; |
| 300 | SmallDenseMap<unsigned, SetOfRulesForOpcode, 64> IRules; |
| 301 | class RuleSetInitializer { |
| 302 | SetOfRulesForOpcode *RuleSet; |
| 303 | |
| 304 | public: |
| 305 | // Used for clang-format line breaks and to force writing all rules for |
| 306 | // opcode in same place. |
| 307 | template <class AliasMap, class RulesMap> |
| 308 | RuleSetInitializer(std::initializer_list<unsigned> OpcList, |
| 309 | AliasMap &RulesAlias, RulesMap &Rules, |
| 310 | FastRulesTypes FastTypes = NoFastRules) { |
| 311 | unsigned KeyOpcode = *OpcList.begin(); |
| 312 | for (unsigned Opc : OpcList) { |
| 313 | [[maybe_unused]] auto [_, NewInput] = |
| 314 | RulesAlias.try_emplace(Opc, KeyOpcode); |
| 315 | assert(NewInput && "Can't redefine existing Rules" ); |
| 316 | } |
| 317 | |
| 318 | auto [DenseMapIter, NewInput] = Rules.try_emplace(KeyOpcode, FastTypes); |
| 319 | assert(NewInput && "Can't redefine existing Rules" ); |
| 320 | |
| 321 | RuleSet = &DenseMapIter->second; |
| 322 | } |
| 323 | |
| 324 | RuleSetInitializer(const RuleSetInitializer &) = delete; |
| 325 | RuleSetInitializer &operator=(const RuleSetInitializer &) = delete; |
| 326 | RuleSetInitializer(RuleSetInitializer &&) = delete; |
| 327 | RuleSetInitializer &operator=(RuleSetInitializer &&) = delete; |
| 328 | ~RuleSetInitializer() = default; |
| 329 | |
| 330 | RuleSetInitializer &Div(UniformityLLTOpPredicateID Ty, |
| 331 | RegBankLLTMapping RuleApplyIDs, |
| 332 | bool STPred = true) { |
| 333 | if (STPred) |
| 334 | RuleSet->addFastRuleDivergent(Ty, RuleApplyIDs); |
| 335 | return *this; |
| 336 | } |
| 337 | |
| 338 | RuleSetInitializer &Uni(UniformityLLTOpPredicateID Ty, |
| 339 | RegBankLLTMapping RuleApplyIDs, |
| 340 | bool STPred = true) { |
| 341 | if (STPred) |
| 342 | RuleSet->addFastRuleUniform(Ty, RuleApplyIDs); |
| 343 | return *this; |
| 344 | } |
| 345 | |
| 346 | RuleSetInitializer &Any(RegBankLegalizeRule Init, bool STPred = true) { |
| 347 | if (STPred) |
| 348 | RuleSet->addRule(Rule: Init); |
| 349 | return *this; |
| 350 | } |
| 351 | }; |
| 352 | |
| 353 | RuleSetInitializer addRulesForGOpcs(std::initializer_list<unsigned> OpcList, |
| 354 | FastRulesTypes FastTypes = NoFastRules); |
| 355 | |
| 356 | RuleSetInitializer addRulesForIOpcs(std::initializer_list<unsigned> OpcList, |
| 357 | FastRulesTypes FastTypes = NoFastRules); |
| 358 | |
| 359 | public: |
| 360 | // Initialize rules for all opcodes. |
| 361 | RegBankLegalizeRules(const GCNSubtarget &ST, MachineRegisterInfo &MRI); |
| 362 | |
| 363 | // In case we don't want to regenerate same rules, we can use already |
| 364 | // generated rules but need to refresh references to objects that are |
| 365 | // created for this run. |
| 366 | void refreshRefs(const GCNSubtarget &_ST, MachineRegisterInfo &_MRI) { |
| 367 | ST = &_ST; |
| 368 | MRI = &_MRI; |
| 369 | }; |
| 370 | |
| 371 | const SetOfRulesForOpcode &getRulesForOpc(MachineInstr &MI) const; |
| 372 | }; |
| 373 | |
| 374 | } // end namespace AMDGPU |
| 375 | } // end namespace llvm |
| 376 | |
| 377 | #endif |
| 378 | |