1//===- AMDGPURegBankLegalizeHelper ------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZEHELPER_H
10#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKLEGALIZEHELPER_H
11
12#include "AMDGPURegBankLegalizeRules.h"
13#include "llvm/ADT/SmallSet.h"
14#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
15#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
16#include "llvm/CodeGen/MachineRegisterInfo.h"
17
18namespace llvm {
19
20class MachineIRBuilder;
21class SIInstrInfo;
22class SIMachineFunctionInfo;
23class GISelValueTracking;
24
25namespace AMDGPU {
26
27/// Holds waterfall loop information: the set of SGPR operand registers that
28/// need waterfalling, and an instruction range [Start, End) to wrap in the
29/// loop.
30struct WaterfallInfo {
31 SmallSet<Register, 4> SgprWaterfallOperandRegs;
32 MachineBasicBlock::iterator Start;
33 MachineBasicBlock::iterator End;
34};
35
36// Receives list of RegBankLLTMappingApplyID and applies register banks on all
37// operands. It is user's responsibility to provide RegBankLLTMappingApplyIDs
38// for all register operands, there is no need to specify NonReg for trailing
39// imm operands. This finishes selection of register banks if there is no need
40// to replace instruction. In other case InstApplyMethod will create new
41// instruction(s).
42class RegBankLegalizeHelper {
43 MachineFunction &MF;
44 const SIMachineFunctionInfo *MFI;
45 const GCNSubtarget &ST;
46 const SIInstrInfo &TII;
47 MachineIRBuilder &B;
48 MachineRegisterInfo &MRI;
49 const MachineUniformityInfo &MUI;
50 GISelValueTracking *VT;
51 const RegisterBankInfo &RBI;
52 MachineOptimizationRemarkEmitter MORE;
53 const RegBankLegalizeRules &RBLRules;
54 const bool IsWave32;
55 const RegisterBank *SgprRB;
56 const RegisterBank *VgprRB;
57 const RegisterBank *AgprRB;
58 const RegisterBank *VccRB;
59
60 static constexpr LLT S1 = LLT::scalar(SizeInBits: 1);
61 static constexpr LLT S16 = LLT::scalar(SizeInBits: 16);
62 static constexpr LLT S32 = LLT::scalar(SizeInBits: 32);
63 static constexpr LLT S64 = LLT::scalar(SizeInBits: 64);
64 static constexpr LLT S96 = LLT::scalar(SizeInBits: 96);
65 static constexpr LLT S128 = LLT::scalar(SizeInBits: 128);
66 static constexpr LLT S256 = LLT::scalar(SizeInBits: 256);
67
68 static constexpr LLT V2S16 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 16);
69 static constexpr LLT V4S16 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16);
70 static constexpr LLT V6S16 = LLT::fixed_vector(NumElements: 6, ScalarSizeInBits: 16);
71 static constexpr LLT V8S16 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16);
72 static constexpr LLT V16S16 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 16);
73 static constexpr LLT V32S16 = LLT::fixed_vector(NumElements: 32, ScalarSizeInBits: 16);
74
75 static constexpr LLT V2S32 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32);
76 static constexpr LLT V3S32 = LLT::fixed_vector(NumElements: 3, ScalarSizeInBits: 32);
77 static constexpr LLT V4S32 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32);
78 static constexpr LLT V6S32 = LLT::fixed_vector(NumElements: 6, ScalarSizeInBits: 32);
79 static constexpr LLT V7S32 = LLT::fixed_vector(NumElements: 7, ScalarSizeInBits: 32);
80 static constexpr LLT V8S32 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 32);
81 static constexpr LLT V16S32 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 32);
82
83 static constexpr LLT V2S64 = LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64);
84 static constexpr LLT V3S64 = LLT::fixed_vector(NumElements: 3, ScalarSizeInBits: 64);
85 static constexpr LLT V4S64 = LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 64);
86 static constexpr LLT V8S64 = LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 64);
87 static constexpr LLT V16S64 = LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 64);
88
89 static constexpr LLT P1 = LLT::pointer(AddressSpace: 1, SizeInBits: 64);
90 static constexpr LLT P4 = LLT::pointer(AddressSpace: 4, SizeInBits: 64);
91 static constexpr LLT P6 = LLT::pointer(AddressSpace: 6, SizeInBits: 32);
92
93 MachineRegisterInfo::VRegAttrs SgprRB_S32 = {.RCOrRB: SgprRB, .Ty: S32};
94 MachineRegisterInfo::VRegAttrs SgprRB_S16 = {.RCOrRB: SgprRB, .Ty: S16};
95 MachineRegisterInfo::VRegAttrs VgprRB_S32 = {.RCOrRB: VgprRB, .Ty: S32};
96 MachineRegisterInfo::VRegAttrs VccRB_S1 = {.RCOrRB: VccRB, .Ty: S1};
97
98public:
99 RegBankLegalizeHelper(MachineIRBuilder &B, const MachineUniformityInfo &MUI,
100 GISelValueTracking *VT, const RegisterBankInfo &RBI,
101 const RegBankLegalizeRules &RBLRules);
102
103 bool findRuleAndApplyMapping(MachineInstr &MI);
104
105private:
106 bool executeInWaterfallLoop(MachineIRBuilder &B, const WaterfallInfo &WFI);
107
108 LLT getTyFromID(RegBankLLTMappingApplyID ID);
109 LLT getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty);
110
111 const RegisterBank *getRegBankFromID(RegBankLLTMappingApplyID ID);
112
113 bool
114 applyMappingDst(MachineInstr &MI, unsigned &OpIdx,
115 const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs);
116
117 bool
118 applyMappingSrc(MachineInstr &MI, unsigned &OpIdx,
119 const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs,
120 WaterfallInfo &WFI);
121
122 unsigned setBufferOffsets(MachineIRBuilder &B, Register CombinedOffset,
123 Register &VOffsetReg, Register &SOffsetReg,
124 int64_t &InstOffsetVal, Align Alignment);
125
126 bool splitLoad(MachineInstr &MI, ArrayRef<LLT> LLTBreakdown,
127 LLT MergeTy = LLT());
128 bool widenLoad(MachineInstr &MI, LLT WideTy, LLT MergeTy = LLT());
129 bool widenMMOToS32(GAnyLoad &MI) const;
130
131 bool lower(MachineInstr &MI, const RegBankLLTMapping &Mapping,
132 WaterfallInfo &WFI);
133
134 bool lowerVccExtToSel(MachineInstr &MI);
135 std::pair<Register, Register> unpackZExt(Register Reg);
136 std::pair<Register, Register> unpackSExt(Register Reg);
137 std::pair<Register, Register> unpackAExt(Register Reg);
138 std::pair<Register, Register> unpackAExtTruncS16(Register Reg);
139 bool lowerUnpackBitShift(MachineInstr &MI);
140 bool lowerV_BFE(MachineInstr &MI);
141 bool lowerS_BFE(MachineInstr &MI);
142 bool lowerUniMAD64(MachineInstr &MI);
143 bool lowerSplitTo32(MachineInstr &MI);
144 bool lowerSplitTo32Mul(MachineInstr &MI);
145 bool lowerSplitTo16(MachineInstr &MI);
146 bool lowerSplitTo32Select(MachineInstr &MI);
147 bool lowerSplitTo32SExtInReg(MachineInstr &MI);
148 bool lowerSplitBitCount64To32(MachineInstr &MI);
149 bool lowerUnpackMinMax(MachineInstr &MI);
150 bool lowerUnpackAExt(MachineInstr &MI);
151 bool lowerSBufToBuf(MachineInstr &MI, WaterfallInfo &WFI);
152 bool lowerExtrVecEltToSel(MachineInstr &MI);
153 bool lowerExtrVecEltTo32(MachineInstr &MI);
154 bool lowerInsVecEltToSel(MachineInstr &MI);
155 bool lowerInsVecEltTo32(MachineInstr &MI);
156 bool lowerAbsToNegMax(MachineInstr &MI);
157 bool lowerAbsToS32(MachineInstr &MI);
158 bool lowerSetRounding(MachineInstr &MI);
159 bool lowerGetRounding(MachineInstr &MI);
160 bool applyRegisterBanksVgprWithSgprRsrc(MachineInstr &MI, unsigned RsrcIdx);
161};
162
163} // end namespace AMDGPU
164} // end namespace llvm
165
166#endif
167