1//===-- AMDGPUISelDAGToDAG.h - A dag to dag inst selector for AMDGPU ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Defines an instruction selector for the AMDGPU target.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
15#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
16
17#include "GCNSubtarget.h"
18#include "SIMachineFunctionInfo.h"
19#include "SIModeRegisterDefaults.h"
20#include "llvm/Analysis/ValueTracking.h"
21#include "llvm/CodeGen/SelectionDAGISel.h"
22#include "llvm/Target/TargetMachine.h"
23
24namespace llvm {
25
26static inline bool getConstantValue(SDValue N, uint32_t &Out) {
27 // This is only used for packed vectors, where using 0 for undef should
28 // always be good.
29 if (N.isUndef()) {
30 Out = 0;
31 return true;
32 }
33
34 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: N)) {
35 Out = C->getAPIntValue().getSExtValue();
36 return true;
37 }
38
39 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val&: N)) {
40 Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
41 return true;
42 }
43
44 return false;
45}
46
47// TODO: Handle undef as zero
48static inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
49 assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
50 uint32_t LHSVal, RHSVal;
51 if (getConstantValue(N: N->getOperand(Num: 0), Out&: LHSVal) &&
52 getConstantValue(N: N->getOperand(Num: 1), Out&: RHSVal)) {
53 SDLoc SL(N);
54 uint32_t K = (LHSVal & 0xffff) | (RHSVal << 16);
55 return DAG.getMachineNode(Opcode: AMDGPU::S_MOV_B32, dl: SL, VT: N->getValueType(ResNo: 0),
56 Op1: DAG.getTargetConstant(Val: K, DL: SL, VT: MVT::i32));
57 }
58
59 return nullptr;
60}
61
62/// AMDGPU specific code to select AMDGPU machine instructions for
63/// SelectionDAG operations.
64class AMDGPUDAGToDAGISel : public SelectionDAGISel {
65 // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
66 // make the right decision when generating code for different targets.
67 const GCNSubtarget *Subtarget;
68
69 // Default FP mode for the current function.
70 SIModeRegisterDefaults Mode;
71
72 // Instructions that will be lowered with a final instruction that zeros the
73 // high result bits.
74 bool fp16SrcZerosHighBits(unsigned Opc) const;
75
76public:
77 AMDGPUDAGToDAGISel() = delete;
78
79 explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOptLevel OptLevel);
80
81 bool runOnMachineFunction(MachineFunction &MF) override;
82 bool matchLoadD16FromBuildVector(SDNode *N) const;
83 void PreprocessISelDAG() override;
84 void Select(SDNode *N) override;
85 void PostprocessISelDAG() override;
86
87protected:
88 void SelectBuildVector(SDNode *N, unsigned RegClassID);
89 void SelectVectorShuffle(SDNode *N);
90
91private:
92 std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
93
94 bool isInlineImmediate(const SDNode *N) const;
95
96 bool isInlineImmediate(const APInt &Imm) const {
97 return Subtarget->getInstrInfo()->isInlineConstant(Imm);
98 }
99
100 bool isInlineImmediate(const APFloat &Imm) const {
101 return Subtarget->getInstrInfo()->isInlineConstant(Imm);
102 }
103
104 bool isVGPRImm(const SDNode *N) const;
105 bool isUniformLoad(const SDNode *N) const;
106 bool isUniformBr(const SDNode *N) const;
107
108 // Returns true if ISD::AND SDNode `N`'s masking of the shift amount operand's
109 // `ShAmtBits` bits is unneeded.
110 bool isUnneededShiftMask(const SDNode *N, unsigned ShAmtBits) const;
111
112 bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
113 SDValue &RHS) const;
114
115 MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
116
117 SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
118 SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
119 SDNode *glueCopyToM0LDSInit(SDNode *N) const;
120
121 const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
122 virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
123 virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
124 bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
125 bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
126 unsigned Size) const;
127
128 bool isFlatScratchBaseLegal(SDValue Addr) const;
129 bool isFlatScratchBaseLegalSV(SDValue Addr) const;
130 bool isFlatScratchBaseLegalSVImm(SDValue Addr) const;
131 bool isSOffsetLegalWithImmOffset(SDValue *SOffset, bool Imm32Only,
132 bool IsBuffer, int64_t ImmOffset = 0) const;
133
134 bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
135 bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
136 SDValue &Offset1) const;
137 bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
138 SDValue &Offset1) const;
139 bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
140 SDValue &Offset1, unsigned Size) const;
141 bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
142 SDValue &SOffset, SDValue &Offset, SDValue &Offen,
143 SDValue &Idxen, SDValue &Addr64) const;
144 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
145 SDValue &SOffset, SDValue &Offset) const;
146 bool SelectMUBUFScratchOffen(SDNode *Parent, SDValue Addr, SDValue &RSrc,
147 SDValue &VAddr, SDValue &SOffset,
148 SDValue &ImmOffset) const;
149 bool SelectMUBUFScratchOffset(SDNode *Parent, SDValue Addr, SDValue &SRsrc,
150 SDValue &Soffset, SDValue &Offset) const;
151
152 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
153 SDValue &Offset) const;
154 bool SelectBUFSOffset(SDValue Addr, SDValue &SOffset) const;
155
156 bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
157 SDValue &Offset, uint64_t FlatVariant) const;
158 bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
159 SDValue &Offset) const;
160 bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
161 SDValue &Offset) const;
162 bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
163 SDValue &Offset) const;
164 bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
165 SDValue &VOffset, SDValue &Offset) const;
166 bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
167 SDValue &Offset) const;
168 bool checkFlatScratchSVSSwizzleBug(SDValue VAddr, SDValue SAddr,
169 uint64_t ImmOffset) const;
170 bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr,
171 SDValue &SAddr, SDValue &Offset) const;
172
173 bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue *SOffset,
174 SDValue *Offset, bool Imm32Only = false,
175 bool IsBuffer = false, bool HasSOffset = false,
176 int64_t ImmOffset = 0) const;
177 SDValue Expand32BitAddress(SDValue Addr) const;
178 bool SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset,
179 SDValue *Offset, bool Imm32Only = false,
180 bool IsBuffer = false, bool HasSOffset = false,
181 int64_t ImmOffset = 0) const;
182 bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue *SOffset,
183 SDValue *Offset, bool Imm32Only = false) const;
184 bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
185 bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
186 bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &SOffset) const;
187 bool SelectSMRDSgprImm(SDValue Addr, SDValue &SBase, SDValue &SOffset,
188 SDValue &Offset) const;
189 bool SelectSMRDBufferImm(SDValue N, SDValue &Offset) const;
190 bool SelectSMRDBufferImm32(SDValue N, SDValue &Offset) const;
191 bool SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
192 SDValue &Offset) const;
193 bool SelectSMRDPrefetchImm(SDValue Addr, SDValue &SBase,
194 SDValue &Offset) const;
195 bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
196
197 bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
198 bool IsCanonicalizing = true,
199 bool AllowAbs = true) const;
200 bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
201 bool SelectVOP3ModsNonCanonicalizing(SDValue In, SDValue &Src,
202 SDValue &SrcMods) const;
203 bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
204 bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
205 bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
206 SDValue &Clamp, SDValue &Omod) const;
207 bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
208 SDValue &Clamp, SDValue &Omod) const;
209 bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
210 SDValue &Clamp, SDValue &Omod) const;
211
212 bool SelectVINTERPModsImpl(SDValue In, SDValue &Src, SDValue &SrcMods,
213 bool OpSel) const;
214 bool SelectVINTERPMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
215 bool SelectVINTERPModsHi(SDValue In, SDValue &Src, SDValue &SrcMods) const;
216
217 bool SelectVOP3OMods(SDValue In, SDValue &Src, SDValue &Clamp,
218 SDValue &Omod) const;
219
220 bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods,
221 bool IsDOT = false) const;
222 bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
223
224 bool SelectVOP3PModsNeg(SDValue In, SDValue &Src) const;
225 bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
226
227 bool SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
228 SDValue &SrcMods) const;
229 bool SelectWMMAModsF16Neg(SDValue In, SDValue &Src, SDValue &SrcMods) const;
230 bool SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src,
231 SDValue &SrcMods) const;
232 bool SelectWMMAVISrc(SDValue In, SDValue &Src) const;
233
234 bool SelectSWMMACIndex8(SDValue In, SDValue &Src, SDValue &IndexKey) const;
235 bool SelectSWMMACIndex16(SDValue In, SDValue &Src, SDValue &IndexKey) const;
236
237 bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
238
239 bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
240 bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
241 unsigned &Mods) const;
242 bool SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
243 SDValue &SrcMods) const;
244 bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
245
246 bool SelectBITOP3(SDValue In, SDValue &Src0, SDValue &Src1, SDValue &Src2,
247 SDValue &Tbl) const;
248
249 SDValue getHi16Elt(SDValue In) const;
250
251 SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
252
253 void SelectADD_SUB_I64(SDNode *N);
254 void SelectAddcSubb(SDNode *N);
255 void SelectUADDO_USUBO(SDNode *N);
256 void SelectDIV_SCALE(SDNode *N);
257 void SelectMAD_64_32(SDNode *N);
258 void SelectMUL_LOHI(SDNode *N);
259 void SelectFMA_W_CHAIN(SDNode *N);
260 void SelectFMUL_W_CHAIN(SDNode *N);
261 SDNode *getBFE32(bool IsSigned, const SDLoc &DL, SDValue Val, uint32_t Offset,
262 uint32_t Width);
263 void SelectS_BFEFromShifts(SDNode *N);
264 void SelectS_BFE(SDNode *N);
265 bool isCBranchSCC(const SDNode *N) const;
266 void SelectBRCOND(SDNode *N);
267 void SelectFMAD_FMA(SDNode *N);
268 void SelectFP_EXTEND(SDNode *N);
269 void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
270 void SelectDSBvhStackIntrinsic(SDNode *N, unsigned IntrID);
271 void SelectDS_GWS(SDNode *N, unsigned IntrID);
272 void SelectInterpP1F16(SDNode *N);
273 void SelectINTRINSIC_W_CHAIN(SDNode *N);
274 void SelectINTRINSIC_WO_CHAIN(SDNode *N);
275 void SelectINTRINSIC_VOID(SDNode *N);
276 void SelectWAVE_ADDRESS(SDNode *N);
277 void SelectSTACKRESTORE(SDNode *N);
278
279protected:
280 // Include the pieces autogenerated from the target description.
281#include "AMDGPUGenDAGISel.inc"
282};
283
284class AMDGPUISelDAGToDAGPass : public SelectionDAGISelPass {
285public:
286 AMDGPUISelDAGToDAGPass(TargetMachine &TM);
287
288 PreservedAnalyses run(MachineFunction &MF,
289 MachineFunctionAnalysisManager &MFAM);
290};
291
292class AMDGPUDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
293public:
294 static char ID;
295
296 AMDGPUDAGToDAGISelLegacy(TargetMachine &TM, CodeGenOptLevel OptLevel);
297
298 bool runOnMachineFunction(MachineFunction &MF) override;
299 void getAnalysisUsage(AnalysisUsage &AU) const override;
300 StringRef getPassName() const override;
301};
302
303} // namespace llvm
304
305#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
306