//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// SI DAG Lowering interface definition
//
//===----------------------------------------------------------------------===//

14#ifndef LLVM_LIB_TARGET_AMDGPU_SIISELLOWERING_H
15#define LLVM_LIB_TARGET_AMDGPU_SIISELLOWERING_H
16
17#include "AMDGPUISelLowering.h"
18#include "AMDGPUArgumentUsageInfo.h"
19#include "llvm/CodeGen/MachineFunction.h"
20
21namespace llvm {
22
23class GCNSubtarget;
24class SIMachineFunctionInfo;
25class SIRegisterInfo;
26
27namespace AMDGPU {
28struct ImageDimIntrinsicInfo;
29}
30
31class SITargetLowering final : public AMDGPUTargetLowering {
32private:
33 const GCNSubtarget *Subtarget;
34
35public:
36 MVT getRegisterTypeForCallingConv(LLVMContext &Context,
37 CallingConv::ID CC,
38 EVT VT) const override;
39 unsigned getNumRegistersForCallingConv(LLVMContext &Context,
40 CallingConv::ID CC,
41 EVT VT) const override;
42
43 unsigned getVectorTypeBreakdownForCallingConv(
44 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
45 unsigned &NumIntermediates, MVT &RegisterVT) const override;
46
47private:
48 SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
49 SDValue Chain, uint64_t Offset) const;
50 SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;
51 SDValue getLDSKernelId(SelectionDAG &DAG, const SDLoc &SL) const;
52 SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
53 const SDLoc &SL, SDValue Chain,
54 uint64_t Offset, Align Alignment,
55 bool Signed,
56 const ISD::InputArg *Arg = nullptr) const;
57 SDValue loadImplicitKernelArgument(SelectionDAG &DAG, MVT VT, const SDLoc &DL,
58 Align Alignment,
59 ImplicitParameter Param) const;
60
61 SDValue lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA,
62 const SDLoc &SL, SDValue Chain,
63 const ISD::InputArg &Arg) const;
64 SDValue getPreloadedValue(SelectionDAG &DAG,
65 const SIMachineFunctionInfo &MFI,
66 EVT VT,
67 AMDGPUFunctionArgInfo::PreloadedValue) const;
68
69 SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
70 SelectionDAG &DAG) const override;
71 SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
72 MVT VT, unsigned Offset) const;
73 SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr,
74 SelectionDAG &DAG, bool WithChain) const;
75 SDValue lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, SDValue Offset,
76 SDValue CachePolicy, SelectionDAG &DAG) const;
77
78 SDValue lowerRawBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG,
79 unsigned NewOpcode) const;
80 SDValue lowerStructBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG,
81 unsigned NewOpcode) const;
82
83 SDValue lowerWaveID(SelectionDAG &DAG, SDValue Op) const;
84 SDValue lowerWorkitemID(SelectionDAG &DAG, SDValue Op, unsigned Dim,
85 const ArgDescriptor &ArgDesc) const;
86
87 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
88 SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
89 SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
90
91 // The raw.tbuffer and struct.tbuffer intrinsics have two offset args: offset
92 // (the offset that is included in bounds checking and swizzling, to be split
93 // between the instruction's voffset and immoffset fields) and soffset (the
94 // offset that is excluded from bounds checking and swizzling, to go in the
95 // instruction's soffset field). This function takes the first kind of
96 // offset and figures out how to split it between voffset and immoffset.
97 std::pair<SDValue, SDValue> splitBufferOffsets(SDValue Offset,
98 SelectionDAG &DAG) const;
99
100 SDValue widenLoad(LoadSDNode *Ld, DAGCombinerInfo &DCI) const;
101 SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
102 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
103 SDValue lowerFastUnsafeFDIV(SDValue Op, SelectionDAG &DAG) const;
104 SDValue lowerFastUnsafeFDIV64(SDValue Op, SelectionDAG &DAG) const;
105 SDValue lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const;
106 SDValue LowerFDIV16(SDValue Op, SelectionDAG &DAG) const;
107 SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;
108 SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
109 SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
110 SDValue LowerFFREXP(SDValue Op, SelectionDAG &DAG) const;
111 SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
112 SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
113 SDValue lowerFSQRTF16(SDValue Op, SelectionDAG &DAG) const;
114 SDValue lowerFSQRTF32(SDValue Op, SelectionDAG &DAG) const;
115 SDValue lowerFSQRTF64(SDValue Op, SelectionDAG &DAG) const;
116 SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
117 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
118 SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
119 SDValue adjustLoadValueType(unsigned Opcode, MemSDNode *M,
120 SelectionDAG &DAG, ArrayRef<SDValue> Ops,
121 bool IsIntrinsic = false) const;
122
123 SDValue lowerIntrinsicLoad(MemSDNode *M, bool IsFormat, SelectionDAG &DAG,
124 ArrayRef<SDValue> Ops) const;
125
126 // Call DAG.getMemIntrinsicNode for a load, but first widen a dwordx3 type to
127 // dwordx4 if on SI.
128 SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
129 ArrayRef<SDValue> Ops, EVT MemVT,
130 MachineMemOperand *MMO, SelectionDAG &DAG) const;
131
132 SDValue handleD16VData(SDValue VData, SelectionDAG &DAG,
133 bool ImageStore = false) const;
134
135 /// Converts \p Op, which must be of floating point type, to the
136 /// floating point type \p VT, by either extending or truncating it.
137 SDValue getFPExtOrFPRound(SelectionDAG &DAG,
138 SDValue Op,
139 const SDLoc &DL,
140 EVT VT) const;
141
142 SDValue convertArgType(
143 SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Val,
144 bool Signed, const ISD::InputArg *Arg = nullptr) const;
145
146 /// Custom lowering for ISD::FP_ROUND for MVT::f16.
147 SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
148 SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
149 SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const;
150 SDValue lowerMUL(SDValue Op, SelectionDAG &DAG) const;
151 SDValue lowerXMULO(SDValue Op, SelectionDAG &DAG) const;
152 SDValue lowerXMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
153
154 SDValue getSegmentAperture(unsigned AS, const SDLoc &DL,
155 SelectionDAG &DAG) const;
156
157 SDValue lowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const;
158 SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
159 SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
160 SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
161 SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
162 SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
163 SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
164
165 SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const;
166 SDValue lowerTrapEndpgm(SDValue Op, SelectionDAG &DAG) const;
167 SDValue lowerTrapHsaQueuePtr(SDValue Op, SelectionDAG &DAG) const;
168 SDValue lowerTrapHsa(SDValue Op, SelectionDAG &DAG) const;
169 SDValue lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const;
170
171 SDNode *adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
172
173 SDValue performUCharToFloatCombine(SDNode *N,
174 DAGCombinerInfo &DCI) const;
175 SDValue performFCopySignCombine(SDNode *N, DAGCombinerInfo &DCI) const;
176
177 SDValue performSHLPtrCombine(SDNode *N,
178 unsigned AS,
179 EVT MemVT,
180 DAGCombinerInfo &DCI) const;
181
182 SDValue performMemSDNodeCombine(MemSDNode *N, DAGCombinerInfo &DCI) const;
183
184 SDValue splitBinaryBitConstantOp(DAGCombinerInfo &DCI, const SDLoc &SL,
185 unsigned Opc, SDValue LHS,
186 const ConstantSDNode *CRHS) const;
187
188 SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const;
189 SDValue performOrCombine(SDNode *N, DAGCombinerInfo &DCI) const;
190 SDValue performXorCombine(SDNode *N, DAGCombinerInfo &DCI) const;
191 SDValue performZeroExtendCombine(SDNode *N, DAGCombinerInfo &DCI) const;
192 SDValue performSignExtendInRegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
193 SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const;
194 SDValue getCanonicalConstantFP(SelectionDAG &DAG, const SDLoc &SL, EVT VT,
195 const APFloat &C) const;
196 SDValue performFCanonicalizeCombine(SDNode *N, DAGCombinerInfo &DCI) const;
197
198 SDValue performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
199 SDValue Op0, SDValue Op1) const;
200 SDValue performIntMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
201 SDValue Src, SDValue MinVal, SDValue MaxVal,
202 bool Signed) const;
203 SDValue performMinMaxCombine(SDNode *N, DAGCombinerInfo &DCI) const;
204 SDValue performFMed3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
205 SDValue performCvtPkRTZCombine(SDNode *N, DAGCombinerInfo &DCI) const;
206 SDValue performExtractVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
207 SDValue performInsertVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
208 SDValue performFPRoundCombine(SDNode *N, DAGCombinerInfo &DCI) const;
209
210 SDValue reassociateScalarOps(SDNode *N, SelectionDAG &DAG) const;
211 unsigned getFusedOpcode(const SelectionDAG &DAG,
212 const SDNode *N0, const SDNode *N1) const;
213 SDValue tryFoldToMad64_32(SDNode *N, DAGCombinerInfo &DCI) const;
214 SDValue performAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
215 SDValue performAddCarrySubCarryCombine(SDNode *N, DAGCombinerInfo &DCI) const;
216 SDValue performSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
217 SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
218 SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
219 SDValue performFDivCombine(SDNode *N, DAGCombinerInfo &DCI) const;
220 SDValue performFMACombine(SDNode *N, DAGCombinerInfo &DCI) const;
221 SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
222 SDValue performCvtF32UByteNCombine(SDNode *N, DAGCombinerInfo &DCI) const;
223 SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
224 SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
225
226 bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
227
228 unsigned isCFIntrinsic(const SDNode *Intr) const;
229
230public:
231 /// \returns True if fixup needs to be emitted for given global value \p GV,
232 /// false otherwise.
233 bool shouldEmitFixup(const GlobalValue *GV) const;
234
235 /// \returns True if GOT relocation needs to be emitted for given global value
236 /// \p GV, false otherwise.
237 bool shouldEmitGOTReloc(const GlobalValue *GV) const;
238
239 /// \returns True if PC-relative relocation needs to be emitted for given
240 /// global value \p GV, false otherwise.
241 bool shouldEmitPCReloc(const GlobalValue *GV) const;
242
243 /// \returns true if this should use a literal constant for an LDS address,
244 /// and not emit a relocation for an LDS global.
245 bool shouldUseLDSConstAddress(const GlobalValue *GV) const;
246
247 /// Check if EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT (<n x e>, var-idx) should be
248 /// expanded into a set of cmp/select instructions.
249 static bool shouldExpandVectorDynExt(unsigned EltSize, unsigned NumElem,
250 bool IsDivergentIdx,
251 const GCNSubtarget *Subtarget);
252
253 bool shouldExpandVectorDynExt(SDNode *N) const;
254
255private:
256 // Analyze a combined offset from an amdgcn_s_buffer_load intrinsic and store
257 // the three offsets (voffset, soffset and instoffset) into the SDValue[3]
258 // array pointed to by Offsets.
259 void setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG,
260 SDValue *Offsets, Align Alignment = Align(4)) const;
261
262 // Convert the i128 that an addrspace(8) pointer is natively represented as
263 // into the v4i32 that all the buffer intrinsics expect to receive. We can't
264 // add register classes for i128 on pain of the promotion logic going haywire,
265 // so this slightly ugly hack is what we've got. If passed a non-pointer
266 // argument (as would be seen in older buffer intrinsics), does nothing.
267 SDValue bufferRsrcPtrToVector(SDValue MaybePointer, SelectionDAG &DAG) const;
268
269 // Wrap a 64-bit pointer into a v4i32 (which is how all SelectionDAG code
270 // represents ptr addrspace(8)) using the flags specified in the intrinsic.
271 SDValue lowerPointerAsRsrcIntrin(SDNode *Op, SelectionDAG &DAG) const;
272
273 // Handle 8 bit and 16 bit buffer loads
274 SDValue handleByteShortBufferLoads(SelectionDAG &DAG, EVT LoadVT, SDLoc DL,
275 ArrayRef<SDValue> Ops,
276 MachineMemOperand *MMO,
277 bool IsTFE = false) const;
278
279 // Handle 8 bit and 16 bit buffer stores
280 SDValue handleByteShortBufferStores(SelectionDAG &DAG, EVT VDataType,
281 SDLoc DL, SDValue Ops[],
282 MemSDNode *M) const;
283
284public:
285 SITargetLowering(const TargetMachine &tm, const GCNSubtarget &STI);
286
287 const GCNSubtarget *getSubtarget() const;
288
289 ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
290
291 bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode, EVT DestVT,
292 EVT SrcVT) const override;
293
294 bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy,
295 LLT SrcTy) const override;
296
297 bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const override;
298
299 // While address space 7 should never make it to codegen, it still needs to
300 // have a MVT to prevent some analyses that query this function from breaking,
301 // so, to work around the lack of i160, map it to v5i32.
302 MVT getPointerTy(const DataLayout &DL, unsigned AS) const override;
303 MVT getPointerMemTy(const DataLayout &DL, unsigned AS) const override;
304
305 bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
306 MachineFunction &MF,
307 unsigned IntrinsicID) const override;
308
309 void CollectTargetIntrinsicOperands(const CallInst &I,
310 SmallVectorImpl<SDValue> &Ops,
311 SelectionDAG &DAG) const override;
312
313 bool getAddrModeArguments(IntrinsicInst * /*I*/,
314 SmallVectorImpl<Value*> &/*Ops*/,
315 Type *&/*AccessTy*/) const override;
316
317 bool isLegalFlatAddressingMode(const AddrMode &AM, unsigned AddrSpace) const;
318 bool isLegalGlobalAddressingMode(const AddrMode &AM) const;
319 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
320 unsigned AS,
321 Instruction *I = nullptr) const override;
322
323 bool canMergeStoresTo(unsigned AS, EVT MemVT,
324 const MachineFunction &MF) const override;
325
326 bool allowsMisalignedMemoryAccessesImpl(
327 unsigned Size, unsigned AddrSpace, Align Alignment,
328 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
329 unsigned *IsFast = nullptr) const;
330
331 bool allowsMisalignedMemoryAccesses(
332 LLT Ty, unsigned AddrSpace, Align Alignment,
333 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
334 unsigned *IsFast = nullptr) const override {
335 if (IsFast)
336 *IsFast = 0;
337 return allowsMisalignedMemoryAccessesImpl(Size: Ty.getSizeInBits(), AddrSpace,
338 Alignment, Flags, IsFast);
339 }
340
341 bool allowsMisalignedMemoryAccesses(
342 EVT VT, unsigned AS, Align Alignment,
343 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
344 unsigned *IsFast = nullptr) const override;
345
346 EVT getOptimalMemOpType(const MemOp &Op,
347 const AttributeList &FuncAttributes) const override;
348
349 bool isMemOpUniform(const SDNode *N) const;
350 bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const;
351
352 static bool isNonGlobalAddrSpace(unsigned AS);
353
354 bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
355
356 TargetLoweringBase::LegalizeTypeAction
357 getPreferredVectorAction(MVT VT) const override;
358
359 bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
360 Type *Ty) const override;
361
362 bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
363 unsigned Index) const override;
364
365 bool isTypeDesirableForOp(unsigned Op, EVT VT) const override;
366
367 bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
368
369 unsigned combineRepeatedFPDivisors() const override {
370 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
371 // reciprocal.
372 return 2;
373 }
374
375 bool supportSplitCSR(MachineFunction *MF) const override;
376 void initializeSplitCSR(MachineBasicBlock *Entry) const override;
377 void insertCopiesSplitCSR(
378 MachineBasicBlock *Entry,
379 const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
380
381 SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
382 bool isVarArg,
383 const SmallVectorImpl<ISD::InputArg> &Ins,
384 const SDLoc &DL, SelectionDAG &DAG,
385 SmallVectorImpl<SDValue> &InVals) const override;
386
387 bool CanLowerReturn(CallingConv::ID CallConv,
388 MachineFunction &MF, bool isVarArg,
389 const SmallVectorImpl<ISD::OutputArg> &Outs,
390 LLVMContext &Context) const override;
391
392 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
393 const SmallVectorImpl<ISD::OutputArg> &Outs,
394 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
395 SelectionDAG &DAG) const override;
396
397 void passSpecialInputs(
398 CallLoweringInfo &CLI,
399 CCState &CCInfo,
400 const SIMachineFunctionInfo &Info,
401 SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
402 SmallVectorImpl<SDValue> &MemOpChains,
403 SDValue Chain) const;
404
405 SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
406 CallingConv::ID CallConv, bool isVarArg,
407 const SmallVectorImpl<ISD::InputArg> &Ins,
408 const SDLoc &DL, SelectionDAG &DAG,
409 SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
410 SDValue ThisVal) const;
411
412 bool mayBeEmittedAsTailCall(const CallInst *) const override;
413
414 bool isEligibleForTailCallOptimization(
415 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
416 const SmallVectorImpl<ISD::OutputArg> &Outs,
417 const SmallVectorImpl<SDValue> &OutVals,
418 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
419
420 SDValue LowerCall(CallLoweringInfo &CLI,
421 SmallVectorImpl<SDValue> &InVals) const override;
422
423 SDValue lowerDYNAMIC_STACKALLOCImpl(SDValue Op, SelectionDAG &DAG) const;
424 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
425 SDValue LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
426 SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
427 SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
428
429 SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
430 SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
431 SDValue lowerGET_FPENV(SDValue Op, SelectionDAG &DAG) const;
432 SDValue lowerSET_FPENV(SDValue Op, SelectionDAG &DAG) const;
433
434 Register getRegisterByName(const char* RegName, LLT VT,
435 const MachineFunction &MF) const override;
436
437 MachineBasicBlock *splitKillBlock(MachineInstr &MI,
438 MachineBasicBlock *BB) const;
439
440 void bundleInstWithWaitcnt(MachineInstr &MI) const;
441 MachineBasicBlock *emitGWSMemViolTestLoop(MachineInstr &MI,
442 MachineBasicBlock *BB) const;
443
444 MachineBasicBlock *
445 EmitInstrWithCustomInserter(MachineInstr &MI,
446 MachineBasicBlock *BB) const override;
447
448 bool enableAggressiveFMAFusion(EVT VT) const override;
449 bool enableAggressiveFMAFusion(LLT Ty) const override;
450 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
451 EVT VT) const override;
452 MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override;
453 LLT getPreferredShiftAmountTy(LLT Ty) const override;
454
455 bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
456 EVT VT) const override;
457 bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
458 const LLT Ty) const override;
459 bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const override;
460 bool isFMADLegal(const MachineInstr &MI, const LLT Ty) const override;
461
462 SDValue splitUnaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
463 SDValue splitBinaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
464 SDValue splitTernaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
465 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
466
467 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
468 SelectionDAG &DAG) const override;
469
470 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
471 SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
472 void AddMemOpInit(MachineInstr &MI) const;
473 void AdjustInstrPostInstrSelection(MachineInstr &MI,
474 SDNode *Node) const override;
475
476 SDNode *legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const;
477
478 MachineSDNode *wrapAddr64Rsrc(SelectionDAG &DAG, const SDLoc &DL,
479 SDValue Ptr) const;
480 MachineSDNode *buildRSRC(SelectionDAG &DAG, const SDLoc &DL, SDValue Ptr,
481 uint32_t RsrcDword1, uint64_t RsrcDword2And3) const;
482 std::pair<unsigned, const TargetRegisterClass *>
483 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
484 StringRef Constraint, MVT VT) const override;
485 ConstraintType getConstraintType(StringRef Constraint) const override;
486 void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
487 std::vector<SDValue> &Ops,
488 SelectionDAG &DAG) const override;
489 bool getAsmOperandConstVal(SDValue Op, uint64_t &Val) const;
490 bool checkAsmConstraintVal(SDValue Op, StringRef Constraint,
491 uint64_t Val) const;
492 bool checkAsmConstraintValA(SDValue Op,
493 uint64_t Val,
494 unsigned MaxSize = 64) const;
495 SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, const SDLoc &DL,
496 SDValue V) const;
497
498 void finalizeLowering(MachineFunction &MF) const override;
499
500 void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
501 const APInt &DemandedElts,
502 const SelectionDAG &DAG,
503 unsigned Depth = 0) const override;
504 void computeKnownBitsForFrameIndex(int FrameIdx,
505 KnownBits &Known,
506 const MachineFunction &MF) const override;
507 void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, Register R,
508 KnownBits &Known,
509 const APInt &DemandedElts,
510 const MachineRegisterInfo &MRI,
511 unsigned Depth = 0) const override;
512
513 Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, Register R,
514 const MachineRegisterInfo &MRI,
515 unsigned Depth = 0) const override;
516 bool isSDNodeSourceOfDivergence(const SDNode *N, FunctionLoweringInfo *FLI,
517 UniformityInfo *UA) const override;
518
519 bool hasMemSDNodeUser(SDNode *N) const;
520
521 bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
522 SDValue N1) const override;
523
524 bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
525 Register N1) const override;
526
527 bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
528 unsigned MaxDepth = 5) const;
529 bool isCanonicalized(Register Reg, const MachineFunction &MF,
530 unsigned MaxDepth = 5) const;
531 bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const;
532 bool denormalsEnabledForType(LLT Ty, const MachineFunction &MF) const;
533
534 bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
535 const TargetRegisterInfo *TRI,
536 const TargetInstrInfo *TII, unsigned &PhysReg,
537 int &Cost) const override;
538
539 bool isKnownNeverNaNForTargetNode(SDValue Op,
540 const SelectionDAG &DAG,
541 bool SNaN = false,
542 unsigned Depth = 0) const override;
543 AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
544 AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
545 AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
546 AtomicExpansionKind
547 shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
548 void emitExpandAtomicRMW(AtomicRMWInst *AI) const override;
549
550 LoadInst *
551 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
552
553 const TargetRegisterClass *getRegClassFor(MVT VT,
554 bool isDivergent) const override;
555 bool requiresUniformRegister(MachineFunction &MF,
556 const Value *V) const override;
557 Align getPrefLoopAlignment(MachineLoop *ML) const override;
558
559 void allocateHSAUserSGPRs(CCState &CCInfo,
560 MachineFunction &MF,
561 const SIRegisterInfo &TRI,
562 SIMachineFunctionInfo &Info) const;
563
564 void allocatePreloadKernArgSGPRs(CCState &CCInfo,
565 SmallVectorImpl<CCValAssign> &ArgLocs,
566 const SmallVectorImpl<ISD::InputArg> &Ins,
567 MachineFunction &MF,
568 const SIRegisterInfo &TRI,
569 SIMachineFunctionInfo &Info) const;
570
571 void allocateLDSKernelId(CCState &CCInfo, MachineFunction &MF,
572 const SIRegisterInfo &TRI,
573 SIMachineFunctionInfo &Info) const;
574
575 void allocateSystemSGPRs(CCState &CCInfo,
576 MachineFunction &MF,
577 SIMachineFunctionInfo &Info,
578 CallingConv::ID CallConv,
579 bool IsShader) const;
580
581 void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
582 MachineFunction &MF,
583 const SIRegisterInfo &TRI,
584 SIMachineFunctionInfo &Info) const;
585 void allocateSpecialInputSGPRs(
586 CCState &CCInfo,
587 MachineFunction &MF,
588 const SIRegisterInfo &TRI,
589 SIMachineFunctionInfo &Info) const;
590
591 void allocateSpecialInputVGPRs(CCState &CCInfo,
592 MachineFunction &MF,
593 const SIRegisterInfo &TRI,
594 SIMachineFunctionInfo &Info) const;
595 void allocateSpecialInputVGPRsFixed(CCState &CCInfo,
596 MachineFunction &MF,
597 const SIRegisterInfo &TRI,
598 SIMachineFunctionInfo &Info) const;
599
600 MachineMemOperand::Flags
601 getTargetMMOFlags(const Instruction &I) const override;
602};
603
604// Returns true if argument is a boolean value which is not serialized into
605// memory or argument and does not require v_cndmask_b32 to be deserialized.
606bool isBoolSGPR(SDValue V);
607
608} // End namespace llvm
609
610#endif
611