1//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// SI DAG Lowering interface definition
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_SIISELLOWERING_H
15#define LLVM_LIB_TARGET_AMDGPU_SIISELLOWERING_H
16
17#include "AMDGPUArgumentUsageInfo.h"
18#include "AMDGPUISelLowering.h"
19#include "SIDefines.h"
20#include "llvm/CodeGen/MachineFunction.h"
21
22namespace llvm {
23
// Forward declarations. SITargetLowering only names these types through
// pointers and references in its declarations, so their full definitions are
// not required by this header.
class GCNSubtarget;
class SIMachineFunctionInfo;
class SIRegisterInfo;

namespace AMDGPU {
// Taken by pointer in SITargetLowering::lowerImage().
struct ImageDimIntrinsicInfo;
}
31
32class SITargetLowering final : public AMDGPUTargetLowering {
33private:
34 const GCNSubtarget *Subtarget;
35
36public:
37 MVT getRegisterTypeForCallingConv(LLVMContext &Context,
38 CallingConv::ID CC,
39 EVT VT) const override;
40 unsigned getNumRegistersForCallingConv(LLVMContext &Context,
41 CallingConv::ID CC,
42 EVT VT) const override;
43
44 unsigned getVectorTypeBreakdownForCallingConv(
45 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
46 unsigned &NumIntermediates, MVT &RegisterVT) const override;
47
48 MachinePointerInfo getKernargSegmentPtrInfo(MachineFunction &MF) const;
49
50private:
51 SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
52 SDValue Chain, uint64_t Offset) const;
53 SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;
54 SDValue getLDSKernelId(SelectionDAG &DAG, const SDLoc &SL) const;
55 SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
56 const SDLoc &SL, SDValue Chain,
57 uint64_t Offset, Align Alignment,
58 bool Signed,
59 const ISD::InputArg *Arg = nullptr) const;
60 SDValue loadImplicitKernelArgument(SelectionDAG &DAG, MVT VT, const SDLoc &DL,
61 Align Alignment,
62 ImplicitParameter Param) const;
63
64 SDValue convertABITypeToValueType(SelectionDAG &DAG, SDValue Val,
65 CCValAssign &VA, const SDLoc &SL) const;
66
67 SDValue lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA,
68 const SDLoc &SL, SDValue Chain,
69 const ISD::InputArg &Arg) const;
70 SDValue lowerWorkGroupId(
71 SelectionDAG &DAG, const SIMachineFunctionInfo &MFI, EVT VT,
72 AMDGPUFunctionArgInfo::PreloadedValue ClusterIdPV,
73 AMDGPUFunctionArgInfo::PreloadedValue ClusterMaxIdPV,
74 AMDGPUFunctionArgInfo::PreloadedValue ClusterWorkGroupIdPV) const;
75 SDValue getPreloadedValue(SelectionDAG &DAG,
76 const SIMachineFunctionInfo &MFI,
77 EVT VT,
78 AMDGPUFunctionArgInfo::PreloadedValue) const;
79
80 SDValue LowerGlobalAddress(AMDGPUMachineFunctionInfo *MFI, SDValue Op,
81 SelectionDAG &DAG) const override;
82 SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
83
84 SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
85 MVT VT, unsigned Offset) const;
86 SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr,
87 SelectionDAG &DAG, bool WithChain) const;
88 SDValue lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, SDValue Offset,
89 SDValue CachePolicy, SelectionDAG &DAG) const;
90
91 SDValue lowerRawBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG,
92 unsigned NewOpcode) const;
93 SDValue lowerStructBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG,
94 unsigned NewOpcode) const;
95
96 SDValue lowerWaveID(SelectionDAG &DAG, SDValue Op) const;
97 SDValue lowerConstHwRegRead(SelectionDAG &DAG, SDValue Op,
98 AMDGPU::Hwreg::Id HwReg, unsigned LowBit,
99 unsigned Width) const;
100 SDValue lowerWorkitemID(SelectionDAG &DAG, SDValue Op, unsigned Dim,
101 const ArgDescriptor &ArgDesc) const;
102
103 SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
104 SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
105 SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
106
107 // The raw.tbuffer and struct.tbuffer intrinsics have two offset args: offset
108 // (the offset that is included in bounds checking and swizzling, to be split
109 // between the instruction's voffset and immoffset fields) and soffset (the
110 // offset that is excluded from bounds checking and swizzling, to go in the
111 // instruction's soffset field). This function takes the first kind of
112 // offset and figures out how to split it between voffset and immoffset.
113 std::pair<SDValue, SDValue> splitBufferOffsets(SDValue Offset,
114 SelectionDAG &DAG) const;
115
116 SDValue widenLoad(LoadSDNode *Ld, DAGCombinerInfo &DCI) const;
117 SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
118 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
119 SDValue lowerFastUnsafeFDIV(SDValue Op, SelectionDAG &DAG) const;
120 SDValue lowerFastUnsafeFDIV64(SDValue Op, SelectionDAG &DAG) const;
121 SDValue lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const;
122 SDValue LowerFDIV16(SDValue Op, SelectionDAG &DAG) const;
123 SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;
124 SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
125 SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
126 SDValue LowerFFREXP(SDValue Op, SelectionDAG &DAG) const;
127 SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
128 SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
129 SDValue lowerFSQRTF16(SDValue Op, SelectionDAG &DAG) const;
130 SDValue lowerFSQRTF32(SDValue Op, SelectionDAG &DAG) const;
131 SDValue lowerFSQRTF64(SDValue Op, SelectionDAG &DAG) const;
132 SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
133 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
134 SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
135 SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
136 SDValue adjustLoadValueType(unsigned Opcode, MemSDNode *M,
137 SelectionDAG &DAG, ArrayRef<SDValue> Ops,
138 bool IsIntrinsic = false) const;
139
140 SDValue lowerIntrinsicLoad(MemSDNode *M, bool IsFormat, SelectionDAG &DAG,
141 ArrayRef<SDValue> Ops) const;
142
143 // Call DAG.getMemIntrinsicNode for a load, but first widen a dwordx3 type to
144 // dwordx4 if on SI.
145 SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
146 ArrayRef<SDValue> Ops, EVT MemVT,
147 MachineMemOperand *MMO, SelectionDAG &DAG) const;
148
149 SDValue handleD16VData(SDValue VData, SelectionDAG &DAG,
150 bool ImageStore = false) const;
151
152 /// Converts \p Op, which must be of floating point type, to the
153 /// floating point type \p VT, by either extending or truncating it.
154 SDValue getFPExtOrFPRound(SelectionDAG &DAG,
155 SDValue Op,
156 const SDLoc &DL,
157 EVT VT) const;
158
159 SDValue convertArgType(
160 SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Val,
161 bool Signed, const ISD::InputArg *Arg = nullptr) const;
162
163 /// Custom lowering for ISD::FP_ROUND for MVT::f16.
164 SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
165 SDValue splitFP_ROUNDVectorOp(SDValue Op, SelectionDAG &DAG) const;
166 SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
167 SDValue lowerFMINIMUMNUM_FMAXIMUMNUM(SDValue Op, SelectionDAG &DAG) const;
168 SDValue lowerFMINIMUM_FMAXIMUM(SDValue Op, SelectionDAG &DAG) const;
169 SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const;
170 SDValue promoteUniformOpToI32(SDValue Op, DAGCombinerInfo &DCI) const;
171 SDValue promoteUniformUnaryOpToI32(SDValue Op, DAGCombinerInfo &DCI) const;
172 SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
173 SDValue lowerMUL(SDValue Op, SelectionDAG &DAG) const;
174 SDValue lowerXMULO(SDValue Op, SelectionDAG &DAG) const;
175 SDValue lowerXMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
176
177 SDValue getSegmentAperture(unsigned AS, const SDLoc &DL,
178 SelectionDAG &DAG) const;
179
180 SDValue lowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const;
181 SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
182 SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
183 SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
184 SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
185 SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
186 SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
187
188 SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const;
189 SDValue lowerTrapEndpgm(SDValue Op, SelectionDAG &DAG) const;
190 SDValue lowerTrapHsaQueuePtr(SDValue Op, SelectionDAG &DAG) const;
191 SDValue lowerTrapHsa(SDValue Op, SelectionDAG &DAG) const;
192 SDValue lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const;
193 SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
194
195 SDNode *adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
196
197 SDValue performUCharToFloatCombine(SDNode *N,
198 DAGCombinerInfo &DCI) const;
199 SDValue performFCopySignCombine(SDNode *N, DAGCombinerInfo &DCI) const;
200
201 SDValue performSHLPtrCombine(SDNode *N,
202 unsigned AS,
203 EVT MemVT,
204 DAGCombinerInfo &DCI) const;
205
206 SDValue performMemSDNodeCombine(MemSDNode *N, DAGCombinerInfo &DCI) const;
207
208 SDValue splitBinaryBitConstantOp(DAGCombinerInfo &DCI, const SDLoc &SL,
209 unsigned Opc, SDValue LHS,
210 const ConstantSDNode *CRHS) const;
211
212 SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const;
213 SDValue performOrCombine(SDNode *N, DAGCombinerInfo &DCI) const;
214 SDValue performXorCombine(SDNode *N, DAGCombinerInfo &DCI) const;
215 SDValue performZeroOrAnyExtendCombine(SDNode *N, DAGCombinerInfo &DCI) const;
216 SDValue performSignExtendInRegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
217 SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const;
218 SDValue getCanonicalConstantFP(SelectionDAG &DAG, const SDLoc &SL, EVT VT,
219 const APFloat &C) const;
220 SDValue performFCanonicalizeCombine(SDNode *N, DAGCombinerInfo &DCI) const;
221
222 SDValue performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
223 SDValue Op0, SDValue Op1,
224 bool IsKnownNoNaNs) const;
225 SDValue performIntMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
226 SDValue Src, SDValue MinVal, SDValue MaxVal,
227 bool Signed) const;
228 SDValue performMinMaxCombine(SDNode *N, DAGCombinerInfo &DCI) const;
229 SDValue performFMed3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
230 SDValue performCvtPkRTZCombine(SDNode *N, DAGCombinerInfo &DCI) const;
231 SDValue performExtractVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
232 SDValue performInsertVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
233 SDValue performFPRoundCombine(SDNode *N, DAGCombinerInfo &DCI) const;
234 SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
235
236 SDValue reassociateScalarOps(SDNode *N, SelectionDAG &DAG) const;
237 unsigned getFusedOpcode(const SelectionDAG &DAG,
238 const SDNode *N0, const SDNode *N1) const;
239 SDValue tryFoldToMad64_32(SDNode *N, DAGCombinerInfo &DCI) const;
240 SDValue foldAddSub64WithZeroLowBitsTo32(SDNode *N,
241 DAGCombinerInfo &DCI) const;
242
243 SDValue performAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
244 SDValue performPtrAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
245 SDValue performSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
246 SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
247 SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
248 SDValue performFDivCombine(SDNode *N, DAGCombinerInfo &DCI) const;
249 SDValue performFMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
250 SDValue performFMACombine(SDNode *N, DAGCombinerInfo &DCI) const;
251 SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
252 SDValue performCvtF32UByteNCombine(SDNode *N, DAGCombinerInfo &DCI) const;
253 SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
254 SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
255
256 bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
257
258 unsigned isCFIntrinsic(const SDNode *Intr) const;
259
260public:
261 /// \returns True if fixup needs to be emitted for given global value \p GV,
262 /// false otherwise.
263 bool shouldEmitFixup(const GlobalValue *GV) const;
264
265 /// \returns True if GOT relocation needs to be emitted for given global value
266 /// \p GV, false otherwise.
267 bool shouldEmitGOTReloc(const GlobalValue *GV) const;
268
269 /// \returns True if PC-relative relocation needs to be emitted for given
270 /// global value \p GV, false otherwise.
271 bool shouldEmitPCReloc(const GlobalValue *GV) const;
272
273 /// \returns true if this should use a literal constant for an LDS address,
274 /// and not emit a relocation for an LDS global.
275 bool shouldUseLDSConstAddress(const GlobalValue *GV) const;
276
277 /// Check if EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT (<n x e>, var-idx) should be
278 /// expanded into a set of cmp/select instructions.
279 static bool shouldExpandVectorDynExt(unsigned EltSize, unsigned NumElem,
280 bool IsDivergentIdx,
281 const GCNSubtarget *Subtarget);
282
283 bool shouldExpandVectorDynExt(SDNode *N) const;
284
285 bool shouldPreservePtrArith(const Function &F, EVT PtrVT) const override;
286
287 bool canTransformPtrArithOutOfBounds(const Function &F,
288 EVT PtrVT) const override;
289
290private:
291 /// Returns true if the first real instruction in MBB is 8 bytes and could
292 /// be split by a 32-byte fetch window boundary. Used on GFX950 to avoid
293 /// instruction fetch delays.
294 bool needsFetchWindowAlignment(const MachineBasicBlock &MBB) const;
295
296 // Analyze a combined offset from an amdgcn_s_buffer_load intrinsic and store
297 // the three offsets (voffset, soffset and instoffset) into the SDValue[3]
298 // array pointed to by Offsets.
299 void setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG,
300 SDValue *Offsets, Align Alignment = Align(4)) const;
301
302 // Convert the i128 that an addrspace(8) pointer is natively represented as
303 // into the v4i32 that all the buffer intrinsics expect to receive. We can't
304 // add register classes for i128 on pain of the promotion logic going haywire,
305 // so this slightly ugly hack is what we've got. If passed a non-pointer
306 // argument (as would be seen in older buffer intrinsics), does nothing.
307 SDValue bufferRsrcPtrToVector(SDValue MaybePointer, SelectionDAG &DAG) const;
308
309 // Wrap a 64-bit pointer into a v4i32 (which is how all SelectionDAG code
310 // represents ptr addrspace(8)) using the flags specified in the intrinsic.
311 SDValue lowerPointerAsRsrcIntrin(SDNode *Op, SelectionDAG &DAG) const;
312
313 // Handle 8 bit and 16 bit buffer loads
314 SDValue handleByteShortBufferLoads(SelectionDAG &DAG, EVT LoadVT, SDLoc DL,
315 ArrayRef<SDValue> Ops,
316 MachineMemOperand *MMO,
317 bool IsTFE = false) const;
318
319 // Handle 8 bit and 16 bit buffer stores
320 SDValue handleByteShortBufferStores(SelectionDAG &DAG, EVT VDataType,
321 SDLoc DL, SDValue Ops[],
322 MemSDNode *M) const;
323
324public:
325 SITargetLowering(const TargetMachine &tm, const GCNSubtarget &STI);
326
327 const GCNSubtarget *getSubtarget() const;
328
329 ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
330
331 bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode, EVT DestVT,
332 EVT SrcVT) const override;
333
334 bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy,
335 LLT SrcTy) const override;
336
337 bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const override;
338
339 // While address space 7 should never make it to codegen, it still needs to
340 // have a MVT to prevent some analyses that query this function from breaking.
341 // We use the custum MVT::amdgpuBufferFatPointer and
342 // amdgpu::amdgpuBufferStridedPointer for this, though we use v8i32 for the
343 // memory type (which is probably unused).
344 MVT getPointerTy(const DataLayout &DL, unsigned AS) const override;
345 MVT getPointerMemTy(const DataLayout &DL, unsigned AS) const override;
346
347 void getTgtMemIntrinsic(SmallVectorImpl<IntrinsicInfo> &, const CallBase &,
348 MachineFunction &MF,
349 unsigned IntrinsicID) const override;
350
351 void CollectTargetIntrinsicOperands(const CallInst &I,
352 SmallVectorImpl<SDValue> &Ops,
353 SelectionDAG &DAG) const override;
354
355 bool getAddrModeArguments(const IntrinsicInst *I,
356 SmallVectorImpl<Value *> &Ops,
357 Type *&AccessTy) const override;
358
359 bool isLegalFlatAddressingMode(const AddrMode &AM, unsigned AddrSpace) const;
360 bool isLegalGlobalAddressingMode(const AddrMode &AM) const;
361 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
362 unsigned AS,
363 Instruction *I = nullptr) const override;
364
365 bool canMergeStoresTo(unsigned AS, EVT MemVT,
366 const MachineFunction &MF) const override;
367
368 bool allowsMisalignedMemoryAccessesImpl(
369 unsigned Size, unsigned AddrSpace, Align Alignment,
370 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
371 unsigned *IsFast = nullptr) const;
372
373 bool allowsMisalignedMemoryAccesses(
374 LLT Ty, unsigned AddrSpace, Align Alignment,
375 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
376 unsigned *IsFast = nullptr) const override {
377 if (IsFast)
378 *IsFast = 0;
379 return allowsMisalignedMemoryAccessesImpl(Size: Ty.getSizeInBits(), AddrSpace,
380 Alignment, Flags, IsFast);
381 }
382
383 bool allowsMisalignedMemoryAccesses(
384 EVT VT, unsigned AS, Align Alignment,
385 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
386 unsigned *IsFast = nullptr) const override;
387
388 EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
389 const AttributeList &FuncAttributes) const override;
390
391 bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const;
392
393 static bool isNonGlobalAddrSpace(unsigned AS);
394
395 bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
396
397 TargetLoweringBase::LegalizeTypeAction
398 getPreferredVectorAction(MVT VT) const override;
399
400 bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
401 Type *Ty) const override;
402
403 bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
404 unsigned Index) const override;
405 bool isExtractVecEltCheap(EVT VT, unsigned Index) const override;
406
407 bool isTypeDesirableForOp(unsigned Op, EVT VT) const override;
408
409 bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
410
411 unsigned combineRepeatedFPDivisors() const override {
412 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
413 // reciprocal.
414 return 2;
415 }
416
417 bool supportSplitCSR(MachineFunction *MF) const override;
418 void initializeSplitCSR(MachineBasicBlock *Entry) const override;
419 void insertCopiesSplitCSR(
420 MachineBasicBlock *Entry,
421 const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
422
423 SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
424 bool isVarArg,
425 const SmallVectorImpl<ISD::InputArg> &Ins,
426 const SDLoc &DL, SelectionDAG &DAG,
427 SmallVectorImpl<SDValue> &InVals) const override;
428
429 bool CanLowerReturn(CallingConv::ID CallConv,
430 MachineFunction &MF, bool isVarArg,
431 const SmallVectorImpl<ISD::OutputArg> &Outs,
432 LLVMContext &Context, const Type *RetTy) const override;
433
434 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
435 const SmallVectorImpl<ISD::OutputArg> &Outs,
436 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
437 SelectionDAG &DAG) const override;
438
439 void passSpecialInputs(
440 CallLoweringInfo &CLI,
441 CCState &CCInfo,
442 const SIMachineFunctionInfo &Info,
443 SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
444 SmallVectorImpl<SDValue> &MemOpChains,
445 SDValue Chain) const;
446
447 SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
448 CallingConv::ID CallConv, bool isVarArg,
449 const SmallVectorImpl<ISD::InputArg> &Ins,
450 const SDLoc &DL, SelectionDAG &DAG,
451 SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
452 SDValue ThisVal) const;
453
454 bool mayBeEmittedAsTailCall(const CallInst *) const override;
455
456 bool isEligibleForTailCallOptimization(
457 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
458 const SmallVectorImpl<ISD::OutputArg> &Outs,
459 const SmallVectorImpl<SDValue> &OutVals,
460 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
461
462 SDValue LowerCall(CallLoweringInfo &CLI,
463 SmallVectorImpl<SDValue> &InVals) const override;
464
465 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
466 SDValue LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
467 SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
468 SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
469
470 SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
471 SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
472 SDValue lowerGET_FPENV(SDValue Op, SelectionDAG &DAG) const;
473 SDValue lowerSET_FPENV(SDValue Op, SelectionDAG &DAG) const;
474 SDValue lowerROTR(SDValue Op, SelectionDAG &DAG) const;
475
476 Register getRegisterByName(const char* RegName, LLT VT,
477 const MachineFunction &MF) const override;
478
479 MachineBasicBlock *splitKillBlock(MachineInstr &MI,
480 MachineBasicBlock *BB) const;
481
482 void bundleInstWithWaitcnt(MachineInstr &MI) const;
483 MachineBasicBlock *emitGWSMemViolTestLoop(MachineInstr &MI,
484 MachineBasicBlock *BB) const;
485
486 MachineBasicBlock *
487 EmitInstrWithCustomInserter(MachineInstr &MI,
488 MachineBasicBlock *BB) const override;
489
490 bool enableAggressiveFMAFusion(EVT VT) const override;
491 bool enableAggressiveFMAFusion(LLT Ty) const override;
492 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
493 EVT VT) const override;
494 MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override;
495 LLT getPreferredShiftAmountTy(LLT Ty) const override;
496
497 bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
498 EVT VT) const override;
499 bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
500 const LLT Ty) const override;
501 bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const override;
502 bool isFMADLegal(const MachineInstr &MI, const LLT Ty) const override;
503
504 SDValue splitUnaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
505 SDValue splitBinaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
506 SDValue splitTernaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
507 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
508 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
509 SelectionDAG &DAG) const override;
510
511 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
512 SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
513 void AddMemOpInit(MachineInstr &MI) const;
514 void AdjustInstrPostInstrSelection(MachineInstr &MI,
515 SDNode *Node) const override;
516
517 SDNode *legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const;
518
519 MachineSDNode *wrapAddr64Rsrc(SelectionDAG &DAG, const SDLoc &DL,
520 SDValue Ptr) const;
521 MachineSDNode *buildRSRC(SelectionDAG &DAG, const SDLoc &DL, SDValue Ptr,
522 uint32_t RsrcDword1, uint64_t RsrcDword2And3) const;
523 std::pair<unsigned, const TargetRegisterClass *>
524 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
525 StringRef Constraint, MVT VT) const override;
526 ConstraintType getConstraintType(StringRef Constraint) const override;
527 void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
528 std::vector<SDValue> &Ops,
529 SelectionDAG &DAG) const override;
530 bool getAsmOperandConstVal(SDValue Op, uint64_t &Val) const;
531 bool checkAsmConstraintVal(SDValue Op, StringRef Constraint,
532 uint64_t Val) const;
533 bool checkAsmConstraintValA(SDValue Op,
534 uint64_t Val,
535 unsigned MaxSize = 64) const;
536 SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, const SDLoc &DL,
537 SDValue V) const;
538
539 void finalizeLowering(MachineFunction &MF) const override;
540
541 void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
542 const APInt &DemandedElts,
543 const SelectionDAG &DAG,
544 unsigned Depth = 0) const override;
545 void computeKnownBitsForFrameIndex(int FrameIdx,
546 KnownBits &Known,
547 const MachineFunction &MF) const override;
548 void computeKnownBitsForTargetInstr(GISelValueTracking &Analysis, Register R,
549 KnownBits &Known,
550 const APInt &DemandedElts,
551 const MachineRegisterInfo &MRI,
552 unsigned Depth = 0) const override;
553
554 Align computeKnownAlignForTargetInstr(GISelValueTracking &Analysis,
555 Register R,
556 const MachineRegisterInfo &MRI,
557 unsigned Depth = 0) const override;
558 bool isSDNodeSourceOfDivergence(const SDNode *N, FunctionLoweringInfo *FLI,
559 UniformityInfo *UA) const override;
560
561 bool hasMemSDNodeUser(SDNode *N) const;
562
563 bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
564 SDValue N1) const override;
565
566 bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
567 Register N1) const override;
568
569 bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
570 SDNodeFlags UserFlags = {}, unsigned MaxDepth = 5) const;
571 bool isCanonicalized(Register Reg, const MachineFunction &MF,
572 unsigned MaxDepth = 5) const;
573 bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const;
574 bool denormalsEnabledForType(LLT Ty, const MachineFunction &MF) const;
575
576 bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts,
577 const SelectionDAG &DAG, bool SNaN = false,
578 unsigned Depth = 0) const override;
579 AtomicExpansionKind
580 shouldExpandAtomicRMWInIR(const AtomicRMWInst *) const override;
581 AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
582 AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
583 AtomicExpansionKind
584 shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *AI) const override;
585
586 void emitExpandAtomicAddrSpacePredicate(Instruction *AI) const;
587 void emitExpandAtomicRMW(AtomicRMWInst *AI) const override;
588 void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const override;
589 void emitExpandAtomicLoad(LoadInst *LI) const override;
590 void emitExpandAtomicStore(StoreInst *SI) const override;
591
592 LoadInst *
593 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
594
595 const TargetRegisterClass *getRegClassFor(MVT VT,
596 bool isDivergent) const override;
597 bool requiresUniformRegister(MachineFunction &MF,
598 const Value *V) const override;
599 Align getPrefLoopAlignment(MachineLoop *ML) const override;
600 unsigned
601 getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const override;
602
603 void allocateHSAUserSGPRs(CCState &CCInfo,
604 MachineFunction &MF,
605 const SIRegisterInfo &TRI,
606 SIMachineFunctionInfo &Info) const;
607
608 void allocatePreloadKernArgSGPRs(CCState &CCInfo,
609 SmallVectorImpl<CCValAssign> &ArgLocs,
610 const SmallVectorImpl<ISD::InputArg> &Ins,
611 MachineFunction &MF,
612 const SIRegisterInfo &TRI,
613 SIMachineFunctionInfo &Info) const;
614
615 void allocateLDSKernelId(CCState &CCInfo, MachineFunction &MF,
616 const SIRegisterInfo &TRI,
617 SIMachineFunctionInfo &Info) const;
618
619 void allocateSystemSGPRs(CCState &CCInfo,
620 MachineFunction &MF,
621 SIMachineFunctionInfo &Info,
622 CallingConv::ID CallConv,
623 bool IsShader) const;
624
625 void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
626 MachineFunction &MF,
627 const SIRegisterInfo &TRI,
628 SIMachineFunctionInfo &Info) const;
629 void allocateSpecialInputSGPRs(
630 CCState &CCInfo,
631 MachineFunction &MF,
632 const SIRegisterInfo &TRI,
633 SIMachineFunctionInfo &Info) const;
634
635 void allocateSpecialInputVGPRs(CCState &CCInfo,
636 MachineFunction &MF,
637 const SIRegisterInfo &TRI,
638 SIMachineFunctionInfo &Info) const;
639 void allocateSpecialInputVGPRsFixed(CCState &CCInfo,
640 MachineFunction &MF,
641 const SIRegisterInfo &TRI,
642 SIMachineFunctionInfo &Info) const;
643
644 MachineMemOperand::Flags
645 getTargetMMOFlags(const Instruction &I) const override;
646};
647
/// Free function in namespace llvm (not a member of SITargetLowering).
///
/// \returns true if \p V is a boolean value which is not serialized into
/// memory or argument and does not require v_cndmask_b32 to be deserialized.
bool isBoolSGPR(SDValue V);
651
652} // End namespace llvm
653
654#endif
655