//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// SI DAG Lowering interface definition
//
//===----------------------------------------------------------------------===//
13 | |
14 | #ifndef LLVM_LIB_TARGET_AMDGPU_SIISELLOWERING_H |
15 | #define LLVM_LIB_TARGET_AMDGPU_SIISELLOWERING_H |
16 | |
17 | #include "AMDGPUISelLowering.h" |
18 | #include "AMDGPUArgumentUsageInfo.h" |
19 | #include "llvm/CodeGen/MachineFunction.h" |
20 | |
21 | namespace llvm { |
22 | |
23 | class GCNSubtarget; |
24 | class SIMachineFunctionInfo; |
25 | class SIRegisterInfo; |
26 | |
27 | namespace AMDGPU { |
28 | struct ImageDimIntrinsicInfo; |
29 | } |
30 | |
31 | class SITargetLowering final : public AMDGPUTargetLowering { |
32 | private: |
33 | const GCNSubtarget *Subtarget; |
34 | |
35 | public: |
36 | MVT getRegisterTypeForCallingConv(LLVMContext &Context, |
37 | CallingConv::ID CC, |
38 | EVT VT) const override; |
39 | unsigned getNumRegistersForCallingConv(LLVMContext &Context, |
40 | CallingConv::ID CC, |
41 | EVT VT) const override; |
42 | |
43 | unsigned getVectorTypeBreakdownForCallingConv( |
44 | LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, |
45 | unsigned &NumIntermediates, MVT &RegisterVT) const override; |
46 | |
47 | private: |
48 | SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL, |
49 | SDValue Chain, uint64_t Offset) const; |
50 | SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const; |
51 | SDValue getLDSKernelId(SelectionDAG &DAG, const SDLoc &SL) const; |
52 | SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, |
53 | const SDLoc &SL, SDValue Chain, |
54 | uint64_t Offset, Align Alignment, |
55 | bool Signed, |
56 | const ISD::InputArg *Arg = nullptr) const; |
57 | SDValue loadImplicitKernelArgument(SelectionDAG &DAG, MVT VT, const SDLoc &DL, |
58 | Align Alignment, |
59 | ImplicitParameter Param) const; |
60 | |
61 | SDValue lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA, |
62 | const SDLoc &SL, SDValue Chain, |
63 | const ISD::InputArg &Arg) const; |
64 | SDValue getPreloadedValue(SelectionDAG &DAG, |
65 | const SIMachineFunctionInfo &MFI, |
66 | EVT VT, |
67 | AMDGPUFunctionArgInfo::PreloadedValue) const; |
68 | |
69 | SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, |
70 | SelectionDAG &DAG) const override; |
71 | SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op, |
72 | MVT VT, unsigned Offset) const; |
73 | SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr, |
74 | SelectionDAG &DAG, bool WithChain) const; |
75 | SDValue lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, SDValue Offset, |
76 | SDValue CachePolicy, SelectionDAG &DAG) const; |
77 | |
78 | SDValue lowerRawBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG, |
79 | unsigned NewOpcode) const; |
80 | SDValue lowerStructBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG, |
81 | unsigned NewOpcode) const; |
82 | |
83 | SDValue lowerWaveID(SelectionDAG &DAG, SDValue Op) const; |
84 | SDValue lowerWorkitemID(SelectionDAG &DAG, SDValue Op, unsigned Dim, |
85 | const ArgDescriptor &ArgDesc) const; |
86 | |
87 | SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; |
88 | SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; |
89 | SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; |
90 | |
91 | // The raw.tbuffer and struct.tbuffer intrinsics have two offset args: offset |
92 | // (the offset that is included in bounds checking and swizzling, to be split |
93 | // between the instruction's voffset and immoffset fields) and soffset (the |
94 | // offset that is excluded from bounds checking and swizzling, to go in the |
95 | // instruction's soffset field). This function takes the first kind of |
96 | // offset and figures out how to split it between voffset and immoffset. |
97 | std::pair<SDValue, SDValue> splitBufferOffsets(SDValue Offset, |
98 | SelectionDAG &DAG) const; |
99 | |
100 | SDValue widenLoad(LoadSDNode *Ld, DAGCombinerInfo &DCI) const; |
101 | SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; |
102 | SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; |
103 | SDValue lowerFastUnsafeFDIV(SDValue Op, SelectionDAG &DAG) const; |
104 | SDValue lowerFastUnsafeFDIV64(SDValue Op, SelectionDAG &DAG) const; |
105 | SDValue lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const; |
106 | SDValue LowerFDIV16(SDValue Op, SelectionDAG &DAG) const; |
107 | SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const; |
108 | SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const; |
109 | SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const; |
110 | SDValue LowerFFREXP(SDValue Op, SelectionDAG &DAG) const; |
111 | SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; |
112 | SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const; |
113 | SDValue lowerFSQRTF16(SDValue Op, SelectionDAG &DAG) const; |
114 | SDValue lowerFSQRTF32(SDValue Op, SelectionDAG &DAG) const; |
115 | SDValue lowerFSQRTF64(SDValue Op, SelectionDAG &DAG) const; |
116 | SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; |
117 | SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; |
118 | SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; |
119 | SDValue adjustLoadValueType(unsigned Opcode, MemSDNode *M, |
120 | SelectionDAG &DAG, ArrayRef<SDValue> Ops, |
121 | bool IsIntrinsic = false) const; |
122 | |
123 | SDValue lowerIntrinsicLoad(MemSDNode *M, bool IsFormat, SelectionDAG &DAG, |
124 | ArrayRef<SDValue> Ops) const; |
125 | |
126 | // Call DAG.getMemIntrinsicNode for a load, but first widen a dwordx3 type to |
127 | // dwordx4 if on SI. |
128 | SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, |
129 | ArrayRef<SDValue> Ops, EVT MemVT, |
130 | MachineMemOperand *MMO, SelectionDAG &DAG) const; |
131 | |
132 | SDValue handleD16VData(SDValue VData, SelectionDAG &DAG, |
133 | bool ImageStore = false) const; |
134 | |
135 | /// Converts \p Op, which must be of floating point type, to the |
136 | /// floating point type \p VT, by either extending or truncating it. |
137 | SDValue getFPExtOrFPRound(SelectionDAG &DAG, |
138 | SDValue Op, |
139 | const SDLoc &DL, |
140 | EVT VT) const; |
141 | |
142 | SDValue convertArgType( |
143 | SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Val, |
144 | bool Signed, const ISD::InputArg *Arg = nullptr) const; |
145 | |
146 | /// Custom lowering for ISD::FP_ROUND for MVT::f16. |
147 | SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; |
148 | SDValue splitFP_ROUNDVectorOp(SDValue Op, SelectionDAG &DAG) const; |
149 | SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const; |
150 | SDValue lowerFMINIMUMNUM_FMAXIMUMNUM(SDValue Op, SelectionDAG &DAG) const; |
151 | SDValue lowerFMINIMUM_FMAXIMUM(SDValue Op, SelectionDAG &DAG) const; |
152 | SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const; |
153 | SDValue promoteUniformOpToI32(SDValue Op, DAGCombinerInfo &DCI) const; |
154 | SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; |
155 | SDValue lowerMUL(SDValue Op, SelectionDAG &DAG) const; |
156 | SDValue lowerXMULO(SDValue Op, SelectionDAG &DAG) const; |
157 | SDValue lowerXMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; |
158 | |
159 | SDValue getSegmentAperture(unsigned AS, const SDLoc &DL, |
160 | SelectionDAG &DAG) const; |
161 | |
162 | SDValue lowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const; |
163 | SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; |
164 | SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; |
165 | SDValue (SDValue Op, SelectionDAG &DAG) const; |
166 | SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; |
167 | SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
168 | SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
169 | |
170 | SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const; |
171 | SDValue lowerTrapEndpgm(SDValue Op, SelectionDAG &DAG) const; |
172 | SDValue lowerTrapHsaQueuePtr(SDValue Op, SelectionDAG &DAG) const; |
173 | SDValue lowerTrapHsa(SDValue Op, SelectionDAG &DAG) const; |
174 | SDValue lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const; |
175 | |
176 | SDNode *adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const; |
177 | |
178 | SDValue performUCharToFloatCombine(SDNode *N, |
179 | DAGCombinerInfo &DCI) const; |
180 | SDValue performFCopySignCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
181 | |
182 | SDValue performSHLPtrCombine(SDNode *N, |
183 | unsigned AS, |
184 | EVT MemVT, |
185 | DAGCombinerInfo &DCI) const; |
186 | |
187 | SDValue performMemSDNodeCombine(MemSDNode *N, DAGCombinerInfo &DCI) const; |
188 | |
189 | SDValue splitBinaryBitConstantOp(DAGCombinerInfo &DCI, const SDLoc &SL, |
190 | unsigned Opc, SDValue LHS, |
191 | const ConstantSDNode *CRHS) const; |
192 | |
193 | SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
194 | SDValue performOrCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
195 | SDValue performXorCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
196 | SDValue performZeroExtendCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
197 | SDValue performSignExtendInRegCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
198 | SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
199 | SDValue getCanonicalConstantFP(SelectionDAG &DAG, const SDLoc &SL, EVT VT, |
200 | const APFloat &C) const; |
201 | SDValue performFCanonicalizeCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
202 | |
203 | SDValue performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL, |
204 | SDValue Op0, SDValue Op1) const; |
205 | SDValue performIntMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL, |
206 | SDValue Src, SDValue MinVal, SDValue MaxVal, |
207 | bool Signed) const; |
208 | SDValue performMinMaxCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
209 | SDValue performFMed3Combine(SDNode *N, DAGCombinerInfo &DCI) const; |
210 | SDValue performCvtPkRTZCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
211 | SDValue (SDNode *N, DAGCombinerInfo &DCI) const; |
212 | SDValue performInsertVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
213 | SDValue performFPRoundCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
214 | |
215 | SDValue reassociateScalarOps(SDNode *N, SelectionDAG &DAG) const; |
216 | unsigned getFusedOpcode(const SelectionDAG &DAG, |
217 | const SDNode *N0, const SDNode *N1) const; |
218 | SDValue tryFoldToMad64_32(SDNode *N, DAGCombinerInfo &DCI) const; |
219 | SDValue foldAddSub64WithZeroLowBitsTo32(SDNode *N, |
220 | DAGCombinerInfo &DCI) const; |
221 | |
222 | SDValue performAddCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
223 | SDValue performPtrAddCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
224 | SDValue performAddCarrySubCarryCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
225 | SDValue performSubCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
226 | SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
227 | SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
228 | SDValue performFDivCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
229 | SDValue performFMulCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
230 | SDValue performFMACombine(SDNode *N, DAGCombinerInfo &DCI) const; |
231 | SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
232 | SDValue performCvtF32UByteNCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
233 | SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
234 | SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
235 | |
236 | bool isLegalMUBUFAddressingMode(const AddrMode &AM) const; |
237 | |
238 | unsigned isCFIntrinsic(const SDNode *Intr) const; |
239 | |
240 | public: |
241 | /// \returns True if fixup needs to be emitted for given global value \p GV, |
242 | /// false otherwise. |
243 | bool shouldEmitFixup(const GlobalValue *GV) const; |
244 | |
245 | /// \returns True if GOT relocation needs to be emitted for given global value |
246 | /// \p GV, false otherwise. |
247 | bool shouldEmitGOTReloc(const GlobalValue *GV) const; |
248 | |
249 | /// \returns True if PC-relative relocation needs to be emitted for given |
250 | /// global value \p GV, false otherwise. |
251 | bool shouldEmitPCReloc(const GlobalValue *GV) const; |
252 | |
253 | /// \returns true if this should use a literal constant for an LDS address, |
254 | /// and not emit a relocation for an LDS global. |
255 | bool shouldUseLDSConstAddress(const GlobalValue *GV) const; |
256 | |
257 | /// Check if EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT (<n x e>, var-idx) should be |
258 | /// expanded into a set of cmp/select instructions. |
259 | static bool shouldExpandVectorDynExt(unsigned EltSize, unsigned NumElem, |
260 | bool IsDivergentIdx, |
261 | const GCNSubtarget *Subtarget); |
262 | |
263 | bool shouldExpandVectorDynExt(SDNode *N) const; |
264 | |
265 | bool shouldPreservePtrArith(const Function &F, EVT PtrVT) const override; |
266 | |
267 | private: |
268 | // Analyze a combined offset from an amdgcn_s_buffer_load intrinsic and store |
269 | // the three offsets (voffset, soffset and instoffset) into the SDValue[3] |
270 | // array pointed to by Offsets. |
271 | void setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG, |
272 | SDValue *Offsets, Align Alignment = Align(4)) const; |
273 | |
274 | // Convert the i128 that an addrspace(8) pointer is natively represented as |
275 | // into the v4i32 that all the buffer intrinsics expect to receive. We can't |
276 | // add register classes for i128 on pain of the promotion logic going haywire, |
277 | // so this slightly ugly hack is what we've got. If passed a non-pointer |
278 | // argument (as would be seen in older buffer intrinsics), does nothing. |
279 | SDValue bufferRsrcPtrToVector(SDValue MaybePointer, SelectionDAG &DAG) const; |
280 | |
281 | // Wrap a 64-bit pointer into a v4i32 (which is how all SelectionDAG code |
282 | // represents ptr addrspace(8)) using the flags specified in the intrinsic. |
283 | SDValue lowerPointerAsRsrcIntrin(SDNode *Op, SelectionDAG &DAG) const; |
284 | |
285 | // Handle 8 bit and 16 bit buffer loads |
286 | SDValue handleByteShortBufferLoads(SelectionDAG &DAG, EVT LoadVT, SDLoc DL, |
287 | ArrayRef<SDValue> Ops, |
288 | MachineMemOperand *MMO, |
289 | bool IsTFE = false) const; |
290 | |
291 | // Handle 8 bit and 16 bit buffer stores |
292 | SDValue handleByteShortBufferStores(SelectionDAG &DAG, EVT VDataType, |
293 | SDLoc DL, SDValue Ops[], |
294 | MemSDNode *M) const; |
295 | |
296 | public: |
297 | SITargetLowering(const TargetMachine &tm, const GCNSubtarget &STI); |
298 | |
299 | const GCNSubtarget *getSubtarget() const; |
300 | |
301 | ArrayRef<MCPhysReg> getRoundingControlRegisters() const override; |
302 | |
303 | bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode, EVT DestVT, |
304 | EVT SrcVT) const override; |
305 | |
306 | bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, |
307 | LLT SrcTy) const override; |
308 | |
309 | bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const override; |
310 | |
311 | // While address space 7 should never make it to codegen, it still needs to |
312 | // have a MVT to prevent some analyses that query this function from breaking. |
313 | // We use the custum MVT::amdgpuBufferFatPointer and |
314 | // amdgpu::amdgpuBufferStridedPointer for this, though we use v8i32 for the |
315 | // memory type (which is probably unused). |
316 | MVT getPointerTy(const DataLayout &DL, unsigned AS) const override; |
317 | MVT getPointerMemTy(const DataLayout &DL, unsigned AS) const override; |
318 | |
319 | bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, |
320 | MachineFunction &MF, |
321 | unsigned IntrinsicID) const override; |
322 | |
323 | void CollectTargetIntrinsicOperands(const CallInst &I, |
324 | SmallVectorImpl<SDValue> &Ops, |
325 | SelectionDAG &DAG) const override; |
326 | |
327 | bool getAddrModeArguments(const IntrinsicInst *I, |
328 | SmallVectorImpl<Value *> &Ops, |
329 | Type *&AccessTy) const override; |
330 | |
331 | bool isLegalFlatAddressingMode(const AddrMode &AM, unsigned AddrSpace) const; |
332 | bool isLegalGlobalAddressingMode(const AddrMode &AM) const; |
333 | bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, |
334 | unsigned AS, |
335 | Instruction *I = nullptr) const override; |
336 | |
337 | bool canMergeStoresTo(unsigned AS, EVT MemVT, |
338 | const MachineFunction &MF) const override; |
339 | |
340 | bool allowsMisalignedMemoryAccessesImpl( |
341 | unsigned Size, unsigned AddrSpace, Align Alignment, |
342 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
343 | unsigned *IsFast = nullptr) const; |
344 | |
345 | bool allowsMisalignedMemoryAccesses( |
346 | LLT Ty, unsigned AddrSpace, Align Alignment, |
347 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
348 | unsigned *IsFast = nullptr) const override { |
349 | if (IsFast) |
350 | *IsFast = 0; |
351 | return allowsMisalignedMemoryAccessesImpl(Size: Ty.getSizeInBits(), AddrSpace, |
352 | Alignment, Flags, IsFast); |
353 | } |
354 | |
355 | bool allowsMisalignedMemoryAccesses( |
356 | EVT VT, unsigned AS, Align Alignment, |
357 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
358 | unsigned *IsFast = nullptr) const override; |
359 | |
360 | EVT getOptimalMemOpType(const MemOp &Op, |
361 | const AttributeList &FuncAttributes) const override; |
362 | |
363 | bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const; |
364 | |
365 | static bool isNonGlobalAddrSpace(unsigned AS); |
366 | |
367 | bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; |
368 | |
369 | TargetLoweringBase::LegalizeTypeAction |
370 | getPreferredVectorAction(MVT VT) const override; |
371 | |
372 | bool shouldConvertConstantLoadToIntImm(const APInt &Imm, |
373 | Type *Ty) const override; |
374 | |
375 | bool (EVT ResVT, EVT SrcVT, |
376 | unsigned Index) const override; |
377 | bool (EVT VT, unsigned Index) const override; |
378 | |
379 | bool isTypeDesirableForOp(unsigned Op, EVT VT) const override; |
380 | |
381 | bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; |
382 | |
383 | unsigned combineRepeatedFPDivisors() const override { |
384 | // Combine multiple FDIVs with the same divisor into multiple FMULs by the |
385 | // reciprocal. |
386 | return 2; |
387 | } |
388 | |
389 | bool supportSplitCSR(MachineFunction *MF) const override; |
390 | void initializeSplitCSR(MachineBasicBlock *Entry) const override; |
391 | void insertCopiesSplitCSR( |
392 | MachineBasicBlock *Entry, |
393 | const SmallVectorImpl<MachineBasicBlock *> &Exits) const override; |
394 | |
395 | SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, |
396 | bool isVarArg, |
397 | const SmallVectorImpl<ISD::InputArg> &Ins, |
398 | const SDLoc &DL, SelectionDAG &DAG, |
399 | SmallVectorImpl<SDValue> &InVals) const override; |
400 | |
401 | bool CanLowerReturn(CallingConv::ID CallConv, |
402 | MachineFunction &MF, bool isVarArg, |
403 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
404 | LLVMContext &Context, const Type *RetTy) const override; |
405 | |
406 | SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, |
407 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
408 | const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, |
409 | SelectionDAG &DAG) const override; |
410 | |
411 | void passSpecialInputs( |
412 | CallLoweringInfo &CLI, |
413 | CCState &CCInfo, |
414 | const SIMachineFunctionInfo &Info, |
415 | SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass, |
416 | SmallVectorImpl<SDValue> &MemOpChains, |
417 | SDValue Chain) const; |
418 | |
419 | SDValue LowerCallResult(SDValue Chain, SDValue InGlue, |
420 | CallingConv::ID CallConv, bool isVarArg, |
421 | const SmallVectorImpl<ISD::InputArg> &Ins, |
422 | const SDLoc &DL, SelectionDAG &DAG, |
423 | SmallVectorImpl<SDValue> &InVals, bool isThisReturn, |
424 | SDValue ThisVal) const; |
425 | |
426 | bool mayBeEmittedAsTailCall(const CallInst *) const override; |
427 | |
428 | bool isEligibleForTailCallOptimization( |
429 | SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, |
430 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
431 | const SmallVectorImpl<SDValue> &OutVals, |
432 | const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const; |
433 | |
434 | SDValue LowerCall(CallLoweringInfo &CLI, |
435 | SmallVectorImpl<SDValue> &InVals) const override; |
436 | |
437 | SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; |
438 | SDValue LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const; |
439 | SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; |
440 | SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; |
441 | |
442 | SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; |
443 | SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; |
444 | SDValue lowerGET_FPENV(SDValue Op, SelectionDAG &DAG) const; |
445 | SDValue lowerSET_FPENV(SDValue Op, SelectionDAG &DAG) const; |
446 | |
447 | Register getRegisterByName(const char* RegName, LLT VT, |
448 | const MachineFunction &MF) const override; |
449 | |
450 | MachineBasicBlock *splitKillBlock(MachineInstr &MI, |
451 | MachineBasicBlock *BB) const; |
452 | |
453 | void bundleInstWithWaitcnt(MachineInstr &MI) const; |
454 | MachineBasicBlock *emitGWSMemViolTestLoop(MachineInstr &MI, |
455 | MachineBasicBlock *BB) const; |
456 | |
457 | MachineBasicBlock * |
458 | EmitInstrWithCustomInserter(MachineInstr &MI, |
459 | MachineBasicBlock *BB) const override; |
460 | |
461 | bool enableAggressiveFMAFusion(EVT VT) const override; |
462 | bool enableAggressiveFMAFusion(LLT Ty) const override; |
463 | EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, |
464 | EVT VT) const override; |
465 | MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override; |
466 | LLT getPreferredShiftAmountTy(LLT Ty) const override; |
467 | |
468 | bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
469 | EVT VT) const override; |
470 | bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
471 | const LLT Ty) const override; |
472 | bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const override; |
473 | bool isFMADLegal(const MachineInstr &MI, const LLT Ty) const override; |
474 | |
475 | SDValue splitUnaryVectorOp(SDValue Op, SelectionDAG &DAG) const; |
476 | SDValue splitBinaryVectorOp(SDValue Op, SelectionDAG &DAG) const; |
477 | SDValue splitTernaryVectorOp(SDValue Op, SelectionDAG &DAG) const; |
478 | SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; |
479 | void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, |
480 | SelectionDAG &DAG) const override; |
481 | |
482 | SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; |
483 | SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override; |
484 | void AddMemOpInit(MachineInstr &MI) const; |
485 | void AdjustInstrPostInstrSelection(MachineInstr &MI, |
486 | SDNode *Node) const override; |
487 | |
488 | SDNode *legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const; |
489 | |
490 | MachineSDNode *wrapAddr64Rsrc(SelectionDAG &DAG, const SDLoc &DL, |
491 | SDValue Ptr) const; |
492 | MachineSDNode *buildRSRC(SelectionDAG &DAG, const SDLoc &DL, SDValue Ptr, |
493 | uint32_t RsrcDword1, uint64_t RsrcDword2And3) const; |
494 | std::pair<unsigned, const TargetRegisterClass *> |
495 | getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
496 | StringRef Constraint, MVT VT) const override; |
497 | ConstraintType getConstraintType(StringRef Constraint) const override; |
498 | void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, |
499 | std::vector<SDValue> &Ops, |
500 | SelectionDAG &DAG) const override; |
501 | bool getAsmOperandConstVal(SDValue Op, uint64_t &Val) const; |
502 | bool checkAsmConstraintVal(SDValue Op, StringRef Constraint, |
503 | uint64_t Val) const; |
504 | bool checkAsmConstraintValA(SDValue Op, |
505 | uint64_t Val, |
506 | unsigned MaxSize = 64) const; |
507 | SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, const SDLoc &DL, |
508 | SDValue V) const; |
509 | |
510 | void finalizeLowering(MachineFunction &MF) const override; |
511 | |
512 | void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, |
513 | const APInt &DemandedElts, |
514 | const SelectionDAG &DAG, |
515 | unsigned Depth = 0) const override; |
516 | void computeKnownBitsForFrameIndex(int FrameIdx, |
517 | KnownBits &Known, |
518 | const MachineFunction &MF) const override; |
519 | void computeKnownBitsForTargetInstr(GISelValueTracking &Analysis, Register R, |
520 | KnownBits &Known, |
521 | const APInt &DemandedElts, |
522 | const MachineRegisterInfo &MRI, |
523 | unsigned Depth = 0) const override; |
524 | |
525 | Align computeKnownAlignForTargetInstr(GISelValueTracking &Analysis, |
526 | Register R, |
527 | const MachineRegisterInfo &MRI, |
528 | unsigned Depth = 0) const override; |
529 | bool isSDNodeSourceOfDivergence(const SDNode *N, FunctionLoweringInfo *FLI, |
530 | UniformityInfo *UA) const override; |
531 | |
532 | bool hasMemSDNodeUser(SDNode *N) const; |
533 | |
534 | bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, |
535 | SDValue N1) const override; |
536 | |
537 | bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0, |
538 | Register N1) const override; |
539 | |
540 | bool isCanonicalized(SelectionDAG &DAG, SDValue Op, |
541 | unsigned MaxDepth = 5) const; |
542 | bool isCanonicalized(Register Reg, const MachineFunction &MF, |
543 | unsigned MaxDepth = 5) const; |
544 | bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const; |
545 | bool denormalsEnabledForType(LLT Ty, const MachineFunction &MF) const; |
546 | |
547 | bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, |
548 | const TargetRegisterInfo *TRI, |
549 | const TargetInstrInfo *TII, |
550 | MCRegister &PhysReg, int &Cost) const override; |
551 | |
552 | bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts, |
553 | const SelectionDAG &DAG, bool SNaN = false, |
554 | unsigned Depth = 0) const override; |
555 | AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override; |
556 | AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; |
557 | AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override; |
558 | AtomicExpansionKind |
559 | shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; |
560 | |
561 | void emitExpandAtomicAddrSpacePredicate(Instruction *AI) const; |
562 | void emitExpandAtomicRMW(AtomicRMWInst *AI) const override; |
563 | void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const override; |
564 | |
565 | LoadInst * |
566 | lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override; |
567 | |
568 | const TargetRegisterClass *getRegClassFor(MVT VT, |
569 | bool isDivergent) const override; |
570 | bool requiresUniformRegister(MachineFunction &MF, |
571 | const Value *V) const override; |
572 | Align getPrefLoopAlignment(MachineLoop *ML) const override; |
573 | |
574 | void allocateHSAUserSGPRs(CCState &CCInfo, |
575 | MachineFunction &MF, |
576 | const SIRegisterInfo &TRI, |
577 | SIMachineFunctionInfo &Info) const; |
578 | |
579 | void allocatePreloadKernArgSGPRs(CCState &CCInfo, |
580 | SmallVectorImpl<CCValAssign> &ArgLocs, |
581 | const SmallVectorImpl<ISD::InputArg> &Ins, |
582 | MachineFunction &MF, |
583 | const SIRegisterInfo &TRI, |
584 | SIMachineFunctionInfo &Info) const; |
585 | |
586 | void allocateLDSKernelId(CCState &CCInfo, MachineFunction &MF, |
587 | const SIRegisterInfo &TRI, |
588 | SIMachineFunctionInfo &Info) const; |
589 | |
590 | void allocateSystemSGPRs(CCState &CCInfo, |
591 | MachineFunction &MF, |
592 | SIMachineFunctionInfo &Info, |
593 | CallingConv::ID CallConv, |
594 | bool IsShader) const; |
595 | |
596 | void allocateSpecialEntryInputVGPRs(CCState &CCInfo, |
597 | MachineFunction &MF, |
598 | const SIRegisterInfo &TRI, |
599 | SIMachineFunctionInfo &Info) const; |
600 | void allocateSpecialInputSGPRs( |
601 | CCState &CCInfo, |
602 | MachineFunction &MF, |
603 | const SIRegisterInfo &TRI, |
604 | SIMachineFunctionInfo &Info) const; |
605 | |
606 | void allocateSpecialInputVGPRs(CCState &CCInfo, |
607 | MachineFunction &MF, |
608 | const SIRegisterInfo &TRI, |
609 | SIMachineFunctionInfo &Info) const; |
610 | void allocateSpecialInputVGPRsFixed(CCState &CCInfo, |
611 | MachineFunction &MF, |
612 | const SIRegisterInfo &TRI, |
613 | SIMachineFunctionInfo &Info) const; |
614 | |
615 | MachineMemOperand::Flags |
616 | getTargetMMOFlags(const Instruction &I) const override; |
617 | }; |
618 | |
619 | // Returns true if argument is a boolean value which is not serialized into |
620 | // memory or argument and does not require v_cndmask_b32 to be deserialized. |
621 | bool isBoolSGPR(SDValue V); |
622 | |
623 | } // End namespace llvm |
624 | |
625 | #endif |
626 | |