//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// SI DAG Lowering interface definition
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIISELLOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_SIISELLOWERING_H

#include "AMDGPUISelLowering.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "llvm/CodeGen/MachineFunction.h"

namespace llvm {

class GCNSubtarget;
class SIMachineFunctionInfo;
class SIRegisterInfo;

namespace AMDGPU {
struct ImageDimIntrinsicInfo;
}

class SITargetLowering final : public AMDGPUTargetLowering {
private:
  const GCNSubtarget *Subtarget;

public:
  MVT getRegisterTypeForCallingConv(LLVMContext &Context,
                                    CallingConv::ID CC,
                                    EVT VT) const override;
  unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                         CallingConv::ID CC,
                                         EVT VT) const override;

  unsigned getVectorTypeBreakdownForCallingConv(
      LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
      unsigned &NumIntermediates, MVT &RegisterVT) const override;

private:
  SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
                                   SDValue Chain, uint64_t Offset) const;
  SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;
  SDValue getLDSKernelId(SelectionDAG &DAG, const SDLoc &SL) const;
  SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
                                   const SDLoc &SL, SDValue Chain,
                                   uint64_t Offset, Align Alignment,
                                   bool Signed,
                                   const ISD::InputArg *Arg = nullptr) const;
  SDValue loadImplicitKernelArgument(SelectionDAG &DAG, MVT VT, const SDLoc &DL,
                                     Align Alignment,
                                     ImplicitParameter Param) const;

  SDValue lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA,
                              const SDLoc &SL, SDValue Chain,
                              const ISD::InputArg &Arg) const;
  SDValue getPreloadedValue(SelectionDAG &DAG,
                            const SIMachineFunctionInfo &MFI,
                            EVT VT,
                            AMDGPUFunctionArgInfo::PreloadedValue) const;

  SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
                             SelectionDAG &DAG) const override;
  SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
                                 MVT VT, unsigned Offset) const;
  SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr,
                     SelectionDAG &DAG, bool WithChain) const;
  SDValue lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, SDValue Offset,
                       SDValue CachePolicy, SelectionDAG &DAG) const;

  SDValue lowerRawBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG,
                                     unsigned NewOpcode) const;
  SDValue lowerStructBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG,
                                        unsigned NewOpcode) const;

  SDValue lowerWaveID(SelectionDAG &DAG, SDValue Op) const;
  SDValue lowerWorkitemID(SelectionDAG &DAG, SDValue Op, unsigned Dim,
                          const ArgDescriptor &ArgDesc) const;

  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;

  // The raw.tbuffer and struct.tbuffer intrinsics have two offset args: offset
  // (the offset that is included in bounds checking and swizzling, to be split
  // between the instruction's voffset and immoffset fields) and soffset (the
  // offset that is excluded from bounds checking and swizzling, to go in the
  // instruction's soffset field). This function takes the first kind of
  // offset and figures out how to split it between voffset and immoffset.
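  // An illustrative split (a sketch, not lifted from the implementation; the
  // immediate field width is subtarget-dependent, and a 12-bit unsigned field
  // is assumed here): a constant combined offset of 4100 does not fit the
  // immediate field, so it could become immoffset = 4 plus a voffset node
  // holding 4096; any split where voffset + immoffset reproduces the original
  // offset is equally correct.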
  std::pair<SDValue, SDValue> splitBufferOffsets(SDValue Offset,
                                                 SelectionDAG &DAG) const;

  SDValue widenLoad(LoadSDNode *Ld, DAGCombinerInfo &DCI) const;
  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFastUnsafeFDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFastUnsafeFDIV64(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFDIV16(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFFREXP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFSQRTF16(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFSQRTF32(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFSQRTF64(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue adjustLoadValueType(unsigned Opcode, MemSDNode *M,
                              SelectionDAG &DAG, ArrayRef<SDValue> Ops,
                              bool IsIntrinsic = false) const;

  SDValue lowerIntrinsicLoad(MemSDNode *M, bool IsFormat, SelectionDAG &DAG,
                             ArrayRef<SDValue> Ops) const;

  // Call DAG.getMemIntrinsicNode for a load, but first widen a dwordx3 type to
  // dwordx4 if on SI.
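  // A sketch of the intended widening (assumed shape, not the exact code
  // path): a v3i32 load on such a subtarget is built as a v4i32 memory node,
  // and the extra lane is dropped again with an EXTRACT_SUBVECTOR before the
  // result is handed back to the caller.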
  SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
                              ArrayRef<SDValue> Ops, EVT MemVT,
                              MachineMemOperand *MMO, SelectionDAG &DAG) const;

  SDValue handleD16VData(SDValue VData, SelectionDAG &DAG,
                         bool ImageStore = false) const;

  /// Converts \p Op, which must be of floating point type, to the
  /// floating point type \p VT, by either extending or truncating it.
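  /// For example (a sketch of the intent): an f16 input with \p VT == f32
  /// yields an ISD::FP_EXTEND node, while an f64 input with \p VT == f32
  /// yields an ISD::FP_ROUND node (presumably with the usual flag operand of
  /// 0, marking the truncation as possibly inexact).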
  SDValue getFPExtOrFPRound(SelectionDAG &DAG,
                            SDValue Op,
                            const SDLoc &DL,
                            EVT VT) const;

  SDValue convertArgType(
      SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Val,
      bool Signed, const ISD::InputArg *Arg = nullptr) const;

  /// Custom lowering for ISD::FP_ROUND for MVT::f16.
  SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerXMULO(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerXMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;

  SDValue getSegmentAperture(unsigned AS, const SDLoc &DL,
                             SelectionDAG &DAG) const;

  SDValue lowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerTrapEndpgm(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerTrapHsaQueuePtr(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerTrapHsa(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const;

  SDNode *adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;

  SDValue performUCharToFloatCombine(SDNode *N,
                                     DAGCombinerInfo &DCI) const;
  SDValue performFCopySignCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  SDValue performSHLPtrCombine(SDNode *N,
                               unsigned AS,
                               EVT MemVT,
                               DAGCombinerInfo &DCI) const;

  SDValue performMemSDNodeCombine(MemSDNode *N, DAGCombinerInfo &DCI) const;

  SDValue splitBinaryBitConstantOp(DAGCombinerInfo &DCI, const SDLoc &SL,
                                   unsigned Opc, SDValue LHS,
                                   const ConstantSDNode *CRHS) const;

  SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performOrCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performXorCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performZeroExtendCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performSignExtendInRegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue getCanonicalConstantFP(SelectionDAG &DAG, const SDLoc &SL, EVT VT,
                                 const APFloat &C) const;
  SDValue performFCanonicalizeCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  SDValue performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
                                  SDValue Op0, SDValue Op1) const;
  SDValue performIntMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
                                   SDValue Src, SDValue MinVal, SDValue MaxVal,
                                   bool Signed) const;
  SDValue performMinMaxCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performFMed3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performCvtPkRTZCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performExtractVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performInsertVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performFPRoundCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  SDValue reassociateScalarOps(SDNode *N, SelectionDAG &DAG) const;
  unsigned getFusedOpcode(const SelectionDAG &DAG,
                          const SDNode *N0, const SDNode *N1) const;
  SDValue tryFoldToMad64_32(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performAddCarrySubCarryCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performFDivCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performFMACombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performCvtF32UByteNCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;

  unsigned isCFIntrinsic(const SDNode *Intr) const;

public:
  /// \returns True if a fixup needs to be emitted for the given global value
  /// \p GV, false otherwise.
  bool shouldEmitFixup(const GlobalValue *GV) const;

  /// \returns True if a GOT relocation needs to be emitted for the given
  /// global value \p GV, false otherwise.
  bool shouldEmitGOTReloc(const GlobalValue *GV) const;

  /// \returns True if a PC-relative relocation needs to be emitted for the
  /// given global value \p GV, false otherwise.
  bool shouldEmitPCReloc(const GlobalValue *GV) const;

  /// \returns true if this should use a literal constant for an LDS address,
  /// and not emit a relocation for an LDS global.
  bool shouldUseLDSConstAddress(const GlobalValue *GV) const;

  /// Check if EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT (<n x e>, var-idx) should be
  /// expanded into a set of cmp/select instructions.
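  /// A sketch of the expansion this guards (an assumed shape, not the exact
  /// emitted DAG): a divergent-index extract such as
  ///   %e = extractelement <4 x float> %v, i32 %idx
  /// becomes a compare/select chain along the lines of
  ///   select (%idx == 3), %v3,
  ///       (select (%idx == 2), %v2, (select (%idx == 1), %v1, %v0))
  /// which only pays off when EltSize * NumElem is small enough.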
  static bool shouldExpandVectorDynExt(unsigned EltSize, unsigned NumElem,
                                       bool IsDivergentIdx,
                                       const GCNSubtarget *Subtarget);

  bool shouldExpandVectorDynExt(SDNode *N) const;

private:
  // Analyze a combined offset from an amdgcn_s_buffer_load intrinsic and store
  // the three offsets (voffset, soffset and instoffset) into the SDValue[3]
  // array pointed to by Offsets.
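  // Illustrative behavior (an assumption; the encodable immediate range is
  // subtarget-dependent): a constant CombinedOffset C that fits the immediate
  // field becomes {voffset = 0, soffset = 0, instoffset = C}, while a larger
  // or non-constant offset has the excess moved into the soffset/voffset
  // SDValues.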
  void setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG,
                        SDValue *Offsets, Align Alignment = Align(4)) const;

  // Convert the i128 that an addrspace(8) pointer is natively represented as
  // into the v4i32 that all the buffer intrinsics expect to receive. We can't
  // add register classes for i128 on pain of the promotion logic going haywire,
  // so this slightly ugly hack is what we've got. If passed a non-pointer
  // argument (as would be seen in older buffer intrinsics), does nothing.
  SDValue bufferRsrcPtrToVector(SDValue MaybePointer, SelectionDAG &DAG) const;

  // Wrap a 64-bit pointer into a v4i32 (which is how all SelectionDAG code
  // represents ptr addrspace(8)) using the flags specified in the intrinsic.
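  // A sketch of the resulting descriptor (the field placement below is
  // assumed from the usual buffer resource layout rather than restated from
  // the implementation): words 0-1 hold the base address, with stride and
  // swizzle bits in the high half of word 1, word 2 the number of records,
  // and word 3 the remaining format/flag bits.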
  SDValue lowerPointerAsRsrcIntrin(SDNode *Op, SelectionDAG &DAG) const;

  // Handle 8-bit and 16-bit buffer loads.
  SDValue handleByteShortBufferLoads(SelectionDAG &DAG, EVT LoadVT, SDLoc DL,
                                     ArrayRef<SDValue> Ops,
                                     MachineMemOperand *MMO,
                                     bool IsTFE = false) const;

  // Handle 8-bit and 16-bit buffer stores.
  SDValue handleByteShortBufferStores(SelectionDAG &DAG, EVT VDataType,
                                      SDLoc DL, SDValue Ops[],
                                      MemSDNode *M) const;

public:
  SITargetLowering(const TargetMachine &tm, const GCNSubtarget &STI);

  const GCNSubtarget *getSubtarget() const;

  ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;

  bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode, EVT DestVT,
                       EVT SrcVT) const override;

  bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy,
                       LLT SrcTy) const override;

  bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const override;

  // While address space 7 should never make it to codegen, it still needs to
  // have an MVT to prevent some analyses that query this function from
  // breaking, so, to work around the lack of i160, map it to v5i32.
  MVT getPointerTy(const DataLayout &DL, unsigned AS) const override;
  MVT getPointerMemTy(const DataLayout &DL, unsigned AS) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
                          MachineFunction &MF,
                          unsigned IntrinsicID) const override;

  void CollectTargetIntrinsicOperands(const CallInst &I,
                                      SmallVectorImpl<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

  bool getAddrModeArguments(IntrinsicInst * /*I*/,
                            SmallVectorImpl<Value*> &/*Ops*/,
                            Type *&/*AccessTy*/) const override;

  bool isLegalFlatAddressingMode(const AddrMode &AM, unsigned AddrSpace) const;
  bool isLegalGlobalAddressingMode(const AddrMode &AM) const;
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;

  bool canMergeStoresTo(unsigned AS, EVT MemVT,
                        const MachineFunction &MF) const override;

  bool allowsMisalignedMemoryAccessesImpl(
      unsigned Size, unsigned AddrSpace, Align Alignment,
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *IsFast = nullptr) const;

  bool allowsMisalignedMemoryAccesses(
      LLT Ty, unsigned AddrSpace, Align Alignment,
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *IsFast = nullptr) const override {
    if (IsFast)
      *IsFast = 0;
    return allowsMisalignedMemoryAccessesImpl(Ty.getSizeInBits(), AddrSpace,
                                              Alignment, Flags, IsFast);
  }

  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AS, Align Alignment,
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *IsFast = nullptr) const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  bool isMemOpUniform(const SDNode *N) const;
  bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const;

  static bool isNonGlobalAddrSpace(unsigned AS);

  bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;

  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;

  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool isTypeDesirableForOp(unsigned Op, EVT VT) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  unsigned combineRepeatedFPDivisors() const override {
    // Combine multiple FDIVs with the same divisor into multiple FMULs by the
    // reciprocal.
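    // A worked example of what returning 2 enables: with at least two
    // divisions by the same divisor, and fast-math flags permitting,
    //   a / x + b / x
    // is rewritten by the generic combiner as
    //   r = 1.0 / x;  a * r + b * r
    // trading all but one expensive division for multiplies.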
    return 2;
  }

  bool supportSplitCSR(MachineFunction *MF) const override;
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;

  bool CanLowerReturn(CallingConv::ID CallConv,
                      MachineFunction &MF, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  void passSpecialInputs(
      CallLoweringInfo &CLI,
      CCState &CCInfo,
      const SIMachineFunctionInfo &Info,
      SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
      SmallVectorImpl<SDValue> &MemOpChains,
      SDValue Chain) const;

  SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal) const;

  bool mayBeEmittedAsTailCall(const CallInst *) const override;

  bool isEligibleForTailCallOptimization(
      SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
      const SmallVectorImpl<ISD::OutputArg> &Outs,
      const SmallVectorImpl<SDValue> &OutVals,
      const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;

  SDValue LowerCall(CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue lowerDYNAMIC_STACKALLOCImpl(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerGET_FPENV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSET_FPENV(SDValue Op, SelectionDAG &DAG) const;

  Register getRegisterByName(const char* RegName, LLT VT,
                             const MachineFunction &MF) const override;

  MachineBasicBlock *splitKillBlock(MachineInstr &MI,
                                    MachineBasicBlock *BB) const;

  void bundleInstWithWaitcnt(MachineInstr &MI) const;
  MachineBasicBlock *emitGWSMemViolTestLoop(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *BB) const override;

  bool enableAggressiveFMAFusion(EVT VT) const override;
  bool enableAggressiveFMAFusion(LLT Ty) const override;
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;
  MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override;
  LLT getPreferredShiftAmountTy(LLT Ty) const override;

  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  const LLT Ty) const override;
  bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const override;
  bool isFMADLegal(const MachineInstr &MI, const LLT Ty) const override;

  SDValue splitUnaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue splitBinaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue splitTernaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
  SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
  void AddMemOpInit(MachineInstr &MI) const;
  void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                     SDNode *Node) const override;

  SDNode *legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const;

  MachineSDNode *wrapAddr64Rsrc(SelectionDAG &DAG, const SDLoc &DL,
                                SDValue Ptr) const;
  MachineSDNode *buildRSRC(SelectionDAG &DAG, const SDLoc &DL, SDValue Ptr,
                           uint32_t RsrcDword1, uint64_t RsrcDword2And3) const;
  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;
  ConstraintType getConstraintType(StringRef Constraint) const override;
  void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;
  bool getAsmOperandConstVal(SDValue Op, uint64_t &Val) const;
  bool checkAsmConstraintVal(SDValue Op, StringRef Constraint,
                             uint64_t Val) const;
  bool checkAsmConstraintValA(SDValue Op,
                              uint64_t Val,
                              unsigned MaxSize = 64) const;
  SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, const SDLoc &DL,
                   SDValue V) const;

  void finalizeLowering(MachineFunction &MF) const override;

  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;
  void computeKnownBitsForFrameIndex(int FrameIdx,
                                     KnownBits &Known,
                                     const MachineFunction &MF) const override;
  void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, Register R,
                                      KnownBits &Known,
                                      const APInt &DemandedElts,
                                      const MachineRegisterInfo &MRI,
                                      unsigned Depth = 0) const override;

  Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, Register R,
                                        const MachineRegisterInfo &MRI,
                                        unsigned Depth = 0) const override;
  bool isSDNodeSourceOfDivergence(const SDNode *N, FunctionLoweringInfo *FLI,
                                  UniformityInfo *UA) const override;

  bool hasMemSDNodeUser(SDNode *N) const;

  bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
                           SDValue N1) const override;

  bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
                           Register N1) const override;

  bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
                       unsigned MaxDepth = 5) const;
  bool isCanonicalized(Register Reg, const MachineFunction &MF,
                       unsigned MaxDepth = 5) const;
  bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const;
  bool denormalsEnabledForType(LLT Ty, const MachineFunction &MF) const;

  bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
                                 const TargetRegisterInfo *TRI,
                                 const TargetInstrInfo *TII, unsigned &PhysReg,
                                 int &Cost) const override;

  bool isKnownNeverNaNForTargetNode(SDValue Op,
                                    const SelectionDAG &DAG,
                                    bool SNaN = false,
                                    unsigned Depth = 0) const override;
  AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
  AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
  void emitExpandAtomicRMW(AtomicRMWInst *AI) const override;

  LoadInst *
  lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

  const TargetRegisterClass *getRegClassFor(MVT VT,
                                            bool isDivergent) const override;
  bool requiresUniformRegister(MachineFunction &MF,
                               const Value *V) const override;
  Align getPrefLoopAlignment(MachineLoop *ML) const override;

  void allocateHSAUserSGPRs(CCState &CCInfo,
                            MachineFunction &MF,
                            const SIRegisterInfo &TRI,
                            SIMachineFunctionInfo &Info) const;

  void allocatePreloadKernArgSGPRs(CCState &CCInfo,
                                   SmallVectorImpl<CCValAssign> &ArgLocs,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   MachineFunction &MF,
                                   const SIRegisterInfo &TRI,
                                   SIMachineFunctionInfo &Info) const;

  void allocateLDSKernelId(CCState &CCInfo, MachineFunction &MF,
                           const SIRegisterInfo &TRI,
                           SIMachineFunctionInfo &Info) const;

  void allocateSystemSGPRs(CCState &CCInfo,
                           MachineFunction &MF,
                           SIMachineFunctionInfo &Info,
                           CallingConv::ID CallConv,
                           bool IsShader) const;

  void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
                                      MachineFunction &MF,
                                      const SIRegisterInfo &TRI,
                                      SIMachineFunctionInfo &Info) const;
  void allocateSpecialInputSGPRs(
      CCState &CCInfo,
      MachineFunction &MF,
      const SIRegisterInfo &TRI,
      SIMachineFunctionInfo &Info) const;

  void allocateSpecialInputVGPRs(CCState &CCInfo,
                                 MachineFunction &MF,
                                 const SIRegisterInfo &TRI,
                                 SIMachineFunctionInfo &Info) const;
  void allocateSpecialInputVGPRsFixed(CCState &CCInfo,
                                      MachineFunction &MF,
                                      const SIRegisterInfo &TRI,
                                      SIMachineFunctionInfo &Info) const;

  MachineMemOperand::Flags
  getTargetMMOFlags(const Instruction &I) const override;
};

// Returns true if the argument is a boolean value that is not serialized into
// memory or an argument and does not require v_cndmask_b32 to be deserialized.
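// For example (an assumption about typical producers rather than an
// exhaustive list): the i1 result of an ISD::SETCC, which SI keeps as a lane
// mask in a scalar condition register rather than as a per-lane 0/1 value in
// a VGPR.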
bool isBoolSGPR(SDValue V);

} // End namespace llvm

#endif