SIISelLowering.h source code [llvm_projects/llvm/lib/Target/AMDGPU/SIISelLowering.h]

1	//===-- SIISelLowering.h - SI DAG Lowering Interface ------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	/// \file
10	/// SI DAG Lowering interface definition
11	//
12	//===----------------------------------------------------------------------===//
13
14	#ifndef LLVM_LIB_TARGET_AMDGPU_SIISELLOWERING_H
15	#define LLVM_LIB_TARGET_AMDGPU_SIISELLOWERING_H
16
17	#include "AMDGPUArgumentUsageInfo.h"
18	#include "AMDGPUISelLowering.h"
19	#include "SIDefines.h"
20	#include "llvm/CodeGen/MachineFunction.h"
21
22	namespace llvm {
23
// Forward declarations. SITargetLowering only names these types through
// pointers and references, so complete definitions are not required here.
class GCNSubtarget;
class SIMachineFunctionInfo;
class SIRegisterInfo;

namespace AMDGPU {
struct ImageDimIntrinsicInfo;
} // namespace AMDGPU
31
32	class SITargetLowering final : public AMDGPUTargetLowering {
33	private:
34	const GCNSubtarget *Subtarget;
35
36	public:
37	MVT getRegisterTypeForCallingConv(LLVMContext &Context,
38	CallingConv::ID CC,
39	EVT VT) const override;
40	unsigned getNumRegistersForCallingConv(LLVMContext &Context,
41	CallingConv::ID CC,
42	EVT VT) const override;
43
44	unsigned getVectorTypeBreakdownForCallingConv(
45	LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
46	unsigned &NumIntermediates, MVT &RegisterVT) const override;
47
48	MachinePointerInfo getKernargSegmentPtrInfo(MachineFunction &MF) const;
49
50	private:
51	SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
52	SDValue Chain, uint64_t Offset) const;
53	SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;
54	SDValue getLDSKernelId(SelectionDAG &DAG, const SDLoc &SL) const;
55	SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
56	const SDLoc &SL, SDValue Chain,
57	uint64_t Offset, Align Alignment,
58	bool Signed,
59	const ISD::InputArg Arg = nullptr) const*;
60	SDValue loadImplicitKernelArgument(SelectionDAG &DAG, MVT VT, const SDLoc &DL,
61	Align Alignment,
62	ImplicitParameter Param) const;
63
64	SDValue convertABITypeToValueType(SelectionDAG &DAG, SDValue Val,
65	CCValAssign &VA, const SDLoc &SL) const;
66
67	SDValue lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA,
68	const SDLoc &SL, SDValue Chain,
69	const ISD::InputArg &Arg) const;
70	SDValue lowerWorkGroupId(
71	SelectionDAG &DAG, const SIMachineFunctionInfo &MFI, EVT VT,
72	AMDGPUFunctionArgInfo::PreloadedValue ClusterIdPV,
73	AMDGPUFunctionArgInfo::PreloadedValue ClusterMaxIdPV,
74	AMDGPUFunctionArgInfo::PreloadedValue ClusterWorkGroupIdPV) const;
75	SDValue getPreloadedValue(SelectionDAG &DAG,
76	const SIMachineFunctionInfo &MFI,
77	EVT VT,
78	AMDGPUFunctionArgInfo::PreloadedValue) const;
79
80	SDValue LowerGlobalAddress(AMDGPUMachineFunctionInfo *MFI, SDValue Op,
81	SelectionDAG &DAG) const override;
82	SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
83
84	SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
85	MVT VT, unsigned Offset) const;
86	SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr,
87	SelectionDAG &DAG, bool WithChain) const;
88	SDValue lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, SDValue Offset,
89	SDValue CachePolicy, SelectionDAG &DAG) const;
90
91	SDValue lowerRawBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG,
92	unsigned NewOpcode) const;
93	SDValue lowerStructBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG,
94	unsigned NewOpcode) const;
95
96	SDValue lowerWaveID(SelectionDAG &DAG, SDValue Op) const;
97	SDValue lowerConstHwRegRead(SelectionDAG &DAG, SDValue Op,
98	AMDGPU::Hwreg::Id HwReg, unsigned LowBit,
99	unsigned Width) const;
100	SDValue lowerWorkitemID(SelectionDAG &DAG, SDValue Op, unsigned Dim,
101	const ArgDescriptor &ArgDesc) const;
102
103	SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
104	SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
105	SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
106
107	// The raw.tbuffer and struct.tbuffer intrinsics have two offset args: offset
108	// (the offset that is included in bounds checking and swizzling, to be split
109	// between the instruction's voffset and immoffset fields) and soffset (the
110	// offset that is excluded from bounds checking and swizzling, to go in the
111	// instruction's soffset field). This function takes the first kind of
112	// offset and figures out how to split it between voffset and immoffset.
113	std::pair<SDValue, SDValue> splitBufferOffsets(SDValue Offset,
114	SelectionDAG &DAG) const;
115
116	SDValue widenLoad(LoadSDNode Ld, DAGCombinerInfo &DCI) const*;
117	SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
118	SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
119	SDValue lowerFastUnsafeFDIV(SDValue Op, SelectionDAG &DAG) const;
120	SDValue lowerFastUnsafeFDIV64(SDValue Op, SelectionDAG &DAG) const;
121	SDValue lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const;
122	SDValue LowerFDIV16(SDValue Op, SelectionDAG &DAG) const;
123	SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;
124	SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
125	SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
126	SDValue LowerFFREXP(SDValue Op, SelectionDAG &DAG) const;
127	SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
128	SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
129	SDValue lowerFSQRTF16(SDValue Op, SelectionDAG &DAG) const;
130	SDValue lowerFSQRTF32(SDValue Op, SelectionDAG &DAG) const;
131	SDValue lowerFSQRTF64(SDValue Op, SelectionDAG &DAG) const;
132	SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
133	SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
134	SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
135	SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
136	SDValue adjustLoadValueType(unsigned Opcode, MemSDNode *M,
137	SelectionDAG &DAG, ArrayRef<SDValue> Ops,
138	bool IsIntrinsic = false) const;
139
140	SDValue lowerIntrinsicLoad(MemSDNode M, bool* IsFormat, SelectionDAG &DAG,
141	ArrayRef<SDValue> Ops) const;
142
143	// Call DAG.getMemIntrinsicNode for a load, but first widen a dwordx3 type to
144	// dwordx4 if on SI.
145	SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
146	ArrayRef<SDValue> Ops, EVT MemVT,
147	MachineMemOperand MMO, SelectionDAG &DAG) const*;
148
149	SDValue handleD16VData(SDValue VData, SelectionDAG &DAG,
150	bool ImageStore = false) const;
151
152	/// Converts \p Op, which must be of floating point type, to the
153	/// floating point type \p VT, by either extending or truncating it.
154	SDValue getFPExtOrFPRound(SelectionDAG &DAG,
155	SDValue Op,
156	const SDLoc &DL,
157	EVT VT) const;
158
159	SDValue convertArgType(
160	SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Val,
161	bool Signed, const ISD::InputArg Arg = nullptr) const*;
162
163	/// Custom lowering for ISD::FP_ROUND for MVT::f16.
164	SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
165	SDValue splitFP_ROUNDVectorOp(SDValue Op, SelectionDAG &DAG) const;
166	SDValue lowerScalarBF16FAdd(SDValue Op, SelectionDAG &DAG) const;
167	SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
168	SDValue lowerFMINIMUMNUM_FMAXIMUMNUM(SDValue Op, SelectionDAG &DAG) const;
169	SDValue lowerFMINIMUM_FMAXIMUM(SDValue Op, SelectionDAG &DAG) const;
170	SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const;
171	SDValue promoteUniformOpToI32(SDValue Op, DAGCombinerInfo &DCI) const;
172	SDValue promoteUniformUnaryOpToI32(SDValue Op, DAGCombinerInfo &DCI) const;
173	SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
174	SDValue lowerMUL(SDValue Op, SelectionDAG &DAG) const;
175	SDValue lowerXMULO(SDValue Op, SelectionDAG &DAG) const;
176	SDValue lowerXMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
177
178	SDValue getSegmentAperture(unsigned AS, const SDLoc &DL,
179	SelectionDAG &DAG) const;
180
181	SDValue lowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const;
182	SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
183	SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
184	SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
185	SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
186	SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
187	SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
188
189	SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const;
190	SDValue lowerTrapEndpgm(SDValue Op, SelectionDAG &DAG) const;
191	SDValue lowerTrapHsaQueuePtr(SDValue Op, SelectionDAG &DAG) const;
192	SDValue lowerTrapHsa(SDValue Op, SelectionDAG &DAG) const;
193	SDValue lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const;
194	SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
195
196	SDNode adjustWritemask(MachineSDNode &N, SelectionDAG &DAG) const;
197
198	SDValue performUCharToFloatCombine(SDNode *N,
199	DAGCombinerInfo &DCI) const;
200	SDValue performFCopySignCombine(SDNode N, DAGCombinerInfo &DCI) const*;
201
202	SDValue performSHLPtrCombine(SDNode *N,
203	unsigned AS,
204	EVT MemVT,
205	DAGCombinerInfo &DCI) const;
206
207	SDValue performMemSDNodeCombine(MemSDNode N, DAGCombinerInfo &DCI) const*;
208
209	SDValue splitBinaryBitConstantOp(DAGCombinerInfo &DCI, const SDLoc &SL,
210	unsigned Opc, SDValue LHS,
211	const ConstantSDNode CRHS) const*;
212
213	SDValue performAndCombine(SDNode N, DAGCombinerInfo &DCI) const*;
214	SDValue performOrCombine(SDNode N, DAGCombinerInfo &DCI) const*;
215	SDValue performXorCombine(SDNode N, DAGCombinerInfo &DCI) const*;
216	SDValue performZeroOrAnyExtendCombine(SDNode N, DAGCombinerInfo &DCI) const*;
217	SDValue performSignExtendInRegCombine(SDNode N, DAGCombinerInfo &DCI) const*;
218	SDValue performClassCombine(SDNode N, DAGCombinerInfo &DCI) const*;
219	SDValue getCanonicalConstantFP(SelectionDAG &DAG, const SDLoc &SL, EVT VT,
220	const APFloat &C) const;
221	SDValue performFCanonicalizeCombine(SDNode N, DAGCombinerInfo &DCI) const*;
222
223	SDValue performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
224	SDValue Op0, SDValue Op1,
225	bool IsKnownNoNaNs) const;
226	SDValue performIntMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
227	SDValue Src, SDValue MinVal, SDValue MaxVal,
228	bool Signed) const;
229	SDValue performMinMaxCombine(SDNode N, DAGCombinerInfo &DCI) const*;
230	SDValue performFMed3Combine(SDNode N, DAGCombinerInfo &DCI) const*;
231	SDValue performCvtPkRTZCombine(SDNode N, DAGCombinerInfo &DCI) const*;
232	SDValue performExtractVectorEltCombine(SDNode N, DAGCombinerInfo &DCI) const*;
233	SDValue performInsertVectorEltCombine(SDNode N, DAGCombinerInfo &DCI) const*;
234	SDValue performFPRoundCombine(SDNode N, DAGCombinerInfo &DCI) const*;
235	SDValue performSelectCombine(SDNode N, DAGCombinerInfo &DCI) const*;
236
237	SDValue reassociateScalarOps(SDNode N, SelectionDAG &DAG) const*;
238	unsigned getFusedOpcode(const SelectionDAG &DAG,
239	const SDNode N0, const* SDNode N1) const*;
240	SDValue tryFoldToMad64_32(SDNode N, DAGCombinerInfo &DCI) const*;
241	SDValue foldAddSub64WithZeroLowBitsTo32(SDNode *N,
242	DAGCombinerInfo &DCI) const;
243
244	SDValue performAddCombine(SDNode N, DAGCombinerInfo &DCI) const*;
245	SDValue performPtrAddCombine(SDNode N, DAGCombinerInfo &DCI) const*;
246	SDValue performSubCombine(SDNode N, DAGCombinerInfo &DCI) const*;
247	SDValue performFAddCombine(SDNode N, DAGCombinerInfo &DCI) const*;
248	SDValue performFSubCombine(SDNode N, DAGCombinerInfo &DCI) const*;
249	SDValue performFDivCombine(SDNode N, DAGCombinerInfo &DCI) const*;
250	SDValue performFMulCombine(SDNode N, DAGCombinerInfo &DCI) const*;
251	SDValue performFMACombine(SDNode N, DAGCombinerInfo &DCI) const*;
252	SDValue performSetCCCombine(SDNode N, DAGCombinerInfo &DCI) const*;
253	SDValue performCvtF32UByteNCombine(SDNode N, DAGCombinerInfo &DCI) const*;
254	SDValue performClampCombine(SDNode N, DAGCombinerInfo &DCI) const*;
255	SDValue performRcpCombine(SDNode N, DAGCombinerInfo &DCI) const*;
256
257	bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
258
259	unsigned isCFIntrinsic(const SDNode Intr) const*;
260
261	public:
262	/// \returns True if fixup needs to be emitted for given global value \p GV,
263	/// false otherwise.
264	bool shouldEmitFixup(const GlobalValue GV) const*;
265
266	/// \returns True if GOT relocation needs to be emitted for given global value
267	/// \p GV, false otherwise.
268	bool shouldEmitGOTReloc(const GlobalValue GV) const*;
269
270	/// \returns True if PC-relative relocation needs to be emitted for given
271	/// global value \p GV, false otherwise.
272	bool shouldEmitPCReloc(const GlobalValue GV) const*;
273
274	/// \returns true if this should use a literal constant for an LDS address,
275	/// and not emit a relocation for an LDS global.
276	bool shouldUseLDSConstAddress(const GlobalValue GV) const*;
277
278	/// Check if EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT (<n x e>, var-idx) should be
279	/// expanded into a set of cmp/select instructions.
280	static bool shouldExpandVectorDynExt(unsigned EltSize, unsigned NumElem,
281	bool IsDivergentIdx,
282	const GCNSubtarget *Subtarget);
283
284	bool shouldExpandVectorDynExt(SDNode N) const*;
285
286	bool shouldPreservePtrArith(const Function &F, EVT PtrVT) const override;
287
288	bool canTransformPtrArithOutOfBounds(const Function &F,
289	EVT PtrVT) const override;
290
291	private:
292	/// Returns true if the first real instruction in MBB is 8 bytes and could
293	/// be split by a 32-byte fetch window boundary. Used on GFX950 to avoid
294	/// instruction fetch delays.
295	bool needsFetchWindowAlignment(const MachineBasicBlock &MBB) const;
296
297	// Analyze a combined offset from an amdgcn_s_buffer_load intrinsic and store
298	// the three offsets (voffset, soffset and instoffset) into the SDValue[3]
299	// array pointed to by Offsets.
300	void setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG,
301	SDValue Offsets, Align Alignment = Align (`4`)) const*;
302
303	// Convert the i128 that an addrspace(8) pointer is natively represented as
304	// into the v4i32 that all the buffer intrinsics expect to receive. We can't
305	// add register classes for i128 on pain of the promotion logic going haywire,
306	// so this slightly ugly hack is what we've got. If passed a non-pointer
307	// argument (as would be seen in older buffer intrinsics), does nothing.
308	SDValue bufferRsrcPtrToVector(SDValue MaybePointer, SelectionDAG &DAG) const;
309
310	// Wrap a 64-bit pointer into a v4i32 (which is how all SelectionDAG code
311	// represents ptr addrspace(8)) using the flags specified in the intrinsic.
312	SDValue lowerPointerAsRsrcIntrin(SDNode Op, SelectionDAG &DAG) const*;
313
314	// Handle 8 bit and 16 bit buffer loads
315	SDValue handleByteShortBufferLoads(SelectionDAG &DAG, EVT LoadVT, SDLoc DL,
316	ArrayRef<SDValue> Ops,
317	MachineMemOperand *MMO,
318	bool IsTFE = false) const;
319
320	// Handle 8 bit and 16 bit buffer stores
321	SDValue handleByteShortBufferStores(SelectionDAG &DAG, EVT VDataType,
322	SDLoc DL, SDValue Ops[],
323	MemSDNode M) const*;
324
325	public:
326	SITargetLowering(const TargetMachine &tm, const GCNSubtarget &STI);
327
328	const GCNSubtarget getSubtarget() const*;
329
330	ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
331
332	bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode, EVT DestVT,
333	EVT SrcVT) const override;
334
335	bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy,
336	LLT SrcTy) const override;
337
338	bool isShuffleMaskLegal(ArrayRef<int> /Mask/, EVT /VT/) const override;
339
340	// While address space 7 should never make it to codegen, it still needs to
341	// have a MVT to prevent some analyses that query this function from breaking.
342	// We use the custum MVT::amdgpuBufferFatPointer and
343	// amdgpu::amdgpuBufferStridedPointer for this, though we use v8i32 for the
344	// memory type (which is probably unused).
345	MVT getPointerTy(const DataLayout &DL, unsigned AS) const override;
346	MVT getPointerMemTy(const DataLayout &DL, unsigned AS) const override;
347
348	void getTgtMemIntrinsic(SmallVectorImpl<IntrinsicInfo> &, const CallBase &,
349	MachineFunction &MF,
350	unsigned IntrinsicID) const override;
351
352	void CollectTargetIntrinsicOperands(const CallInst &I,
353	SmallVectorImpl<SDValue> &Ops,
354	SelectionDAG &DAG) const override;
355
356	bool getAddrModeArguments(const IntrinsicInst *I,
357	SmallVectorImpl<Value *> &Ops,
358	Type &AccessTy) const* override;
359
360	bool isLegalFlatAddressingMode(const AddrMode &AM, unsigned AddrSpace) const;
361	bool isLegalGlobalAddressingMode(const AddrMode &AM) const;
362	bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
363	unsigned AS,
364	Instruction I = nullptr) const* override;
365
366	bool canMergeStoresTo(unsigned AS, EVT MemVT,
367	const MachineFunction &MF) const override;
368
369	bool allowsMisalignedMemoryAccessesImpl(
370	unsigned Size, unsigned AddrSpace, Align Alignment,
371	MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
372	unsigned IsFast = nullptr) const*;
373
374	bool allowsMisalignedMemoryAccesses(
375	LLT Ty, unsigned AddrSpace, Align Alignment,
376	MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
377	unsigned IsFast = nullptr) const* override {
378	if (IsFast)
379	*IsFast = `0`;
380	return allowsMisalignedMemoryAccessesImpl(Size: Ty.getSizeInBits(), AddrSpace,
381	Alignment, Flags, IsFast);
382	}
383
384	bool allowsMisalignedMemoryAccesses(
385	EVT VT, unsigned AS, Align Alignment,
386	MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
387	unsigned IsFast = nullptr) const* override;
388
389	EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
390	const AttributeList &FuncAttributes) const override;
391
392	bool isMemOpHasNoClobberedMemOperand(const SDNode N) const*;
393
394	static bool isNonGlobalAddrSpace(unsigned AS);
395
396	bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
397
398	TargetLoweringBase::LegalizeTypeAction
399	getPreferredVectorAction(MVT VT) const override;
400
401	bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
402	Type Ty) const* override;
403
404	bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
405	unsigned Index) const override;
406	bool isExtractVecEltCheap(EVT VT, unsigned Index) const override;
407
408	bool isTypeDesirableForOp(unsigned Op, EVT VT) const override;
409
410	bool isOffsetFoldingLegal(const GlobalAddressSDNode GA) const* override;
411
412	unsigned combineRepeatedFPDivisors() const override {
413	// Combine multiple FDIVs with the same divisor into multiple FMULs by the
414	// reciprocal.
415	return `2`;
416	}
417
418	bool supportSplitCSR(MachineFunction MF) const* override;
419	void initializeSplitCSR(MachineBasicBlock Entry) const* override;
420	void insertCopiesSplitCSR(
421	MachineBasicBlock *Entry,
422	const SmallVectorImpl<MachineBasicBlock > &Exits) const* override;
423
424	SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
425	bool isVarArg,
426	const SmallVectorImpl<ISD::InputArg> &Ins,
427	const SDLoc &DL, SelectionDAG &DAG,
428	SmallVectorImpl<SDValue> &InVals) const override;
429
430	bool CanLowerReturn(CallingConv::ID CallConv,
431	MachineFunction &MF, bool isVarArg,
432	const SmallVectorImpl<ISD::OutputArg> &Outs,
433	LLVMContext &Context, const Type RetTy) const* override;
434
435	SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
436	const SmallVectorImpl<ISD::OutputArg> &Outs,
437	const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
438	SelectionDAG &DAG) const override;
439
440	void passSpecialInputs(
441	CallLoweringInfo &CLI,
442	CCState &CCInfo,
443	const SIMachineFunctionInfo &Info,
444	SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
445	SmallVectorImpl<SDValue> &MemOpChains,
446	SDValue Chain) const;
447
448	SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
449	CallingConv::ID CallConv, bool isVarArg,
450	const SmallVectorImpl<ISD::InputArg> &Ins,
451	const SDLoc &DL, SelectionDAG &DAG,
452	SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
453	SDValue ThisVal) const;
454
455	bool mayBeEmittedAsTailCall(const CallInst ) const* override;
456
457	bool isEligibleForTailCallOptimization(
458	SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
459	const SmallVectorImpl<ISD::OutputArg> &Outs,
460	const SmallVectorImpl<SDValue> &OutVals,
461	const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
462
463	SDValue LowerCall(CallLoweringInfo &CLI,
464	SmallVectorImpl<SDValue> &InVals) const override;
465
466	SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
467	SDValue LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
468	SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
469	SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
470
471	SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
472	SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
473	SDValue lowerGET_FPENV(SDValue Op, SelectionDAG &DAG) const;
474	SDValue lowerSET_FPENV(SDValue Op, SelectionDAG &DAG) const;
475	SDValue lowerROTR(SDValue Op, SelectionDAG &DAG) const;
476
477	Register getRegisterByName(const char* RegName, LLT VT,
478	const MachineFunction &MF) const override;
479
480	MachineBasicBlock *splitKillBlock(MachineInstr &MI,
481	MachineBasicBlock BB) const*;
482
483	void bundleInstWithWaitcnt(MachineInstr &MI) const;
484	MachineBasicBlock *emitGWSMemViolTestLoop(MachineInstr &MI,
485	MachineBasicBlock BB) const*;
486
487	MachineBasicBlock *
488	EmitInstrWithCustomInserter(MachineInstr &MI,
489	MachineBasicBlock BB) const* override;
490
491	bool enableAggressiveFMAFusion(EVT VT) const override;
492	bool enableAggressiveFMAFusion(LLT Ty) const override;
493	EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
494	EVT VT) const override;
495	MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override;
496	LLT getPreferredShiftAmountTy(LLT Ty) const override;
497
498	bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
499	EVT VT) const override;
500	bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
501	const LLT Ty) const override;
502	bool isFMADLegal(const SelectionDAG &DAG, const SDNode N) const* override;
503	bool isFMADLegal(const MachineInstr &MI, const LLT Ty) const override;
504
505	SDValue splitUnaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
506	SDValue splitBinaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
507	SDValue splitTernaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
508	SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
509	void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
510	SelectionDAG &DAG) const override;
511
512	SDValue PerformDAGCombine(SDNode N, DAGCombinerInfo &DCI) const* override;
513	SDNode PostISelFolding(MachineSDNode N, SelectionDAG &DAG) const override;
514	void AddMemOpInit(MachineInstr &MI) const;
515	void AdjustInstrPostInstrSelection(MachineInstr &MI,
516	SDNode Node) const* override;
517
518	SDNode legalizeTargetIndependentNode(SDNode Node, SelectionDAG &DAG) const;
519
520	MachineSDNode wrapAddr64Rsrc(SelectionDAG &DAG, const* SDLoc &DL,
521	SDValue Ptr) const;
522	MachineSDNode buildRSRC(SelectionDAG &DAG, const* SDLoc &DL, SDValue Ptr,
523	uint32_t RsrcDword1, uint64_t RsrcDword2And3) const;
524	std::pair<unsigned, const TargetRegisterClass *>
525	getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
526	StringRef Constraint, MVT VT) const override;
527	ConstraintType getConstraintType(StringRef Constraint) const override;
528	void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
529	std::vector<SDValue> &Ops,
530	SelectionDAG &DAG) const override;
531	bool getAsmOperandConstVal(SDValue Op, uint64_t &Val) const;
532	bool checkAsmConstraintVal(SDValue Op, StringRef Constraint,
533	uint64_t Val) const;
534	bool checkAsmConstraintValA(SDValue Op,
535	uint64_t Val,
536	unsigned MaxSize = `64`) const;
537	SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, const SDLoc &DL,
538	SDValue V) const;
539
540	void finalizeLowering(MachineFunction &MF) const override;
541
542	void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
543	const APInt &DemandedElts,
544	const SelectionDAG &DAG,
545	unsigned Depth = `0`) const override;
546	void computeKnownBitsForStackObjectPointer(KnownBits &Known,
547	const MachineFunction &MF,
548	Align Alignment) const override;
549	void computeKnownBitsForTargetInstr(GISelValueTracking &Analysis, Register R,
550	KnownBits &Known,
551	const APInt &DemandedElts,
552	const MachineRegisterInfo &MRI,
553	unsigned Depth = `0`) const override;
554
555	Align computeKnownAlignForTargetInstr(GISelValueTracking &Analysis,
556	Register R,
557	const MachineRegisterInfo &MRI,
558	unsigned Depth = `0`) const override;
559	bool isSDNodeSourceOfDivergence(const SDNode N, FunctionLoweringInfo FLI,
560	UniformityInfo UA) const* override;
561
562	bool hasMemSDNodeUser(SDNode N) const*;
563
564	bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
565	SDValue N1) const override;
566
567	bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
568	Register N1) const override;
569
570	bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
571	SDNodeFlags UserFlags = {}, unsigned MaxDepth = `5`) const;
572	bool isCanonicalized(Register Reg, const MachineFunction &MF,
573	unsigned MaxDepth = `5`) const;
574	bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const;
575	bool denormalsEnabledForType(LLT Ty, const MachineFunction &MF) const;
576
577	bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts,
578	const SelectionDAG &DAG, bool SNaN = false,
579	unsigned Depth = `0`) const override;
580	AtomicExpansionKind
581	shouldExpandAtomicRMWInIR(const AtomicRMWInst ) const* override;
582	AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst LI) const* override;
583	AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst SI) const* override;
584	AtomicExpansionKind
585	shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst AI) const* override;
586
587	void emitExpandAtomicAddrSpacePredicate(Instruction AI) const*;
588	void emitExpandAtomicRMW(AtomicRMWInst AI) const* override;
589	void emitExpandAtomicCmpXchg(AtomicCmpXchgInst CI) const* override;
590	void emitExpandAtomicLoad(LoadInst LI) const* override;
591	void emitExpandAtomicStore(StoreInst SI) const* override;
592
593	LoadInst *
594	lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst AI) const* override;
595
596	const TargetRegisterClass *getRegClassFor(MVT VT,
597	bool isDivergent) const override;
598	bool requiresUniformRegister(MachineFunction &MF,
599	const Value V) const* override;
600	Align getPrefLoopAlignment(MachineLoop ML) const* override;
601	unsigned
602	getMaxPermittedBytesForAlignment(MachineBasicBlock MBB) const* override;
603
604	void allocateHSAUserSGPRs(CCState &CCInfo,
605	MachineFunction &MF,
606	const SIRegisterInfo &TRI,
607	SIMachineFunctionInfo &Info) const;
608
609	void allocatePreloadKernArgSGPRs(CCState &CCInfo,
610	SmallVectorImpl<CCValAssign> &ArgLocs,
611	const SmallVectorImpl<ISD::InputArg> &Ins,
612	MachineFunction &MF,
613	const SIRegisterInfo &TRI,
614	SIMachineFunctionInfo &Info) const;
615
616	void allocateLDSKernelId(CCState &CCInfo, MachineFunction &MF,
617	const SIRegisterInfo &TRI,
618	SIMachineFunctionInfo &Info) const;
619
620	void allocateSystemSGPRs(CCState &CCInfo,
621	MachineFunction &MF,
622	SIMachineFunctionInfo &Info,
623	CallingConv::ID CallConv,
624	bool IsShader) const;
625
626	void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
627	MachineFunction &MF,
628	const SIRegisterInfo &TRI,
629	SIMachineFunctionInfo &Info) const;
630	void allocateSpecialInputSGPRs(
631	CCState &CCInfo,
632	MachineFunction &MF,
633	const SIRegisterInfo &TRI,
634	SIMachineFunctionInfo &Info) const;
635
636	void allocateSpecialInputVGPRs(CCState &CCInfo,
637	MachineFunction &MF,
638	const SIRegisterInfo &TRI,
639	SIMachineFunctionInfo &Info) const;
640	void allocateSpecialInputVGPRsFixed(CCState &CCInfo,
641	MachineFunction &MF,
642	const SIRegisterInfo &TRI,
643	SIMachineFunctionInfo &Info) const;
644
645	MachineMemOperand::Flags
646	getTargetMMOFlags(const Instruction &I) const override;
647	};
648
649	// Returns true if argument is a boolean value which is not serialized into
650	// memory or argument and does not require v_cndmask_b32 to be deserialized.
651	bool isBoolSGPR(SDValue V);
652
653	} // End namespace llvm
654
655	#endif
656

Browse the source code of llvm_projects/llvm/lib/Target/AMDGPU/SIISelLowering.h