//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file declares a TargetTransformInfoImplBase conforming object specific
/// to the AMDGPU target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target-independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
#include <optional>

namespace llvm {

class AMDGPUTargetMachine;
class GCNSubtarget;
class InstCombiner;
class Loop;
class ScalarEvolution;
class SITargetLowering;
class Type;
class Value;

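/// Common, subtarget-independent portion of the AMDGPU TTI implementation; the
/// GCN-specific implementation below embeds an instance of it (see the
/// CommonTTI member of GCNTTIImpl).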
class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

  const TargetSubtargetInfo *ST;
  const TargetLoweringBase *TLI;

  const TargetSubtargetInfo *getST() const { return ST; }
  const TargetLoweringBase *getTLI() const { return TLI; }

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) const override;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) const override;

  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override;
};

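/// TTI implementation for GCN (amdgcn) subtargets. It refines the generic
/// answers using GCNSubtarget and SITargetLowering, and delegates the common
/// queries to the embedded AMDGPUTTIImpl.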
class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const SITargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphics;
  bool HasFP32Denormals;
  bool HasFP64FP16Denormals;
  static constexpr int InlinerVectorBonusPercent = 0;

  static const FeatureBitset InlineFeatureIgnoreList;

  const GCNSubtarget *getST() const { return ST; }
  const SITargetLowering *getTLI() const { return TLI; }

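  // Instruction costs are expressed relative to a full-rate instruction:
  // half- and quarter-rate instructions are modeled as 2x and 4x the basic
  // cost, respectively (except when only code size is being estimated).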
  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost(TTI::TargetCostKind CostKind) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // should be 2 or 4.
  static inline int getQuarterRateInstrCost(TTI::TargetCostKind CostKind) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 4 * TargetTransformInfo::TCC_Basic;
  }

  // On some subtargets, normal fp64 operations are half rate, while on others
  // they are quarter rate. This also applies to some integer operations.
  int get64BitInstrCost(TTI::TargetCostKind CostKind) const;

  std::pair<InstructionCost, MVT> getTypeLegalizationCost(Type *Ty) const;

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

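  /// GCN executes threads in wavefronts, so branch conditions can diverge
  /// between the lanes of a wave.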
  bool hasBranchDivergence(const Function *F = nullptr) const override;

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) const override;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) const override;

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getNumberOfRegisters(unsigned RCID) const override;
  TypeSize
  getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const override;
  unsigned getMinVectorRegisterBitWidth() const override;
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const override;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const override;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const override;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const override;

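  // Hooks used when expanding memcpy/memmove intrinsics inline: the size
  // threshold below which expansion is done, and the access types to use for
  // the main load/store loop and for the residual bytes that remain after it.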
  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override;
  Type *getMemcpyLoopLoweringType(
      LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
      unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
      std::optional<uint32_t> AtomicElementSize) const override;

  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      Align SrcAlign, Align DestAlign,
      std::optional<uint32_t> AtomicCpySize) const override;
  unsigned getMaxInterleaveFactor(ElementCount VF) const override;

  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) const override;

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = {},
      const Instruction *CxtI = nullptr) const override;

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) const override;

  bool isInlineAsmSourceOfDivergence(const CallInst *CI,
                                     ArrayRef<unsigned> Indices = {}) const;

  using BaseT::getVectorInstrCost;
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, const Value *Op0,
                                     const Value *Op1) const override;

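  // Divergence analysis hooks: report which values are known to differ
  // between the lanes of a wavefront and which are guaranteed to be uniform.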
  bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const;
  bool isSourceOfDivergence(const Value *V) const override;
  bool isAlwaysUniform(const Value *V) const override;

  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
    // Address space casts must cast between different address spaces.
    if (FromAS == ToAS)
      return false;

    // Casts between any aliasing address spaces are valid.
    return AMDGPU::addrspacesMayAlias(FromAS, ToAS);
  }

  bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
    return AMDGPU::addrspacesMayAlias(AS0, AS1);
  }

  unsigned getFlatAddressSpace() const override {
    // Don't bother running InferAddressSpaces pass on graphics shaders which
    // don't use flat addressing.
    if (IsGraphics)
      return -1;
    return AMDGPUAS::FLAT_ADDRESS;
  }

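  /// Collect the operand indices of intrinsic \p IID that are flat pointers
  /// and are therefore candidates for rewriting to a more specific address
  /// space by the InferAddressSpaces pass.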
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const override;

  bool
  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
    return AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
           AS != AMDGPUAS::PRIVATE_ADDRESS;
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const override;

  bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0,
                                 const Value *Op1, InstCombiner &IC) const;

  bool simplifyDemandedLaneMaskArg(InstCombiner &IC, IntrinsicInst &II,
                                   unsigned LaneArgIdx) const;

  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;

  Value *simplifyAMDGCNLaneIntrinsicDemanded(InstCombiner &IC,
                                             IntrinsicInst &II,
                                             const APInt &DemandedElts,
                                             APInt &UndefElts) const;

  Instruction *hoistLaneIntrinsicThroughOperand(InstCombiner &IC,
                                                IntrinsicInst &II) const;

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const override;

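  /// Splitting an illegal vector type into legal pieces is modeled as free.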
  InstructionCost getVectorSplitCost() const { return 0; }

  InstructionCost
  getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
                 ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
                 VectorType *SubTp, ArrayRef<const Value *> Args = {},
                 const Instruction *CxtI = nullptr) const override;

  bool isProfitableToSinkOperands(Instruction *I,
                                  SmallVectorImpl<Use *> &Ops) const override;

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override;

  int getInliningLastCallToStaticBonus() const override;
  unsigned getInliningThresholdMultiplier() const override { return 11; }
  unsigned adjustInliningThreshold(const CallBase *CB) const override;
  unsigned getCallerAllocaCost(const CallBase *CB,
                               const AllocaInst *AI) const override;

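  /// No extra inlining bonus is given for calls containing vector code (see
  /// InlinerVectorBonusPercent above).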
  int getInlinerVectorBonusPercent() const override {
    return InlinerVectorBonusPercent;
  }

  InstructionCost
  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                             std::optional<FastMathFlags> FMF,
                             TTI::TargetCostKind CostKind) const override;

  InstructionCost
  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                        TTI::TargetCostKind CostKind) const override;
  InstructionCost
  getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
                         TTI::TargetCostKind CostKind) const override;

  /// Data cache line size for the LoopDataPrefetch pass; unused before GFX12.
  unsigned getCacheLineSize() const override { return 128; }

  /// How far ahead of a load we should place the prefetch instruction.
  /// This is currently measured in number of IR instructions.
  unsigned getPrefetchDistance() const override;

  /// \return true if the target wants to prefetch in address space \p AS.
  bool shouldPrefetchAddressSpace(unsigned AS) const override;
  void collectKernelLaunchBounds(
      const Function &F,
      SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;

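  /// Tri-state answer for queries about the effective "amdgpu-ieee"
  /// floating-point environment of an instruction's context.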
  enum class KnownIEEEMode { Unknown, On, Off };

  /// Return KnownIEEEMode::On if we know the use context can assume
  /// "amdgpu-ieee"="true", and KnownIEEEMode::Off if we can assume
  /// "amdgpu-ieee"="false".
  KnownIEEEMode fpenvIEEEMode(const Instruction &I) const;

  /// Account for loads of i8 vector types having reduced cost. For example,
  /// the cost of loading 4 i8 values is the same as the cost of loading a
  /// single i32 value.
  InstructionCost getMemoryOpCost(
      unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override;

  /// When counting parts on AMD GPUs, account for i8s being grouped together
  /// under a single i32 value. Otherwise fall back to the base implementation.
  unsigned getNumberOfParts(Type *Tp) const override;
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H