//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file a TargetTransformInfoImplBase conforming object specific to the
/// AMDGPU target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//
16
17#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
18#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
19
20#include "AMDGPU.h"
21#include "llvm/CodeGen/BasicTTIImpl.h"
22#include "llvm/Support/AMDGPUAddrSpace.h"
23#include <optional>
24
25namespace llvm {
26
// Forward declarations to keep this header light; full definitions are only
// needed in the corresponding .cpp file.
class AMDGPUTargetMachine;
class GCNSubtarget;
class InstCombiner;
class Loop;
class ScalarEvolution;
class SITargetLowering;
class Type;
class Value;
35
36class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
37 using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
38 using TTI = TargetTransformInfo;
39
40 friend BaseT;
41
42 Triple TargetTriple;
43
44 const TargetSubtargetInfo *ST;
45 const TargetLoweringBase *TLI;
46
47 const TargetSubtargetInfo *getST() const { return ST; }
48 const TargetLoweringBase *getTLI() const { return TLI; }
49
50public:
51 explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);
52
53 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
54 TTI::UnrollingPreferences &UP,
55 OptimizationRemarkEmitter *ORE) const override;
56
57 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
58 TTI::PeelingPreferences &PP) const override;
59
60 uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override;
61};
62
63class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
64 using BaseT = BasicTTIImplBase<GCNTTIImpl>;
65 using TTI = TargetTransformInfo;
66
67 friend BaseT;
68
69 const GCNSubtarget *ST;
70 const SITargetLowering *TLI;
71 AMDGPUTTIImpl CommonTTI;
72 bool IsGraphics;
73 bool HasFP32Denormals;
74 bool HasFP64FP16Denormals;
75 static constexpr bool InlinerVectorBonusPercent = 0;
76
77 static const FeatureBitset InlineFeatureIgnoreList;
78
79 const GCNSubtarget *getST() const { return ST; }
80 const SITargetLowering *getTLI() const { return TLI; }
81
82 static inline int getFullRateInstrCost() {
83 return TargetTransformInfo::TCC_Basic;
84 }
85
86 static inline int getHalfRateInstrCost(TTI::TargetCostKind CostKind) {
87 return CostKind == TTI::TCK_CodeSize ? 2
88 : 2 * TargetTransformInfo::TCC_Basic;
89 }
90
91 // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
92 // should be 2 or 4.
93 static inline int getQuarterRateInstrCost(TTI::TargetCostKind CostKind) {
94 return CostKind == TTI::TCK_CodeSize ? 2
95 : 4 * TargetTransformInfo::TCC_Basic;
96 }
97
98 int getTransInstrCost(TTI::TargetCostKind CostKind) const;
99
100 // On some parts, normal fp64 operations are half rate, and others
101 // quarter. This also applies to some integer operations.
102 int get64BitInstrCost(TTI::TargetCostKind CostKind) const;
103
104 std::pair<InstructionCost, MVT> getTypeLegalizationCost(Type *Ty) const;
105
106 /// \returns true if V might be divergent even when all of its operands
107 /// are uniform.
108 bool isSourceOfDivergence(const Value *V) const;
109
110 /// Returns true for the target specific set of operations which produce
111 /// uniform result even taking non-uniform arguments.
112 bool isAlwaysUniform(const Value *V) const;
113
114public:
115 explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);
116
117 bool hasBranchDivergence(const Function *F = nullptr) const override;
118
119 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
120 TTI::UnrollingPreferences &UP,
121 OptimizationRemarkEmitter *ORE) const override;
122
123 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
124 TTI::PeelingPreferences &PP) const override;
125
126 TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override {
127 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
128 return TTI::PSK_FastHardware;
129 }
130
131 unsigned getNumberOfRegisters(unsigned RCID) const override;
132 TypeSize
133 getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const override;
134 unsigned getMinVectorRegisterBitWidth() const override;
135 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override;
136 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
137 unsigned ChainSizeInBytes,
138 VectorType *VecTy) const override;
139 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
140 unsigned ChainSizeInBytes,
141 VectorType *VecTy) const override;
142 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override;
143
144 bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
145 unsigned AddrSpace) const;
146 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
147 unsigned AddrSpace) const override;
148 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
149 unsigned AddrSpace) const override;
150
151 uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override;
152 Type *getMemcpyLoopLoweringType(
153 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
154 unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
155 std::optional<uint32_t> AtomicElementSize) const override;
156
157 void getMemcpyLoopResidualLoweringType(
158 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
159 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
160 Align SrcAlign, Align DestAlign,
161 std::optional<uint32_t> AtomicCpySize) const override;
162 unsigned getMaxInterleaveFactor(ElementCount VF) const override;
163
164 bool getTgtMemIntrinsic(IntrinsicInst *Inst,
165 MemIntrinsicInfo &Info) const override;
166
167 InstructionCost getArithmeticInstrCost(
168 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
169 TTI::OperandValueInfo Op1Info = {.Kind: TTI::OK_AnyValue, .Properties: TTI::OP_None},
170 TTI::OperandValueInfo Op2Info = {.Kind: TTI::OK_AnyValue, .Properties: TTI::OP_None},
171 ArrayRef<const Value *> Args = {},
172 const Instruction *CxtI = nullptr) const override;
173
174 InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
175 const Instruction *I = nullptr) const override;
176
177 bool isInlineAsmSourceOfDivergence(const CallInst *CI,
178 ArrayRef<unsigned> Indices = {}) const;
179
180 using BaseT::getVectorInstrCost;
181 InstructionCost
182 getVectorInstrCost(unsigned Opcode, Type *ValTy, TTI::TargetCostKind CostKind,
183 unsigned Index, const Value *Op0, const Value *Op1,
184 TTI::VectorInstrContext VIC =
185 TTI::VectorInstrContext::None) const override;
186
187 bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const;
188
189 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
190 // Address space casts must cast between different address spaces.
191 if (FromAS == ToAS)
192 return false;
193
194 // Casts between any aliasing address spaces are valid.
195 return AMDGPU::addrspacesMayAlias(AS1: FromAS, AS2: ToAS);
196 }
197
198 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
199 return AMDGPU::addrspacesMayAlias(AS1: AS0, AS2: AS1);
200 }
201
202 unsigned getFlatAddressSpace() const override {
203 // Don't bother running InferAddressSpaces pass on graphics shaders which
204 // don't use flat addressing.
205 if (IsGraphics)
206 return -1;
207 return AMDGPUAS::FLAT_ADDRESS;
208 }
209
210 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
211 Intrinsic::ID IID) const override;
212
213 bool
214 canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
215 return AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
216 AS != AMDGPUAS::PRIVATE_ADDRESS;
217 }
218
219 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
220 Value *NewV) const override;
221
222 bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0,
223 const Value *Op1, InstCombiner &IC) const;
224
225 bool simplifyDemandedLaneMaskArg(InstCombiner &IC, IntrinsicInst &II,
226 unsigned LaneAgIdx) const;
227
228 std::optional<Instruction *>
229 instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;
230
231 Value *simplifyAMDGCNLaneIntrinsicDemanded(InstCombiner &IC,
232 IntrinsicInst &II,
233 const APInt &DemandedElts,
234 APInt &UndefElts) const;
235
236 Instruction *hoistLaneIntrinsicThroughOperand(InstCombiner &IC,
237 IntrinsicInst &II) const;
238
239 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
240 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
241 APInt &UndefElts2, APInt &UndefElts3,
242 std::function<void(Instruction *, unsigned, APInt, APInt &)>
243 SimplifyAndSetOp) const override;
244
245 InstructionCost getVectorSplitCost() const { return 0; }
246
247 InstructionCost
248 getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
249 ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
250 VectorType *SubTp, ArrayRef<const Value *> Args = {},
251 const Instruction *CxtI = nullptr) const override;
252
253 bool isProfitableToSinkOperands(Instruction *I,
254 SmallVectorImpl<Use *> &Ops) const override;
255
256 bool areInlineCompatible(const Function *Caller,
257 const Function *Callee) const override;
258
259 int getInliningLastCallToStaticBonus() const override;
260 unsigned getInliningThresholdMultiplier() const override { return 11; }
261 unsigned adjustInliningThreshold(const CallBase *CB) const override;
262 unsigned getCallerAllocaCost(const CallBase *CB,
263 const AllocaInst *AI) const override;
264
265 int getInlinerVectorBonusPercent() const override {
266 return InlinerVectorBonusPercent;
267 }
268
269 InstructionCost
270 getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
271 std::optional<FastMathFlags> FMF,
272 TTI::TargetCostKind CostKind) const override;
273
274 InstructionCost getPartialReductionCost(
275 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
276 ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
277 TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
278 TTI::TargetCostKind CostKind,
279 std::optional<FastMathFlags> FMF) const override {
280 return InstructionCost::getInvalid();
281 }
282
283 InstructionCost
284 getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
285 TTI::TargetCostKind CostKind) const override;
286 InstructionCost
287 getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
288 TTI::TargetCostKind CostKind) const override;
289
290 /// Data cache line size for LoopDataPrefetch pass. Has no use before GFX12.
291 unsigned getCacheLineSize() const override { return 128; }
292
293 /// How much before a load we should place the prefetch instruction.
294 /// This is currently measured in number of IR instructions.
295 unsigned getPrefetchDistance() const override;
296
297 /// \return if target want to issue a prefetch in address space \p AS.
298 bool shouldPrefetchAddressSpace(unsigned AS) const override;
299 void collectKernelLaunchBounds(
300 const Function &F,
301 SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;
302
303 enum class KnownIEEEMode { Unknown, On, Off };
304
305 /// Return KnownIEEEMode::On if we know if the use context can assume
306 /// "amdgpu-ieee"="true" and KnownIEEEMode::Off if we can assume
307 /// "amdgpu-ieee"="false".
308 KnownIEEEMode fpenvIEEEMode(const Instruction &I) const;
309
310 /// Account for loads of i8 vector types to have reduced cost. For
311 /// example the cost of load 4 i8s values is one is the cost of loading
312 /// a single i32 value.
313 InstructionCost getMemoryOpCost(
314 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
315 TTI::TargetCostKind CostKind,
316 TTI::OperandValueInfo OpInfo = {.Kind: TTI::OK_AnyValue, .Properties: TTI::OP_None},
317 const Instruction *I = nullptr) const override;
318
319 /// When counting parts on AMD GPUs, account for i8s being grouped
320 /// together under a single i32 value. Otherwise fall back to base
321 /// implementation.
322 unsigned getNumberOfParts(Type *Tp) const override;
323
324 ValueUniformity getValueUniformity(const Value *V) const override;
325
326 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
327 StackOffset BaseOffset, bool HasBaseReg,
328 int64_t Scale,
329 unsigned AddrSpace) const override;
330
331 bool isLSRCostLess(const TTI::LSRCost &A,
332 const TTI::LSRCost &B) const override;
333 bool isNumRegsMajorCostOfLSR() const override;
334 bool shouldDropLSRSolutionIfLessProfitable() const override;
335
336 bool isUniform(const Instruction *I,
337 const SmallBitVector &UniformArgs) const override;
338};
339
340} // end namespace llvm
341
342#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
343