//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfoImplBase conforming object specific
/// to the AArch64 target machine. It uses the target's detailed information
/// to provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H

#include "AArch64.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/InstructionCost.h"
#include <cstdint>
#include <optional>

namespace llvm {

class APInt;
class Instruction;
class IntrinsicInst;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;
class VectorType;

class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
  using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const AArch64Subtarget *ST;
  const AArch64TargetLowering *TLI;

  static const FeatureBitset InlineInverseFeatures;

  const AArch64Subtarget *getST() const { return ST; }
  const AArch64TargetLowering *getTLI() const { return TLI; }

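  // Kinds of structured NEON/SVE memory intrinsics (ld2/st2, ld3/st3,
  // ld4/st4), classified by the number of interleaved elements per access.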
  enum MemIntrinsicType {
    VECTOR_LDST_TWO_ELEMENTS,
    VECTOR_LDST_THREE_ELEMENTS,
    VECTOR_LDST_FOUR_ELEMENTS
  };

  bool isWideningInstruction(Type *DstTy, unsigned Opcode,
                             ArrayRef<const Value *> Args,
                             Type *SrcOverrideTy = nullptr) const;

  /// A helper function called by 'getVectorInstrCost'.
  ///
  /// 'Val' and 'Index' are forwarded from 'getVectorInstrCost'; 'HasRealUse'
  /// indicates whether the vector instruction is available in the input IR or
  /// just imaginary in vectorizer passes.
  /// \param ScalarUserAndIdx encodes the information about extracts from a
  /// vector, with 'Scalar' being the value being extracted, 'User' being the
  /// user of the extract (nullptr if the user is not known before
  /// vectorization), and 'Idx' being the extract lane.
  InstructionCost getVectorInstrCostHelper(
      unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
      bool HasRealUse, const Instruction *I = nullptr, Value *Scalar = nullptr,
      ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx = {}) const;

public:
  explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override;

  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const override;

  unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
                                unsigned DefaultCallPenalty) const override;

  uint64_t getFeatureMask(const Function &F) const override;

  bool isMultiversionedFunction(const Function &F) const override;

  /// \name Scalar TTI Implementations
  /// @{

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(int64_t Val) const;
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind) const override;
  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const override;
  InstructionCost
  getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                      Type *Ty, TTI::TargetCostKind CostKind) const override;
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  bool enableInterleavedAccessVectorization() const override { return true; }

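  // Masked interleaved accesses need the predicated structured loads/stores
  // that SVE provides.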
  bool enableMaskedInterleavedAccessVectorization() const override {
    return ST->hasSVE();
  }

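  // ClassID 1 selects the vector register file: 32 vector registers (V0-V31)
  // when NEON is available. The scalar class has 31 allocatable
  // general-purpose registers (X0-X30; SP is not allocatable).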
  unsigned getNumberOfRegisters(unsigned ClassID) const override {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 32;
      return 0;
    }
    return 31;
  }

  InstructionCost
  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                        TTI::TargetCostKind CostKind) const override;

  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const override;

  TypeSize
  getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override;

  unsigned getMinVectorRegisterBitWidth() const override {
    return ST->getMinVectorRegisterBitWidth();
  }

  std::optional<unsigned> getVScaleForTuning() const override {
    return ST->getVScaleForTuning();
  }

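  // vscale can be assumed to be a power of two here because the backend only
  // supports SVE implementations whose vector length is a power of two.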
  bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }

  bool shouldMaximizeVectorBandwidth(
      TargetTransformInfo::RegisterKind K) const override;

  /// Try to return an estimate cost factor that can be used as a multiplier
  /// when scalarizing an operation for a vector with ElementCount \p VF.
  /// For scalable vectors this currently takes the most pessimistic view based
  /// upon the maximum possible value for vscale.
  unsigned getMaxNumElements(ElementCount VF) const {
    if (!VF.isScalable())
      return VF.getFixedValue();

    return VF.getKnownMinValue() * ST->getVScaleForTuning();
  }

  unsigned getMaxInterleaveFactor(ElementCount VF) const override;

  bool prefersVectorizedAddressing() const override;

  InstructionCost
  getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                        unsigned AddressSpace,
                        TTI::TargetCostKind CostKind) const override;

  InstructionCost
  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         TTI::TargetCostKind CostKind,
                         const Instruction *I = nullptr) const override;

  bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst,
                          Type *Src) const;

  InstructionCost
  getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                   TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
                   const Instruction *I = nullptr) const override;

  InstructionCost
  getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                           unsigned Index,
                           TTI::TargetCostKind CostKind) const override;

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) const override;

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, const Value *Op0,
                                     const Value *Op1) const override;

  /// \param ScalarUserAndIdx encodes the information about extracts from a
  /// vector, with 'Scalar' being the value being extracted, 'User' being the
  /// user of the extract (nullptr if the user is not known before
  /// vectorization), and 'Idx' being the extract lane.
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Scalar,
                                     ArrayRef<std::tuple<Value *, User *, int>>
                                         ScalarUserAndIdx) const override;

  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index) const override;

  InstructionCost
  getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
                         TTI::TargetCostKind CostKind) const override;

  InstructionCost
  getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy,
                                TTI::TargetCostKind CostKind) const;

  InstructionCost getSpliceCost(VectorType *Tp, int Index,
                                TTI::TargetCostKind CostKind) const;

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = {},
      const Instruction *CxtI = nullptr) const override;

  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr) const override;

  InstructionCost getCmpSelInstrCost(
      unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override;

  TTI::MemCmpExpansionOptions
  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
  bool useNeonVector(const Type *Ty) const;

  InstructionCost getMemoryOpCost(
      unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override;

  InstructionCost
  getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const override;

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) const override;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) const override;

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const override;

  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) const override;

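  // Scalable vectors support pointer, half/float/double, bfloat (when +bf16
  // is available), and i1/i8/i16/i32/i64 element types.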
  bool isElementTypeLegalForScalableVector(Type *Ty) const override {
    if (Ty->isPointerTy())
      return true;

    if (Ty->isBFloatTy() && ST->hasBF16())
      return true;

    if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
      return true;

    if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
        Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
      return true;

    return false;
  }

  bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const {
    if (!ST->hasSVE())
      return false;

    // For fixed vectors, avoid scalarization if using SVE for them.
    if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors() &&
        DataType->getPrimitiveSizeInBits() != 128)
      return false; // Fall back to scalarization of masked operations.

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment,
                         unsigned /*AddressSpace*/) const override {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment,
                          unsigned /*AddressSpace*/) const override {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedGatherScatter(Type *DataType) const {
    if (!ST->isSVEAvailable())
      return false;

    // For fixed vectors, scalarize if not using SVE for them.
    auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType);
    if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||
                         DataTypeFVTy->getNumElements() < 2))
      return false;

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const override {
    return isLegalMaskedGatherScatter(DataType);
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override {
    return isLegalMaskedGatherScatter(DataType);
  }

  bool isLegalBroadcastLoad(Type *ElementTy,
                            ElementCount NumElements) const override {
    // Return true if we can generate a `ld1r` splat load instruction.
    if (!ST->hasNEON() || NumElements.isScalable())
      return false;
    switch (unsigned ElementBits = ElementTy->getScalarSizeInBits()) {
    case 8:
    case 16:
    case 32:
    case 64: {
      // We accept vector bit-widths >= 64 bits and element sizes of
      // {8,16,32,64} bits.
      unsigned VectorBits = NumElements.getFixedValue() * ElementBits;
      return VectorBits >= 64;
    }
    }
    return false;
  }

  bool isLegalNTStoreLoad(Type *DataType, Align Alignment) const {
    // NOTE: The logic below is mostly geared towards LV, which calls it with
    // vectors with 2 elements. We might want to improve that, if other
    // users show up.
    // Nontemporal vector loads/stores can be directly lowered to LDNP/STNP, if
    // the vector can be halved so that each half fits into a register. That's
    // the case if the element type fits into a register and the number of
    // elements is a power of 2 > 1.
    if (auto *DataTypeTy = dyn_cast<FixedVectorType>(DataType)) {
      unsigned NumElements = DataTypeTy->getNumElements();
      unsigned EltSize = DataTypeTy->getElementType()->getScalarSizeInBits();
      return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
             EltSize <= 128 && isPowerOf2_64(EltSize);
    }
    return BaseT::isLegalNTStore(DataType, Alignment);
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) const override {
    return isLegalNTStoreLoad(DataType, Alignment);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) const override {
    // Only supports little-endian targets.
    if (ST->isLittleEndian())
      return isLegalNTStoreLoad(DataType, Alignment);
    return BaseT::isLegalNTLoad(DataType, Alignment);
  }

  InstructionCost getPartialReductionCost(
      unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
      ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
      TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
      TTI::TargetCostKind CostKind) const override;

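  // Allow the vectorizer to form in-order (strict FP) reductions rather than
  // keeping such loops scalar.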
  bool enableOrderedReductions() const override { return true; }

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) const override;

  bool shouldConsiderAddressTypePromotion(
      const Instruction &I,
      bool &AllowPromotionWithoutCommonHeader) const override;

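  // Vector reduction intrinsics are lowered directly by the backend (e.g. to
  // ADDV), so they never need to be expanded into shuffle sequences.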
  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return false;
  }

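  // Rematerializing a global address typically takes two instructions on
  // AArch64 (an ADRP/ADD or ADRP/LDR pair).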
  unsigned getGISelRematGlobalCost() const override { return 2; }

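  // Heuristic: with SVE, tail folding is only considered worthwhile once the
  // loop's trip count reaches this threshold; without SVE there is no minimum.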
  unsigned getMinTripCountTailFoldingThreshold() const override {
    return ST->hasSVE() ? 5 : 0;
  }

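  // With SVE, predicate both data and control flow using an active lane mask;
  // when the IV update may overflow, pick the variant that adjusts the trip
  // count so no runtime overflow check is needed. Without SVE, only the data
  // is predicated, with the mask computed without get.active.lane.mask.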
  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override {
    if (ST->hasSVE())
      return IVUpdateMayOverflow
                 ? TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck
                 : TailFoldingStyle::DataAndControlFlow;

    return TailFoldingStyle::DataWithoutLaneMask;
  }

  bool preferFixedOverScalableIfEqualCost() const override;

  unsigned getEpilogueVectorizationMinVF() const override;

  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override;

  bool supportsScalableVectors() const override {
    return ST->isSVEorStreamingSVEAvailable();
  }

  bool enableScalableVectorization() const override;

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const override;

  bool preferPredicatedReductionSelect() const override { return ST->hasSVE(); }

  InstructionCost
  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                             std::optional<FastMathFlags> FMF,
                             TTI::TargetCostKind CostKind) const override;

  InstructionCost
  getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
                           VectorType *ValTy, std::optional<FastMathFlags> FMF,
                           TTI::TargetCostKind CostKind) const override;

  InstructionCost getMulAccReductionCost(
      bool IsUnsigned, Type *ResTy, VectorType *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const override;

  InstructionCost
  getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
                 ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
                 VectorType *SubTp, ArrayRef<const Value *> Args = {},
                 const Instruction *CxtI = nullptr) const override;

  InstructionCost getScalarizationOverhead(
      VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
      TTI::TargetCostKind CostKind, bool ForPoisonSrc = true,
      ArrayRef<Value *> VL = {}) const override;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns an invalid cost.
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) const override;

  bool enableSelectOptimize() const override {
    return ST->enableSelectOptimize();
  }

  bool shouldTreatInstructionLikeSelect(const Instruction *I) const override;

  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const override {
    // We can vectorize store v4i8.
    if (ScalarMemTy->isIntegerTy(8) && isPowerOf2_32(VF) && VF >= 4)
      return 4;

    return BaseT::getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
  }

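  // AArch64 supports 4 KiB, 16 KiB and 64 KiB translation granules, so 4 KiB
  // is the smallest page size that can be assumed.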
  std::optional<unsigned> getMinPageSize() const override { return 4096; }

  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     const TargetTransformInfo::LSRCost &C2) const override;

  bool isProfitableToSinkOperands(Instruction *I,
                                  SmallVectorImpl<Use *> &Ops) const override;
  /// @}
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H