//===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file contains a TargetTransformInfoImplBase conforming object specific
/// to the ARM target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target-independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H

#include "ARM.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/TargetParser/SubtargetFeature.h"
#include <optional>

namespace llvm {

class APInt;
class ARMTargetLowering;
class Instruction;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;

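// Modes for MVE tail predication: a tail-predicated loop executes its final,
// partial vector iterations under a lane predicate (VCTP) rather than through
// a scalar epilogue. The "NoReductions" variants skip loops containing
// reductions, and the "Force" variants enable the transform even where it may
// not be profitable.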
namespace TailPredication {
enum Mode {
  Disabled = 0,
  EnabledNoReductions,
  Enabled,
  ForceEnabledNoReductions,
  ForceEnabled
};
} // namespace TailPredication

// For controlling conversion of memcpy into a tail-predicated loop.
namespace TPLoop {
enum MemTransfer { ForceDisabled = 0, ForceEnabled, Allow };
} // namespace TPLoop

class ARMTTIImpl final : public BasicTTIImplBase<ARMTTIImpl> {
  using BaseT = BasicTTIImplBase<ARMTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const ARMSubtarget *ST;
  const ARMTargetLowering *TLI;

  // Currently the following features are excluded from InlineFeaturesAllowed:
  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32.
  // Depending on whether they are set or unset, different
  // instructions/registers are available. For example, inlining a callee with
  // -thumb-mode in a caller with +thumb-mode may cause the assembler to
  // fail if the callee uses ARM-only instructions, e.g. in inline asm.
  const FeatureBitset InlineFeaturesAllowed = {
      ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
      ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
      ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
      ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
      ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
      ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
      ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
      ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
      ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
      ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
      ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
      ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
      ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
      ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
      ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
      ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
      ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
      ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
      ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
      ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
      ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
      ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
      ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
      ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
  };

  const ARMSubtarget *getST() const { return ST; }
  const ARMTargetLowering *getTLI() const { return TLI; }

public:
  explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override;

  bool enableInterleavedAccessVectorization() const override { return true; }

  TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const override;

  /// Floating-point computation using ARMv8 AArch32 Advanced
  /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
  /// and Arm MVE are IEEE-754 compliant.
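  /// (AArch32 Advanced SIMD arithmetic supports only round-to-nearest and
  /// flushes denormals to zero, so it is not fully IEEE-754 compliant.)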
  bool isFPVectorizationPotentiallyUnsafe() const override {
    return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
  }

  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const override;

  /// \name Scalar TTI Implementations
  /// @{

  InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                        const APInt &Imm,
                                        Type *Ty) const override;

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind) const override;

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const override;

  /// @}

  /// \name Vector TTI Implementations
  /// @{

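  /// The register counts below reflect the ARM register files: 16 128-bit Q
  /// registers with NEON, 8 with MVE, and 8 (Thumb1) or 13 allocatable
  /// general-purpose registers otherwise.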
  unsigned getNumberOfRegisters(unsigned ClassID) const override {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 16;
      if (ST->hasMVEIntegerOps())
        return 8;
      return 0;
    }

    if (ST->isThumb1Only())
      return 8;
    return 13;
  }

  TypeSize
  getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override {
    switch (K) {
    case TargetTransformInfo::RGK_Scalar:
      return TypeSize::getFixed(32);
    case TargetTransformInfo::RGK_FixedWidthVector:
      if (ST->hasNEON())
        return TypeSize::getFixed(128);
      if (ST->hasMVEIntegerOps())
        return TypeSize::getFixed(128);
      return TypeSize::getFixed(0);
    case TargetTransformInfo::RGK_ScalableVector:
      return TypeSize::getScalable(0);
    }
    llvm_unreachable("Unsupported register kind");
  }

  unsigned getMaxInterleaveFactor(ElementCount VF) const override {
    return ST->getMaxInterleaveFactor();
  }

  bool isProfitableLSRChainElement(Instruction *I) const override;

  bool isLegalMaskedLoad(Type *DataTy, Align Alignment,
                         unsigned AddressSpace) const override;

  bool isLegalMaskedStore(Type *DataTy, Align Alignment,
                          unsigned AddressSpace) const override {
    return isLegalMaskedLoad(DataTy, Alignment, AddressSpace);
  }

  bool forceScalarizeMaskedGather(VectorType *VTy,
                                  Align Alignment) const override {
    // For MVE, we have a custom lowering pass that will already have custom
    // legalised any gathers that we can lower to MVE intrinsics, and want to
    // expand all the rest. The pass runs before the masked intrinsic lowering
    // pass.
    return true;
  }

  bool forceScalarizeMaskedScatter(VectorType *VTy,
                                   Align Alignment) const override {
    return forceScalarizeMaskedGather(VTy, Alignment);
  }

  bool isLegalMaskedGather(Type *Ty, Align Alignment) const override;

  bool isLegalMaskedScatter(Type *Ty, Align Alignment) const override {
    return isLegalMaskedGather(Ty, Alignment);
  }

  InstructionCost getMemcpyCost(const Instruction *I) const override;

  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
    return ST->getMaxInlineSizeThreshold();
  }

  int getNumMemOps(const IntrinsicInst *I) const;

  InstructionCost
  getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
                 ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
                 VectorType *SubTp, ArrayRef<const Value *> Args = {},
                 const Instruction *CxtI = nullptr) const override;

  bool preferInLoopReduction(RecurKind Kind, Type *Ty) const override;

  bool preferPredicatedReductionSelect() const override;

  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return false;
  }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) const override;

  InstructionCost
  getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                   TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
                   const Instruction *I = nullptr) const override;

  InstructionCost getCmpSelInstrCost(
      unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override;

  using BaseT::getVectorInstrCost;
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, const Value *Op0,
                                     const Value *Op1) const override;

  InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE,
                                            const SCEV *Ptr) const override;

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = {},
      const Instruction *CxtI = nullptr) const override;

  InstructionCost getMemoryOpCost(
      unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override;

  InstructionCost
  getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                        unsigned AddressSpace,
                        TTI::TargetCostKind CostKind) const override;

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) const override;

  InstructionCost
  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         TTI::TargetCostKind CostKind,
                         const Instruction *I = nullptr) const override;

  InstructionCost
  getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
                             std::optional<FastMathFlags> FMF,
                             TTI::TargetCostKind CostKind) const override;
  InstructionCost
  getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
                           VectorType *ValTy, std::optional<FastMathFlags> FMF,
                           TTI::TargetCostKind CostKind) const override;
  InstructionCost
  getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *ValTy,
                         TTI::TargetCostKind CostKind) const override;

  InstructionCost
  getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
                         TTI::TargetCostKind CostKind) const override;

  InstructionCost
  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                        TTI::TargetCostKind CostKind) const override;

  /// getScalingFactorCost - Return the cost of the scaling factor used in the
  /// addressing mode represented by AM.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, the return value is an invalid cost.
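  /// For example, an address of the form BaseReg + 4 * IndexReg uses a scale
  /// factor of 4.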
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) const override;

  bool maybeLoweredToCall(Instruction &I) const;
  bool isLoweredToCall(const Function *F) const override;
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const override;
  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override;
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) const override;

  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const override;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) const override;
  bool shouldBuildLookupTablesForConstant(Constant *C) const override {
    // In the ROPI and RWPI relocation models we can't have pointers to global
    // variables or functions in constant data, so don't convert switches to
    // lookup tables if any of the values would need relocation.
    if (ST->isROPI() || ST->isRWPI())
      return !C->needsDynamicRelocation();

    return true;
  }

  bool hasArmWideBranch(bool Thumb) const override;

  bool isProfitableToSinkOperands(Instruction *I,
                                  SmallVectorImpl<Use *> &Ops) const override;

  unsigned getNumBytesToPadGlobalArray(unsigned Size,
                                       Type *ArrayType) const override;

  /// @}
};

/// isVREVMask - Check if a vector shuffle corresponds to a VREV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
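/// For example, with 8-bit elements a VREV64 mask reverses each 64-bit block:
/// <7,6,5,4,3,2,1,0, 15,14,13,12,11,10,9,8> for a v16i8 shuffle.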
inline bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
         "Only possible block sizes for VREV are: 16, 32, 64");

  unsigned EltSz = VT.getScalarSizeInBits();
  if (EltSz != 8 && EltSz != 16 && EltSz != 32)
    return false;

  unsigned BlockElts = M[0] + 1;
  // If the first shuffle index is UNDEF, be optimistic.
  if (M[0] < 0)
    BlockElts = BlockSize / EltSz;

  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
    return false;

  for (unsigned i = 0, e = M.size(); i < e; ++i) {
    if (M[i] < 0)
      continue; // ignore UNDEF indices
    if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  }

  return true;
}

} // end namespace llvm

#endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H