//===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file defines a TargetTransformInfoImplBase conforming object specific
/// to the ARM target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H

#include "ARM.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/TargetParser/SubtargetFeature.h"
#include <optional>

namespace llvm {

class APInt;
class ARMTargetLowering;
class Instruction;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;

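// Controls how aggressively loops are converted to use MVE tail predication;
// these values back the -tail-predication command-line option, with the
// "Force" variants overriding the usual profitability checks.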
namespace TailPredication {
enum Mode {
  Disabled = 0,
  EnabledNoReductions,
  Enabled,
  ForceEnabledNoReductions,
  ForceEnabled
};
} // namespace TailPredication

// Controls the conversion of memcpy into a tail-predicated loop.
namespace TPLoop {
enum MemTransfer { ForceDisabled = 0, ForceEnabled, Allow };
} // namespace TPLoop

class ARMTTIImpl final : public BasicTTIImplBase<ARMTTIImpl> {
  using BaseT = BasicTTIImplBase<ARMTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const ARMSubtarget *ST;
  const ARMTargetLowering *TLI;

  // Currently the following features are excluded from InlineFeaturesAllowed:
  // ModeThumb, FeatureNoARM, ModeSoftFloat.
  // Depending on whether they are set or unset, different
  // instructions/registers are available. For example, inlining a callee with
  // -thumb-mode into a caller with +thumb-mode may cause the assembler to
  // fail if the callee uses ARM-only instructions, e.g. in inline asm.
  const FeatureBitset InlineFeaturesAllowed = {
      ARM::Feature8MSecExt,
      ARM::FeatureAClass,
      ARM::FeatureAES,
      ARM::FeatureAcquireRelease,
      ARM::FeatureAvoidMOVsShOp,
      ARM::FeatureAvoidMULS,
      ARM::FeatureAvoidPartialCPSR,
      ARM::FeatureBF16,
      ARM::FeatureCRC,
      ARM::FeatureCheapPredicableCPSR,
      ARM::FeatureCheckVLDnAlign,
      ARM::FeatureCrypto,
      ARM::FeatureD32,
      ARM::FeatureDB,
      ARM::FeatureDFB,
      ARM::FeatureDSP,
      ARM::FeatureDontWidenVMOVS,
      ARM::FeatureDotProd,
      ARM::FeatureExecuteOnly,
      ARM::FeatureExpandMLx,
      ARM::FeatureFP16,
      ARM::FeatureFP16FML,
      ARM::FeatureFP64,
      ARM::FeatureFPAO,
      ARM::FeatureFPARMv8,
      ARM::FeatureFPARMv8_D16,
      ARM::FeatureFPARMv8_D16_SP,
      ARM::FeatureFPARMv8_SP,
      ARM::FeatureFPRegs,
      ARM::FeatureFPRegs16,
      ARM::FeatureFPRegs64,
      ARM::FeatureFullFP16,
      ARM::FeatureFuseAES,
      ARM::FeatureFuseLiterals,
      ARM::FeatureHWDivARM,
      ARM::FeatureHWDivThumb,
      ARM::FeatureHasNoBranchPredictor,
      ARM::FeatureHasRetAddrStack,
      ARM::FeatureHasSlowFPVFMx,
      ARM::FeatureHasSlowFPVMLx,
      ARM::FeatureHasVMLxHazards,
      ARM::FeatureLOB,
      ARM::FeatureLongCalls,
      ARM::FeatureMClass,
      ARM::FeatureMP,
      ARM::FeatureMVEVectorCostFactor1,
      ARM::FeatureMVEVectorCostFactor2,
      ARM::FeatureMVEVectorCostFactor4,
      ARM::FeatureMatMulInt8,
      ARM::FeatureMuxedUnits,
      ARM::FeatureNEON,
      ARM::FeatureNEONForFP,
      ARM::FeatureNEONForFPMovs,
      ARM::FeatureNoMovt,
      ARM::FeatureNoNegativeImmediates,
      ARM::FeatureNoPostRASched,
      ARM::FeaturePerfMon,
      ARM::FeaturePref32BitThumb,
      ARM::FeaturePrefISHSTBarrier,
      ARM::FeaturePreferBranchAlign32,
      ARM::FeaturePreferBranchAlign64,
      ARM::FeaturePreferVMOVSR,
      ARM::FeatureProfUnpredicate,
      ARM::FeatureRAS,
      ARM::FeatureRClass,
      ARM::FeatureReserveR9,
      ARM::FeatureSB,
      ARM::FeatureSHA2,
      ARM::FeatureSlowFPBrcc,
      ARM::FeatureSlowLoadDSubreg,
      ARM::FeatureSlowOddRegister,
      ARM::FeatureSlowVDUP32,
      ARM::FeatureSlowVGETLNi32,
      ARM::FeatureSplatVFPToNeon,
      ARM::FeatureStrictAlign,
      ARM::FeatureThumb2,
      ARM::FeatureTrustZone,
      ARM::FeatureUseMIPipeliner,
      ARM::FeatureUseMISched,
      ARM::FeatureUseWideStrideVFP,
      ARM::FeatureV7Clrex,
      ARM::FeatureVFP2,
      ARM::FeatureVFP2_SP,
      ARM::FeatureVFP3,
      ARM::FeatureVFP3_D16,
      ARM::FeatureVFP3_D16_SP,
      ARM::FeatureVFP3_SP,
      ARM::FeatureVFP4,
      ARM::FeatureVFP4_D16,
      ARM::FeatureVFP4_D16_SP,
      ARM::FeatureVFP4_SP,
      ARM::FeatureVMLxForwarding,
      ARM::FeatureVirtualization,
      ARM::FeatureZCZeroing,
      ARM::HasMVEFloatOps,
      ARM::HasMVEIntegerOps,
      ARM::HasV5TEOps,
      ARM::HasV5TOps,
      ARM::HasV6KOps,
      ARM::HasV6MOps,
      ARM::HasV6Ops,
      ARM::HasV6T2Ops,
      ARM::HasV7Ops,
      ARM::HasV8MBaselineOps,
      ARM::HasV8MMainlineOps,
      ARM::HasV8Ops,
      ARM::HasV8_1MMainlineOps,
      ARM::HasV8_1aOps,
      ARM::HasV8_2aOps,
      ARM::HasV8_3aOps,
      ARM::HasV8_4aOps,
      ARM::HasV8_5aOps,
      ARM::HasV8_6aOps,
      ARM::HasV8_7aOps,
      ARM::HasV8_8aOps,
      ARM::HasV8_9aOps,
      ARM::HasV9_0aOps,
      ARM::HasV9_1aOps,
      ARM::HasV9_2aOps,
      ARM::HasV9_3aOps,
      ARM::HasV9_4aOps,
      ARM::HasV9_5aOps,
      ARM::HasV9_6aOps,
      ARM::HasV9_7aOps};

  const ARMSubtarget *getST() const { return ST; }
  const ARMTargetLowering *getTLI() const { return TLI; }

public:
  explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

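  // Inlining is allowed when the caller and callee agree on every subtarget
  // feature outside InlineFeaturesAllowed, and the callee's features from the
  // allowed list form a subset of the caller's (see the implementation in
  // ARMTargetTransformInfo.cpp).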
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override;

  bool enableInterleavedAccessVectorization() const override { return true; }

  TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const override;

  /// Floating-point computation using ARMv8 AArch32 Advanced
  /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
  /// and Arm MVE are IEEE-754 compliant.
  bool isFPVectorizationPotentiallyUnsafe() const override {
    return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
  }

  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const override;

  /// \name Scalar TTI Implementations
  /// @{

  InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                        const APInt &Imm,
                                        Type *Ty) const override;

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind) const override;

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const override;

  /// @}

  /// \name Vector TTI Implementations
  /// @{

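  // Vector register counts: 16 quadword Q registers with NEON, 8 Q registers
  // (Q0-Q7) with MVE, and none otherwise. For scalars, Thumb1-only targets can
  // freely use only the low registers r0-r7; everything else has r0-r12.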
  unsigned getNumberOfRegisters(unsigned ClassID) const override {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 16;
      if (ST->hasMVEIntegerOps())
        return 8;
      return 0;
    }

    if (ST->isThumb1Only())
      return 8;
    return 13;
  }

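  // Both NEON and MVE operate on 128-bit Q registers; scalar GPRs are 32 bits
  // wide, and there are no scalable vector registers on ARM.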
  TypeSize
  getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override {
    switch (K) {
    case TargetTransformInfo::RGK_Scalar:
      return TypeSize::getFixed(32);
    case TargetTransformInfo::RGK_FixedWidthVector:
      if (ST->hasNEON())
        return TypeSize::getFixed(128);
      if (ST->hasMVEIntegerOps())
        return TypeSize::getFixed(128);
      return TypeSize::getFixed(0);
    case TargetTransformInfo::RGK_ScalableVector:
      return TypeSize::getScalable(0);
    }
    llvm_unreachable("Unsupported register kind");
  }

  unsigned getMaxInterleaveFactor(ElementCount VF) const override {
    return ST->getMaxInterleaveFactor();
  }

  bool isProfitableLSRChainElement(Instruction *I) const override;

  bool
  isLegalMaskedLoad(Type *DataTy, Align Alignment, unsigned AddressSpace,
                    TTI::MaskKind MaskKind =
                        TTI::MaskKind::VariableOrConstantMask) const override;

  bool
  isLegalMaskedStore(Type *DataTy, Align Alignment, unsigned AddressSpace,
                     TTI::MaskKind MaskKind =
                         TTI::MaskKind::VariableOrConstantMask) const override {
    return isLegalMaskedLoad(DataTy, Alignment, AddressSpace, MaskKind);
  }

  bool forceScalarizeMaskedGather(VectorType *VTy,
                                  Align Alignment) const override {
    // For MVE, a custom lowering pass will already have legalised any gathers
    // that can be lowered to MVE intrinsics; everything else should be
    // expanded. That pass runs before the masked intrinsic lowering pass.
    return true;
  }

  bool forceScalarizeMaskedScatter(VectorType *VTy,
                                   Align Alignment) const override {
    return forceScalarizeMaskedGather(VTy, Alignment);
  }

  bool isLegalMaskedGather(Type *Ty, Align Alignment) const override;

  bool isLegalMaskedScatter(Type *Ty, Align Alignment) const override {
    return isLegalMaskedGather(Ty, Alignment);
  }

  InstructionCost getMemcpyCost(const Instruction *I) const override;

  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
    return ST->getMaxInlineSizeThreshold();
  }

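  /// Given a memcpy/memset/memmove intrinsic call, return the number of memory
  /// operations that would be performed (as determined by
  /// findOptimalMemOpLowering), or -1 if a library call would be used instead.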
  int getNumMemOps(const IntrinsicInst *I) const;

  InstructionCost
  getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
                 ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
                 VectorType *SubTp, ArrayRef<const Value *> Args = {},
                 const Instruction *CxtI = nullptr) const override;

  bool preferInLoopReduction(RecurKind Kind, Type *Ty) const override;

  bool preferPredicatedReductionSelect() const override;

  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return false;
  }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) const override;

  InstructionCost
  getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                   TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
                   const Instruction *I = nullptr) const override;

  InstructionCost getCmpSelInstrCost(
      unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override;

  using BaseT::getVectorInstrCost;
  InstructionCost
  getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind,
                     unsigned Index, const Value *Op0, const Value *Op1,
                     TTI::VectorInstrContext VIC =
                         TTI::VectorInstrContext::None) const override;

  InstructionCost
  getAddressComputationCost(Type *Val, ScalarEvolution *SE, const SCEV *Ptr,
                            TTI::TargetCostKind CostKind) const override;

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = {},
      const Instruction *CxtI = nullptr) const override;

  InstructionCost getMemoryOpCost(
      unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override;

  InstructionCost
  getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
                           TTI::TargetCostKind CostKind) const override;

  InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
                                        TTI::TargetCostKind CostKind) const;

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) const override;

  InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA,
                                         TTI::TargetCostKind CostKind) const;

  InstructionCost
  getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
                             std::optional<FastMathFlags> FMF,
                             TTI::TargetCostKind CostKind) const override;
  InstructionCost
  getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
                           VectorType *ValTy, std::optional<FastMathFlags> FMF,
                           TTI::TargetCostKind CostKind) const override;
  InstructionCost
  getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy,
                         VectorType *ValTy,
                         TTI::TargetCostKind CostKind) const override;

  InstructionCost
  getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
                         TTI::TargetCostKind CostKind) const override;

  InstructionCost
  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                        TTI::TargetCostKind CostKind) const override;

  /// getScalingFactorCost - Return the cost of the scaling factor used in the
  /// addressing mode represented by AM.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, the return value is an invalid cost.
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) const override;

  bool maybeLoweredToCall(Instruction &I) const;
  bool isLoweredToCall(const Function *F) const override;
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const override;
  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override;
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) const override;

  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const override;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) const override;
  bool shouldBuildLookupTablesForConstant(Constant *C) const override {
    // In the ROPI and RWPI relocation models we can't have pointers to global
    // variables or functions in constant data, so don't convert switches to
    // lookup tables if any of the values would need relocation.
    if (ST->isROPI() || ST->isRWPI())
      return !C->needsDynamicRelocation();

    return true;
  }

  bool hasArmWideBranch(bool Thumb) const override;

  bool isProfitableToSinkOperands(Instruction *I,
                                  SmallVectorImpl<Use *> &Ops) const override;

  unsigned getNumBytesToPadGlobalArray(unsigned Size,
                                       Type *ArrayType) const override;

  /// @}
};

/// isVREVMask - Check if a vector shuffle mask corresponds to a VREV
/// instruction with the specified block size. (The order of the elements
/// within each block of the vector is reversed.)
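/// For example, with 8-bit elements and BlockSize == 32 (i.e. VREV32.8), the
/// mask for an 8-element shuffle is <3,2,1,0,7,6,5,4>: the four bytes inside
/// each 32-bit block appear in reverse order.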
inline bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
         "Only possible block sizes for VREV are: 16, 32, 64");

  unsigned EltSz = VT.getScalarSizeInBits();
  if (EltSz != 8 && EltSz != 16 && EltSz != 32)
    return false;

  unsigned BlockElts = M[0] + 1;
  // If the first shuffle index is UNDEF, be optimistic.
  if (M[0] < 0)
    BlockElts = BlockSize / EltSz;

  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
    return false;

  for (unsigned i = 0, e = M.size(); i < e; ++i) {
    if (M[i] < 0)
      continue; // ignore UNDEF indices
    if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  }

  return true;
}

} // end namespace llvm

#endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H