//===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file provides a TargetTransformInfoImplBase conforming object specific
/// to the ARM target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target-independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H

#include "ARM.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/TargetParser/SubtargetFeature.h"
#include <optional>

namespace llvm {

class APInt;
class ARMTargetLowering;
class Instruction;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;

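// Modes for MVE tail-predication of hardware loops. Roughly: Disabled never
// tail-predicates; Enabled/EnabledNoReductions tail-predicate when the
// analysis allows it, including or excluding reduction loops respectively;
// the Force* variants apply tail-predication even when it cannot be proven
// safe. (See the -tail-predication option in ARMTargetTransformInfo.cpp for
// the authoritative descriptions.)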
namespace TailPredication {
  enum Mode {
    Disabled = 0,
    EnabledNoReductions,
    Enabled,
    ForceEnabledNoReductions,
    ForceEnabled
  };
}

// For controlling the conversion of memcpy into a tail-predicated loop.
namespace TPLoop {
enum MemTransfer { ForceDisabled = 0, ForceEnabled, Allow };
}

class ARMTTIImpl final : public BasicTTIImplBase<ARMTTIImpl> {
  using BaseT = BasicTTIImplBase<ARMTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const ARMSubtarget *ST;
  const ARMTargetLowering *TLI;

  // Currently the following features are excluded from InlineFeaturesAllowed:
  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32.
  // Depending on whether they are set or unset, different
  // instructions/registers are available. For example, inlining a callee with
  // -thumb-mode into a caller with +thumb-mode may cause the assembler to
  // fail if the callee uses ARM-only instructions, e.g. in inline asm.
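  //
  // A rough sketch of how areInlineCompatible() is expected to use this mask
  // (the authoritative logic lives in ARMTargetTransformInfo.cpp): features
  // outside the mask must match exactly between caller and callee, while the
  // callee's features inside the mask must be a subset of the caller's:
  //
  //   bool MatchExact = (CallerBits & ~InlineFeaturesAllowed) ==
  //                     (CalleeBits & ~InlineFeaturesAllowed);
  //   bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
  //                      (CalleeBits & InlineFeaturesAllowed);
  //   return MatchExact && MatchSubset;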
  const FeatureBitset InlineFeaturesAllowed = {
      ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
      ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
      ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
      ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
      ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
      ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
      ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
      ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
      ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
      ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
      ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
      ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
      ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
      ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
      ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
      ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
      ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
      ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
      ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
      ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
      ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
      ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
      ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
      ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
  };

  const ARMSubtarget *getST() const { return ST; }
  const ARMTargetLowering *getTLI() const { return TLI; }

public:
  explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override;

  bool enableInterleavedAccessVectorization() const override { return true; }

  TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const override;

  /// Floating-point computation using ARMv8 AArch32 Advanced
  /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
  /// and Arm MVE are IEEE-754 compliant.
  bool isFPVectorizationPotentiallyUnsafe() const override {
    return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
  }

  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const override;

  /// \name Scalar TTI Implementations
  /// @{

  InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                        const APInt &Imm,
                                        Type *Ty) const override;

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind) const override;

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const override;

  /// @}

  /// \name Vector TTI Implementations
  /// @{

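  // Register counts returned below reflect the architected register files:
  // NEON provides 16 quad (Q) vector registers and MVE provides 8, while the
  // scalar counts roughly correspond to the allocatable general-purpose
  // registers (r0-r12, or only the low registers r0-r7 when targeting Thumb1).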
  unsigned getNumberOfRegisters(unsigned ClassID) const override {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 16;
      if (ST->hasMVEIntegerOps())
        return 8;
      return 0;
    }

    if (ST->isThumb1Only())
      return 8;
    return 13;
  }

  TypeSize
  getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override {
    switch (K) {
    case TargetTransformInfo::RGK_Scalar:
      return TypeSize::getFixed(32);
    case TargetTransformInfo::RGK_FixedWidthVector:
      if (ST->hasNEON())
        return TypeSize::getFixed(128);
      if (ST->hasMVEIntegerOps())
        return TypeSize::getFixed(128);
      return TypeSize::getFixed(0);
    case TargetTransformInfo::RGK_ScalableVector:
      return TypeSize::getScalable(0);
    }
    llvm_unreachable("Unsupported register kind");
  }

  unsigned getMaxInterleaveFactor(ElementCount VF) const override {
    return ST->getMaxInterleaveFactor();
  }

  bool isProfitableLSRChainElement(Instruction *I) const override;

  bool isLegalMaskedLoad(Type *DataTy, Align Alignment,
                         unsigned AddressSpace) const override;

  bool isLegalMaskedStore(Type *DataTy, Align Alignment,
                          unsigned AddressSpace) const override {
    return isLegalMaskedLoad(DataTy, Alignment, AddressSpace);
  }

  bool forceScalarizeMaskedGather(VectorType *VTy,
                                  Align Alignment) const override {
    // For MVE, we have a custom lowering pass that will already have custom
    // legalised any gathers that we can lower to MVE intrinsics, and we want
    // to expand all the rest. The pass runs before the masked intrinsic
    // lowering pass.
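    // (That custom pass is the MVE gather/scatter lowering pass,
    // MVEGatherScatterLowering, in this backend.)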
    return true;
  }

  bool forceScalarizeMaskedScatter(VectorType *VTy,
                                   Align Alignment) const override {
    return forceScalarizeMaskedGather(VTy, Alignment);
  }

  bool isLegalMaskedGather(Type *Ty, Align Alignment) const override;

  bool isLegalMaskedScatter(Type *Ty, Align Alignment) const override {
    return isLegalMaskedGather(Ty, Alignment);
  }

  InstructionCost getMemcpyCost(const Instruction *I) const override;

  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
    return ST->getMaxInlineSizeThreshold();
  }

  int getNumMemOps(const IntrinsicInst *I) const;

  InstructionCost
  getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
                 ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
                 VectorType *SubTp, ArrayRef<const Value *> Args = {},
                 const Instruction *CxtI = nullptr) const override;

  bool preferInLoopReduction(RecurKind Kind, Type *Ty) const override;

  bool preferPredicatedReductionSelect() const override;

  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return false;
  }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) const override;

  InstructionCost
  getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                   TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
                   const Instruction *I = nullptr) const override;

  InstructionCost getCmpSelInstrCost(
      unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override;

  using BaseT::getVectorInstrCost;
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, const Value *Op0,
                                     const Value *Op1) const override;

  InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE,
                                            const SCEV *Ptr) const override;

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = {},
      const Instruction *CxtI = nullptr) const override;

  InstructionCost getMemoryOpCost(
      unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override;
275
276 InstructionCost
277 getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
278 unsigned AddressSpace,
279 TTI::TargetCostKind CostKind) const override;
280
281 InstructionCost getInterleavedMemoryOpCost(
282 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
283 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
284 bool UseMaskForCond = false, bool UseMaskForGaps = false) const override;
285
286 InstructionCost
287 getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
288 bool VariableMask, Align Alignment,
289 TTI::TargetCostKind CostKind,
290 const Instruction *I = nullptr) const override;
291
292 InstructionCost
293 getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
294 std::optional<FastMathFlags> FMF,
295 TTI::TargetCostKind CostKind) const override;
296 InstructionCost
297 getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
298 VectorType *ValTy, std::optional<FastMathFlags> FMF,
299 TTI::TargetCostKind CostKind) const override;
300 InstructionCost
301 getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *ValTy,
302 TTI::TargetCostKind CostKind) const override;
303
304 InstructionCost
305 getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
306 TTI::TargetCostKind CostKind) const override;
307
308 InstructionCost
309 getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
310 TTI::TargetCostKind CostKind) const override;
311
  /// getScalingFactorCost - Return the cost of the scaling used in the
  /// addressing mode represented by AM.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, the return value is an invalid cost.
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) const override;

  bool maybeLoweredToCall(Instruction &I) const;
  bool isLoweredToCall(const Function *F) const override;
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const override;
  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override;
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) const override;

  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const override;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) const override;
  bool shouldBuildLookupTablesForConstant(Constant *C) const override {
    // In the ROPI and RWPI relocation models we can't have pointers to global
    // variables or functions in constant data, so don't convert switches to
    // lookup tables if any of the values would need relocation.
    if (ST->isROPI() || ST->isRWPI())
      return !C->needsDynamicRelocation();

    return true;
  }

  bool hasArmWideBranch(bool Thumb) const override;

  bool isProfitableToSinkOperands(Instruction *I,
                                  SmallVectorImpl<Use *> &Ops) const override;

  unsigned getNumBytesToPadGlobalArray(unsigned Size,
                                       Type *ArrayType) const override;

  /// @}
};

/// isVREVMask - Check if a vector shuffle corresponds to a VREV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
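/// For example, the mask <3, 2, 1, 0, 7, 6, 5, 4> on a v8i16 shuffle is a
/// VREV64 mask: each 64-bit block of four 16-bit elements is reversed.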
inline bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
         "Only possible block sizes for VREV are: 16, 32, 64");

  unsigned EltSz = VT.getScalarSizeInBits();
  if (EltSz != 8 && EltSz != 16 && EltSz != 32)
    return false;

  unsigned BlockElts = M[0] + 1;
  // If the first shuffle index is UNDEF, be optimistic.
  if (M[0] < 0)
    BlockElts = BlockSize / EltSz;

  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
    return false;

  for (unsigned i = 0, e = M.size(); i < e; ++i) {
    if (M[i] < 0)
      continue; // ignore UNDEF indices
    if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  }

  return true;
}

} // end namespace llvm

#endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H