//===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file defines a TargetTransformInfoImplBase conforming object specific
/// to the ARM target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H

#include "ARM.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/TargetParser/SubtargetFeature.h"
#include <optional>

namespace llvm {

class APInt;
class ARMTargetLowering;
class Instruction;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;

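// Modes controlling how aggressively loops are tail-predicated when targeting
// MVE. The "NoReductions" variants enable tail predication but leave loops
// containing in-loop reductions alone.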
namespace TailPredication {
enum Mode {
  Disabled = 0,
  EnabledNoReductions,
  Enabled,
  ForceEnabledNoReductions,
  ForceEnabled
};
}

// For controlling conversion of memcpy into a tail-predicated loop.
namespace TPLoop {
enum MemTransfer { ForceDisabled = 0, ForceEnabled, Allow };
}

class ARMTTIImpl final : public BasicTTIImplBase<ARMTTIImpl> {
  using BaseT = BasicTTIImplBase<ARMTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const ARMSubtarget *ST;
  const ARMTargetLowering *TLI;

  // Currently the following features are excluded from InlineFeaturesAllowed:
  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32.
  // Depending on whether they are set or unset, different
  // instructions/registers are available. For example, inlining a callee with
  // -thumb-mode in a caller with +thumb-mode may cause the assembler to
  // fail if the callee uses ARM-only instructions, e.g. in inline asm.
  const FeatureBitset InlineFeaturesAllowed = {
      ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
      ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
      ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
      ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
      ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
      ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
      ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
      ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
      ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
      ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
      ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
      ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
      ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
      ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
      ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
      ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
      ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
      ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
      ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
      ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
      ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
      ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
      ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
      ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
  };
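
  // A sketch of how this set is intended to be used (see areInlineCompatible
  // in ARMTargetTransformInfo.cpp for the authoritative check): inlining is
  // allowed only when the caller and callee agree on every feature bit
  // *outside* this set, roughly
  //   (CallerBits & ~InlineFeaturesAllowed) == (CalleeBits & ~InlineFeaturesAllowed)
  // so mismatches confined to the "safe" features listed above do not block
  // inlining.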

  const ARMSubtarget *getST() const { return ST; }
  const ARMTargetLowering *getTLI() const { return TLI; }

public:
  explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override;

  bool enableInterleavedAccessVectorization() const override { return true; }

  TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const override;

  /// Floating-point computation using ARMv8 AArch32 Advanced
  /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
  /// and Arm MVE are IEEE-754 compliant.
  bool isFPVectorizationPotentiallyUnsafe() const override {
    return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
  }

  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const override;

  /// \name Scalar TTI Implementations
  /// @{

  InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                        const APInt &Imm,
                                        Type *Ty) const override;

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind) const override;

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const override;

  /// @}

  /// \name Vector TTI Implementations
  /// @{

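  // Background for the constants below: NEON provides 16 128-bit Q registers
  // (Q0-Q15) and MVE provides 8 (Q0-Q7); Thumb1 code can freely allocate only
  // the 8 low GPRs (r0-r7), while other modes have 13 general-purpose
  // registers (r0-r12) available.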
  unsigned getNumberOfRegisters(unsigned ClassID) const override {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 16;
      if (ST->hasMVEIntegerOps())
        return 8;
      return 0;
    }

    if (ST->isThumb1Only())
      return 8;
    return 13;
  }

  TypeSize
  getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override {
    switch (K) {
    case TargetTransformInfo::RGK_Scalar:
      return TypeSize::getFixed(32);
    case TargetTransformInfo::RGK_FixedWidthVector:
      if (ST->hasNEON())
        return TypeSize::getFixed(128);
      if (ST->hasMVEIntegerOps())
        return TypeSize::getFixed(128);
      return TypeSize::getFixed(0);
    case TargetTransformInfo::RGK_ScalableVector:
      return TypeSize::getScalable(0);
    }
    llvm_unreachable("Unsupported register kind");
  }

  unsigned getMaxInterleaveFactor(ElementCount VF) const override {
    return ST->getMaxInterleaveFactor();
  }

  bool isProfitableLSRChainElement(Instruction *I) const override;

  bool isLegalMaskedLoad(Type *DataTy, Align Alignment,
                         unsigned AddressSpace) const override;

  bool isLegalMaskedStore(Type *DataTy, Align Alignment,
                          unsigned AddressSpace) const override {
    return isLegalMaskedLoad(DataTy, Alignment, AddressSpace);
  }

  bool forceScalarizeMaskedGather(VectorType *VTy,
                                  Align Alignment) const override {
    // For MVE, we have a custom lowering pass that will already have custom
    // legalised any gathers that we can lower to MVE intrinsics, and want to
    // expand all the rest. The pass runs before the masked intrinsic lowering
    // pass.
    return true;
  }

  bool forceScalarizeMaskedScatter(VectorType *VTy,
                                   Align Alignment) const override {
    return forceScalarizeMaskedGather(VTy, Alignment);
  }

  bool isLegalMaskedGather(Type *Ty, Align Alignment) const override;

  bool isLegalMaskedScatter(Type *Ty, Align Alignment) const override {
    return isLegalMaskedGather(Ty, Alignment);
  }

  InstructionCost getMemcpyCost(const Instruction *I) const override;

  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
    return ST->getMaxInlineSizeThreshold();
  }

  int getNumMemOps(const IntrinsicInst *I) const;

  InstructionCost
  getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
                 ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
                 VectorType *SubTp, ArrayRef<const Value *> Args = {},
                 const Instruction *CxtI = nullptr) const override;

  bool preferInLoopReduction(RecurKind Kind, Type *Ty) const override;

  bool preferPredicatedReductionSelect() const override;

  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return false;
  }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) const override;

  InstructionCost
  getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                   TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
                   const Instruction *I = nullptr) const override;

  InstructionCost getCmpSelInstrCost(
      unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override;

  using BaseT::getVectorInstrCost;
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, const Value *Op0,
                                     const Value *Op1) const override;

  InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE,
                                            const SCEV *Ptr) const override;

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = {},
      const Instruction *CxtI = nullptr) const override;

  InstructionCost getMemoryOpCost(
      unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override;

  InstructionCost
  getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                        unsigned AddressSpace,
                        TTI::TargetCostKind CostKind) const override;

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) const override;

  InstructionCost
  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         TTI::TargetCostKind CostKind,
                         const Instruction *I = nullptr) const override;

  InstructionCost
  getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
                             std::optional<FastMathFlags> FMF,
                             TTI::TargetCostKind CostKind) const override;
  InstructionCost
  getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
                           VectorType *ValTy, std::optional<FastMathFlags> FMF,
                           TTI::TargetCostKind CostKind) const override;
  InstructionCost
  getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *ValTy,
                         TTI::TargetCostKind CostKind) const override;

  InstructionCost
  getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
                         TTI::TargetCostKind CostKind) const override;

  InstructionCost
  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                        TTI::TargetCostKind CostKind) const override;

  /// getScalingFactorCost - Return the cost of the scaling used in the
  /// addressing mode represented by AM.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, the return value is an invalid cost.
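  /// For example, an address computed as "r0 + (r1 << 2)" (an ARM
  /// scaled-register addressing mode) corresponds to Scale == 4.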
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) const override;

  bool maybeLoweredToCall(Instruction &I) const;
  bool isLoweredToCall(const Function *F) const override;
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const override;
  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override;
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) const override;

  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const override;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) const override;
  bool shouldBuildLookupTablesForConstant(Constant *C) const override {
    // In the ROPI and RWPI relocation models we can't have pointers to global
    // variables or functions in constant data, so don't convert switches to
    // lookup tables if any of the values would need relocation.
    if (ST->isROPI() || ST->isRWPI())
      return !C->needsDynamicRelocation();

    return true;
  }

  bool hasArmWideBranch(bool Thumb) const override;

  bool isProfitableToSinkOperands(Instruction *I,
                                  SmallVectorImpl<Use *> &Ops) const override;

  unsigned getNumBytesToPadGlobalArray(unsigned Size,
                                       Type *ArrayType) const override;

  /// @}
};

/// isVREVMask - Check if a vector shuffle corresponds to a VREV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
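/// For example, with 8-bit elements and BlockSize 64, the v16i8 mask
/// <7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8> is the shuffle that VREV64.8
/// performs.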
inline bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
         "Only possible block sizes for VREV are: 16, 32, 64");

  unsigned EltSz = VT.getScalarSizeInBits();
  if (EltSz != 8 && EltSz != 16 && EltSz != 32)
    return false;

  unsigned BlockElts = M[0] + 1;
  // If the first shuffle index is UNDEF, be optimistic.
  if (M[0] < 0)
    BlockElts = BlockSize / EltSz;

  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
    return false;

  for (unsigned i = 0, e = M.size(); i < e; ++i) {
    if (M[i] < 0)
      continue; // ignore UNDEF indices
    if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  }

  return true;
}

} // end namespace llvm

#endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H