//===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the ARM target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target-independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H

#include "ARM.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/TargetParser/SubtargetFeature.h"
#include <optional>

namespace llvm {

class APInt;
class ARMTargetLowering;
class Instruction;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;

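// Controls whether MVE tail-predicated (low-overhead) loops may be formed, and
// whether loops containing reductions are allowed to be tail-predicated. The
// Force* modes request the transformation even where the usual profitability
// heuristics would reject it.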
namespace TailPredication {
enum Mode {
  Disabled = 0,
  EnabledNoReductions,
  Enabled,
  ForceEnabledNoReductions,
  ForceEnabled
};
}

// For controlling the conversion of memcpy into a tail-predicated loop.
namespace TPLoop {
enum MemTransfer { ForceDisabled = 0, ForceEnabled, Allow };
}

class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
  using BaseT = BasicTTIImplBase<ARMTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const ARMSubtarget *ST;
  const ARMTargetLowering *TLI;

  // Currently the following features are excluded from InlineFeaturesAllowed.
  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32
  // Depending on whether they are set or unset, different
  // instructions/registers are available. For example, inlining a callee with
  // -thumb-mode in a caller with +thumb-mode may cause the assembler to fail
  // if the callee uses ARM-only instructions, e.g. in inline asm.
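  // Roughly, areInlineCompatible() requires feature bits outside this set to
  // match exactly between caller and callee, while bits inside it only need
  // the callee's features to be a subset of the caller's.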
  const FeatureBitset InlineFeaturesAllowed = {
      ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
      ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
      ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
      ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
      ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
      ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
      ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
      ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
      ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
      ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
      ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
      ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
      ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
      ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
      ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
      ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
      ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
      ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
      ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
      ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
      ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
      ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
      ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
      ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
  };

  const ARMSubtarget *getST() const { return ST; }
  const ARMTargetLowering *getTLI() const { return TLI; }

public:
  explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  bool enableInterleavedAccessVectorization() { return true; }

  TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const;

  /// Floating-point computation using ARMv8 AArch32 Advanced
  /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
  /// and Arm MVE are IEEE-754 compliant.
  bool isFPVectorizationPotentiallyUnsafe() {
    return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
  }

  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  /// \name Scalar TTI Implementations
  /// @{

  InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                        const APInt &Imm, Type *Ty);

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

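  // ClassID 1 selects the vector register class: NEON provides 16 Q registers
  // and MVE provides 8. For scalar (GPR) queries, Thumb1 can only allocate the
  // 8 low registers, while other modes have 13 allocatable GPRs.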
  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 16;
      if (ST->hasMVEIntegerOps())
        return 8;
      return 0;
    }

    if (ST->isThumb1Only())
      return 8;
    return 13;
  }

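  // Both NEON and MVE operate on 128-bit Q vector registers; ARM has no
  // scalable vector registers.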
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    switch (K) {
    case TargetTransformInfo::RGK_Scalar:
      return TypeSize::getFixed(32);
    case TargetTransformInfo::RGK_FixedWidthVector:
      if (ST->hasNEON())
        return TypeSize::getFixed(128);
      if (ST->hasMVEIntegerOps())
        return TypeSize::getFixed(128);
      return TypeSize::getFixed(0);
    case TargetTransformInfo::RGK_ScalableVector:
      return TypeSize::getScalable(0);
    }
    llvm_unreachable("Unsupported register kind");
  }

  unsigned getMaxInterleaveFactor(ElementCount VF) {
    return ST->getMaxInterleaveFactor();
  }

  bool isProfitableLSRChainElement(Instruction *I);

  bool isLegalMaskedLoad(Type *DataTy, Align Alignment);

  bool isLegalMaskedStore(Type *DataTy, Align Alignment) {
    return isLegalMaskedLoad(DataTy, Alignment);
  }

  bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) {
    // For MVE, we have a custom lowering pass that will already have custom
    // legalised any gathers that we can lower to MVE intrinsics, and want to
    // expand all the rest. The pass runs before the masked intrinsic lowering
    // pass.
    return true;
  }

  bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
    return forceScalarizeMaskedGather(VTy, Alignment);
  }

  bool isLegalMaskedGather(Type *Ty, Align Alignment);

  bool isLegalMaskedScatter(Type *Ty, Align Alignment) {
    return isLegalMaskedGather(Ty, Alignment);
  }

  InstructionCost getMemcpyCost(const Instruction *I);

  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const {
    return ST->getMaxInlineSizeThreshold();
  }

  int getNumMemOps(const IntrinsicInst *I) const;

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = std::nullopt,
                                 const Instruction *CxtI = nullptr);

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const;

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const;

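  // Keep vector reduction intrinsics intact rather than expanding them in IR;
  // the backend can generally lower them directly (e.g. to MVE VADDV or NEON
  // pairwise operations).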
  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);

  using BaseT::getVectorInstrCost;
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0, Value *Op1);

  InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE,
                                            const SCEV *Ptr);

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = std::nullopt,
      const Instruction *CxtI = nullptr);

  InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                  unsigned AddressSpace, TTI::TargetCostKind CostKind,
                  TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
                  const Instruction *I = nullptr);

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind);

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr);

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);
  InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
                                           Type *ResTy, VectorType *ValTy,
                                           FastMathFlags FMF,
                                           TTI::TargetCostKind CostKind);
  InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
                                         VectorType *ValTy,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
                                         FastMathFlags FMF,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  /// getScalingFactorCost - Return the cost of the scaling used in
  /// addressing mode represented by AM.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, the return value must be negative.
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) const;

  bool maybeLoweredToCall(Instruction &I);
  bool isLoweredToCall(const Function *F);
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo);
  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI);
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);
  bool shouldBuildLookupTablesForConstant(Constant *C) const {
    // In the ROPI and RWPI relocation models we can't have pointers to global
    // variables or functions in constant data, so don't convert switches to
    // lookup tables if any of the values would need relocation.
    if (ST->isROPI() || ST->isRWPI())
      return !C->needsDynamicRelocation();

    return true;
  }

  bool hasArmWideBranch(bool Thumb) const;

  /// @}
};

/// isVREVMask - Check if a vector shuffle corresponds to a VREV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
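/// For example, with 8-bit elements and BlockSize 64, the v8i8 shuffle mask
/// <7,6,5,4,3,2,1,0> corresponds to a VREV64.8.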
inline bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
         "Only possible block sizes for VREV are: 16, 32, 64");

  unsigned EltSz = VT.getScalarSizeInBits();
  if (EltSz != 8 && EltSz != 16 && EltSz != 32)
    return false;

  unsigned BlockElts = M[0] + 1;
  // If the first shuffle index is UNDEF, be optimistic.
  if (M[0] < 0)
    BlockElts = BlockSize / EltSz;

  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
    return false;

  for (unsigned i = 0, e = M.size(); i < e; ++i) {
    if (M[i] < 0)
      continue; // ignore UNDEF indices
    if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  }

  return true;
}

} // end namespace llvm

#endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H