//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfoImplBase conforming object specific
/// to the AArch64 target machine. It uses the target's detailed information
/// to provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H

#include "AArch64.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/InstructionCost.h"
#include <cstdint>
#include <optional>

namespace llvm {

class APInt;
class Instruction;
class IntrinsicInst;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;
class VectorType;

class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
  using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const AArch64Subtarget *ST;
  const AArch64TargetLowering *TLI;

  static const FeatureBitset InlineInverseFeatures;

  const AArch64Subtarget *getST() const { return ST; }
  const AArch64TargetLowering *getTLI() const { return TLI; }

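  // Kinds of structured NEON/SVE memory intrinsics (ld2/st2, ld3/st3,
  // ld4/st4), classified by the number of interleaved elements per access.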
  enum MemIntrinsicType {
    VECTOR_LDST_TWO_ELEMENTS,
    VECTOR_LDST_THREE_ELEMENTS,
    VECTOR_LDST_FOUR_ELEMENTS
  };

  bool isWideningInstruction(Type *DstTy, unsigned Opcode,
                             ArrayRef<const Value *> Args,
                             Type *SrcOverrideTy = nullptr) const;

  /// A helper function called by 'getVectorInstrCost'.
  ///
  /// 'Val' and 'Index' are forwarded from 'getVectorInstrCost'; 'HasRealUse'
  /// indicates whether the vector instruction is available in the input IR or
  /// just imaginary in vectorizer passes.
  /// \param ScalarUserAndIdx encodes the information about extracts from a
  /// vector, with 'Scalar' being the value being extracted, 'User' being the
  /// user of the extract (nullptr if the user is not known before
  /// vectorization), and 'Idx' being the extract lane.
  InstructionCost getVectorInstrCostHelper(
      unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
      bool HasRealUse, const Instruction *I = nullptr, Value *Scalar = nullptr,
      ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx = {}) const;

public:
  explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override;

  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const override;

  unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
                                unsigned DefaultCallPenalty) const override;

  uint64_t getFeatureMask(const Function &F) const override;

  bool isMultiversionedFunction(const Function &F) const override;

  /// \name Scalar TTI Implementations
  /// @{

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(int64_t Val) const;
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind) const override;
  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const override;
  InstructionCost
  getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                      Type *Ty, TTI::TargetCostKind CostKind) const override;
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  bool enableInterleavedAccessVectorization() const override { return true; }

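  // Masked interleaved accesses need the predicated structured loads/stores
  // that SVE provides.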
  bool enableMaskedInterleavedAccessVectorization() const override {
    return ST->hasSVE();
  }

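  // ClassID 1 selects the vector register file: 32 vector registers (V0-V31)
  // when NEON is available. The scalar class has 31 allocatable
  // general-purpose registers (X0-X30; SP is not allocatable).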
  unsigned getNumberOfRegisters(unsigned ClassID) const override {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 32;
      return 0;
    }
    return 31;
  }

  InstructionCost
  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                        TTI::TargetCostKind CostKind) const override;

  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const override;

  TypeSize
  getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override;

  unsigned getMinVectorRegisterBitWidth() const override {
    return ST->getMinVectorRegisterBitWidth();
  }

  std::optional<unsigned> getVScaleForTuning() const override {
    return ST->getVScaleForTuning();
  }

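  // vscale can be assumed to be a power of two here because the backend only
  // supports SVE implementations whose vector length is a power of two.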
  bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }

  bool shouldMaximizeVectorBandwidth(
      TargetTransformInfo::RegisterKind K) const override;

  /// Try to return an estimate cost factor that can be used as a multiplier
  /// when scalarizing an operation for a vector with ElementCount \p VF.
  /// For scalable vectors this currently takes the most pessimistic view based
  /// upon the maximum possible value for vscale.
  unsigned getMaxNumElements(ElementCount VF) const {
    if (!VF.isScalable())
      return VF.getFixedValue();

    return VF.getKnownMinValue() * ST->getVScaleForTuning();
  }

  unsigned getMaxInterleaveFactor(ElementCount VF) const override;

  bool prefersVectorizedAddressing() const override;

  InstructionCost
  getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                        unsigned AddressSpace,
                        TTI::TargetCostKind CostKind) const override;

  InstructionCost
  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         TTI::TargetCostKind CostKind,
                         const Instruction *I = nullptr) const override;

  bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst,
                          Type *Src) const;

  InstructionCost
  getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                   TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
                   const Instruction *I = nullptr) const override;

  InstructionCost
  getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                           unsigned Index,
                           TTI::TargetCostKind CostKind) const override;

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) const override;

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, const Value *Op0,
                                     const Value *Op1) const override;

  /// \param ScalarUserAndIdx encodes the information about extracts from a
  /// vector, with 'Scalar' being the value being extracted, 'User' being the
  /// user of the extract (nullptr if the user is not known before
  /// vectorization), and 'Idx' being the extract lane.
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Scalar,
                                     ArrayRef<std::tuple<Value *, User *, int>>
                                         ScalarUserAndIdx) const override;

  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index) const override;

  InstructionCost
  getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
                         TTI::TargetCostKind CostKind) const override;

  InstructionCost
  getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy,
                                TTI::TargetCostKind CostKind) const;

  InstructionCost getSpliceCost(VectorType *Tp, int Index,
                                TTI::TargetCostKind CostKind) const;

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = {},
      const Instruction *CxtI = nullptr) const override;

  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr) const override;

  InstructionCost getCmpSelInstrCost(
      unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override;

  TTI::MemCmpExpansionOptions
  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
  bool useNeonVector(const Type *Ty) const;

  InstructionCost getMemoryOpCost(
      unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override;

  InstructionCost
  getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const override;

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) const override;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) const override;

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const override;

  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) const override;

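  // Scalable vectors support pointer, half/float/double, bfloat (when +bf16
  // is available), and i1/i8/i16/i32/i64 element types.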
  bool isElementTypeLegalForScalableVector(Type *Ty) const override {
    if (Ty->isPointerTy())
      return true;

    if (Ty->isBFloatTy() && ST->hasBF16())
      return true;

    if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
      return true;

    if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
        Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
      return true;

    return false;
  }

  bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const {
    if (!ST->hasSVE())
      return false;

    // For fixed vectors, avoid scalarization if using SVE for them.
    if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors() &&
        DataType->getPrimitiveSizeInBits() != 128)
      return false; // Fall back to scalarization of masked operations.

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment,
                         unsigned /*AddressSpace*/) const override {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment,
                          unsigned /*AddressSpace*/) const override {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedGatherScatter(Type *DataType) const {
    if (!ST->isSVEAvailable())
      return false;

    // For fixed vectors, scalarize if not using SVE for them.
    auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType);
    if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||
                         DataTypeFVTy->getNumElements() < 2))
      return false;

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const override {
    return isLegalMaskedGatherScatter(DataType);
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override {
    return isLegalMaskedGatherScatter(DataType);
  }

  bool isLegalBroadcastLoad(Type *ElementTy,
                            ElementCount NumElements) const override {
    // Return true if we can generate a `ld1r` splat load instruction.
    if (!ST->hasNEON() || NumElements.isScalable())
      return false;
    switch (unsigned ElementBits = ElementTy->getScalarSizeInBits()) {
    case 8:
    case 16:
    case 32:
    case 64: {
      // We accept vector bit-widths >= 64 bits and element sizes of
      // {8,16,32,64} bits.
      unsigned VectorBits = NumElements.getFixedValue() * ElementBits;
      return VectorBits >= 64;
    }
    }
    return false;
  }

  bool isLegalNTStoreLoad(Type *DataType, Align Alignment) const {
    // NOTE: The logic below is mostly geared towards LV, which calls it with
    // vectors with 2 elements. We might want to improve that, if other
    // users show up.
    // Nontemporal vector loads/stores can be directly lowered to LDNP/STNP, if
    // the vector can be halved so that each half fits into a register. That's
    // the case if the element type fits into a register and the number of
    // elements is a power of 2 > 1.
    if (auto *DataTypeTy = dyn_cast<FixedVectorType>(DataType)) {
      unsigned NumElements = DataTypeTy->getNumElements();
      unsigned EltSize = DataTypeTy->getElementType()->getScalarSizeInBits();
      return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
             EltSize <= 128 && isPowerOf2_64(EltSize);
    }
    return BaseT::isLegalNTStore(DataType, Alignment);
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) const override {
    return isLegalNTStoreLoad(DataType, Alignment);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) const override {
    // Only supports little-endian targets.
    if (ST->isLittleEndian())
      return isLegalNTStoreLoad(DataType, Alignment);
    return BaseT::isLegalNTLoad(DataType, Alignment);
  }

  InstructionCost getPartialReductionCost(
      unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
      ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
      TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
      TTI::TargetCostKind CostKind) const override;

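  // Allow the vectorizer to form in-order (strict FP) reductions rather than
  // keeping such loops scalar.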
  bool enableOrderedReductions() const override { return true; }

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) const override;

  bool shouldConsiderAddressTypePromotion(
      const Instruction &I,
      bool &AllowPromotionWithoutCommonHeader) const override;

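  // Vector reduction intrinsics are lowered directly by the backend (e.g. to
  // ADDV), so they never need to be expanded into shuffle sequences.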
  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return false;
  }

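  // Rematerializing a global address typically takes two instructions on
  // AArch64 (an ADRP/ADD or ADRP/LDR pair).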
  unsigned getGISelRematGlobalCost() const override { return 2; }

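  // Heuristic: with SVE, tail folding is only considered worthwhile once the
  // loop's trip count reaches this threshold; without SVE there is no minimum.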
  unsigned getMinTripCountTailFoldingThreshold() const override {
    return ST->hasSVE() ? 5 : 0;
  }

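  // With SVE, predicate both data and control flow using an active lane mask;
  // when the IV update may overflow, pick the variant that adjusts the trip
  // count so no runtime overflow check is needed. Without SVE, only the data
  // is predicated, with the mask computed without get.active.lane.mask.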
  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override {
    if (ST->hasSVE())
      return IVUpdateMayOverflow
                 ? TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck
                 : TailFoldingStyle::DataAndControlFlow;

    return TailFoldingStyle::DataWithoutLaneMask;
  }

  bool preferFixedOverScalableIfEqualCost() const override;

  unsigned getEpilogueVectorizationMinVF() const override;

  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override;

  bool supportsScalableVectors() const override {
    return ST->isSVEorStreamingSVEAvailable();
  }

  bool enableScalableVectorization() const override;

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const override;

  bool preferPredicatedReductionSelect() const override { return ST->hasSVE(); }

  InstructionCost
  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                             std::optional<FastMathFlags> FMF,
                             TTI::TargetCostKind CostKind) const override;

  InstructionCost
  getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
                           VectorType *ValTy, std::optional<FastMathFlags> FMF,
                           TTI::TargetCostKind CostKind) const override;

  InstructionCost getMulAccReductionCost(
      bool IsUnsigned, Type *ResTy, VectorType *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const override;

  InstructionCost
  getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
                 ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
                 VectorType *SubTp, ArrayRef<const Value *> Args = {},
                 const Instruction *CxtI = nullptr) const override;

  InstructionCost getScalarizationOverhead(
      VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
      TTI::TargetCostKind CostKind, bool ForPoisonSrc = true,
      ArrayRef<Value *> VL = {}) const override;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns an invalid cost.
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) const override;

  bool enableSelectOptimize() const override {
    return ST->enableSelectOptimize();
  }

  bool shouldTreatInstructionLikeSelect(const Instruction *I) const override;

  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const override {
    // We can vectorize store v4i8.
    if (ScalarMemTy->isIntegerTy(8) && isPowerOf2_32(VF) && VF >= 4)
      return 4;

    return BaseT::getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
  }

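  // AArch64 supports 4 KiB, 16 KiB and 64 KiB translation granules, so 4 KiB
  // is the smallest page size that can be assumed.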
  std::optional<unsigned> getMinPageSize() const override { return 4096; }

  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     const TargetTransformInfo::LSRCost &C2) const override;

  bool isProfitableToSinkOperands(Instruction *I,
                                  SmallVectorImpl<Use *> &Ops) const override;
  /// @}
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H