//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfoImplBase conforming object specific
/// to the AArch64 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target-independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H

#include "AArch64.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/InstructionCost.h"
#include <cstdint>
#include <optional>

namespace llvm {

class APInt;
class Instruction;
class IntrinsicInst;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;
class VectorType;

class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
  using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const AArch64Subtarget *ST;
  const AArch64TargetLowering *TLI;

  static const FeatureBitset InlineInverseFeatures;

  const AArch64Subtarget *getST() const { return ST; }
  const AArch64TargetLowering *getTLI() const { return TLI; }

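  // Interleave factor of a structured vector load/store, e.g. the ld2/st2,
  // ld3/st3 and ld4/st4 instruction families.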
  enum MemIntrinsicType {
    VECTOR_LDST_TWO_ELEMENTS,
    VECTOR_LDST_THREE_ELEMENTS,
    VECTOR_LDST_FOUR_ELEMENTS
  };

  /// Given an add/sub/mul operation, detect a widening addl/subl/mull pattern
  /// where both operands can be treated like extends. Returns the minimal type
  /// needed to compute the operation.
  Type *isBinExtWideningInstruction(unsigned Opcode, Type *DstTy,
                                    ArrayRef<const Value *> Args,
                                    Type *SrcOverrideTy = nullptr) const;
  /// Given an add/sub operation with a single extend operand, detect a
  /// widening addw/subw pattern.
  bool isSingleExtWideningInstruction(unsigned Opcode, Type *DstTy,
                                      ArrayRef<const Value *> Args,
                                      Type *SrcOverrideTy = nullptr) const;

  // A helper function called by 'getVectorInstrCost'.
  //
  // 'Val' and 'Index' are forwarded from 'getVectorInstrCost';
  // \param ScalarUserAndIdx encodes the information about extracts from a
  // vector, with 'Scalar' being the value being extracted, 'User' being the
  // user of the extract (nullptr if the user is not known before
  // vectorization) and 'Idx' being the extract lane.
  InstructionCost getVectorInstrCostHelper(
      unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
      const Instruction *I = nullptr, Value *Scalar = nullptr,
      ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx = {},
      TTI::VectorInstrContext VIC = TTI::VectorInstrContext::None) const;

public:
  explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override;

  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             ArrayRef<Type *> Types) const override;

  unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
                                unsigned DefaultCallPenalty) const override;

  APInt getFeatureMask(const Function &F) const override;
  APInt getPriorityMask(const Function &F) const override;

  bool isMultiversionedFunction(const Function &F) const override;

  /// \name Scalar TTI Implementations
  /// @{

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(int64_t Val) const;
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind) const override;
  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const override;
  InstructionCost
  getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                      Type *Ty, TTI::TargetCostKind CostKind) const override;
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  bool enableInterleavedAccessVectorization() const override { return true; }

  bool enableMaskedInterleavedAccessVectorization() const override {
    return ST->hasSVE();
  }

  unsigned getNumberOfRegisters(unsigned ClassID) const override {
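    // ClassID 1 requests the vector register class (32 V registers when NEON
    // is available); any other class is treated as the 31 general-purpose
    // registers.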
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 32;
      return 0;
    }
    return 31;
  }

  InstructionCost
  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                        TTI::TargetCostKind CostKind) const override;

  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const override;

  TypeSize
  getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override;

  unsigned getMinVectorRegisterBitWidth() const override {
    return ST->getMinVectorRegisterBitWidth();
  }

  std::optional<unsigned> getVScaleForTuning() const override {
    return ST->getVScaleForTuning();
  }

  bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }

  bool shouldMaximizeVectorBandwidth(
      TargetTransformInfo::RegisterKind K) const override;

  /// Try to return an estimate cost factor that can be used as a multiplier
  /// when scalarizing an operation for a vector with ElementCount \p VF.
  /// For scalable vectors the estimate is currently based on the subtarget's
  /// vscale-for-tuning value.
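  /// For example, a VF of vscale x 4 with a vscale-for-tuning of 2 is treated
  /// as 8 elements.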
  unsigned getMaxNumElements(ElementCount VF) const {
    if (!VF.isScalable())
      return VF.getFixedValue();

    return VF.getKnownMinValue() * ST->getVScaleForTuning();
  }

  unsigned getMaxInterleaveFactor(ElementCount VF) const override;

  bool prefersVectorizedAddressing() const override;

  /// Check whether Opcode1 has lower throughput than Opcode2 according to the
  /// scheduling model.
  bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1,
                                                  unsigned Opcode2) const;

  InstructionCost
  getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
                           TTI::TargetCostKind CostKind) const override;

  InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
                                        TTI::TargetCostKind CostKind) const;

  InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA,
                                         TTI::TargetCostKind CostKind) const;

  bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst,
                          Type *Src) const;

  InstructionCost
  getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                   TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
                   const Instruction *I = nullptr) const override;

  InstructionCost
  getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                           unsigned Index,
                           TTI::TargetCostKind CostKind) const override;

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) const override;

  InstructionCost
  getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind,
                     unsigned Index, const Value *Op0, const Value *Op1,
                     TTI::VectorInstrContext VIC =
                         TTI::VectorInstrContext::None) const override;

  /// \param ScalarUserAndIdx encodes the information about extracts from a
  /// vector, with 'Scalar' being the value being extracted, 'User' being the
  /// user of the extract (nullptr if the user is not known before
  /// vectorization) and 'Idx' being the extract lane.
  InstructionCost getVectorInstrCost(
      unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
      Value *Scalar,
      ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
      TTI::VectorInstrContext VIC =
          TTI::VectorInstrContext::None) const override;

  InstructionCost
  getVectorInstrCost(const Instruction &I, Type *Val,
                     TTI::TargetCostKind CostKind, unsigned Index,
                     TTI::VectorInstrContext VIC =
                         TTI::VectorInstrContext::None) const override;

  InstructionCost
  getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val,
                                   TTI::TargetCostKind CostKind,
                                   unsigned Index) const override;

  InstructionCost
  getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
                         TTI::TargetCostKind CostKind) const override;

  InstructionCost
  getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy,
                                TTI::TargetCostKind CostKind) const;

  InstructionCost getSpliceCost(VectorType *Tp, int Index,
                                TTI::TargetCostKind CostKind) const;

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = {},
      const Instruction *CxtI = nullptr) const override;

  InstructionCost
  getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr,
                            TTI::TargetCostKind CostKind) const override;

  InstructionCost getCmpSelInstrCost(
      unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override;

  TTI::MemCmpExpansionOptions
  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
  bool useNeonVector(const Type *Ty) const;

  InstructionCost getMemoryOpCost(
      unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override;

  InstructionCost
  getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const override;

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) const override;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) const override;

  Value *
  getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType,
                                    bool CanCreate = true) const override;

  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) const override;

  bool isElementTypeLegalForScalableVector(Type *Ty) const override {
    if (Ty->isPointerTy())
      return true;

    if (Ty->isBFloatTy() && ST->hasBF16())
      return true;

    if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
      return true;

    if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
        Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
      return true;

    return false;
  }

  bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const {
    if (!ST->isSVEorStreamingSVEAvailable())
      return false;

    // For fixed vectors, avoid scalarization if using SVE for them.
    if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors() &&
        DataType->getPrimitiveSizeInBits() != 128)
      return false; // Fall back to scalarization of masked operations.

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment,
                         unsigned /*AddressSpace*/,
                         TTI::MaskKind /*MaskKind*/) const override {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment,
                          unsigned /*AddressSpace*/,
                          TTI::MaskKind /*MaskKind*/) const override {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isElementTypeLegalForCompressStore(Type *Ty) const {
    return Ty->isFloatTy() || Ty->isDoubleTy() || Ty->isIntegerTy(32) ||
           Ty->isIntegerTy(64);
  }

  bool isLegalMaskedCompressStore(Type *DataType,
                                  Align Alignment) const override {
    if (!ST->isSVEAvailable())
      return false;

    if (isa<FixedVectorType>(DataType) &&
        DataType->getPrimitiveSizeInBits() < 128)
      return false;

    return isElementTypeLegalForCompressStore(DataType->getScalarType());
  }

  bool isLegalMaskedGatherScatter(Type *DataType) const {
    if (!ST->isSVEAvailable())
      return false;

    // For fixed vectors, scalarize if not using SVE for them.
    auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType);
    if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||
                         DataTypeFVTy->getNumElements() < 2))
      return false;

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const override {
    return isLegalMaskedGatherScatter(DataType);
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override {
    return isLegalMaskedGatherScatter(DataType);
  }

  bool isLegalBroadcastLoad(Type *ElementTy,
                            ElementCount NumElements) const override {
    // Return true if we can generate a `ld1r` splat load instruction.
    if (!ST->hasNEON() || NumElements.isScalable())
      return false;
    switch (unsigned ElementBits = ElementTy->getScalarSizeInBits()) {
    case 8:
    case 16:
    case 32:
    case 64: {
      // We accept total vector widths >= 64 bits and element widths of
      // {8,16,32,64} bits.
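      // For example, v8i8, v4i16 and v2i32 (64-bit) or v4i32 and v2i64
      // (128-bit) qualify, while a 16-bit v2i8 does not.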
      unsigned VectorBits = NumElements.getFixedValue() * ElementBits;
      return VectorBits >= 64;
    }
    }
    return false;
  }

  std::optional<bool> isLegalNTStoreLoad(Type *DataType,
                                         Align Alignment) const {
    // Currently we only support NT load and store lowering for little-endian
    // targets.
    //
    // Coordinated with LDNP and STNP constraints in
    // `llvm/lib/Target/AArch64/AArch64InstrInfo.td` and
    // `AArch64TargetLowering`.
    if (!ST->isLittleEndian())
      return false;

    // NOTE: The logic below is mostly geared towards LV, which calls it with
    //       vectors with 2 elements. We might want to improve that, if other
    //       users show up.
    // Nontemporal vector loads/stores can be directly lowered to LDNP/STNP, if
    // the vector can be halved so that each half fits into a register. That's
    // the case if the element type fits into a register and the number of
    // elements is a power of 2 > 1.
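    // For example, <4 x i32> (two 64-bit halves) and <4 x i64> (two 128-bit
    // halves) qualify, while single-element or non-power-of-2 vectors do not.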
    if (auto *DataTypeTy = dyn_cast<FixedVectorType>(DataType)) {
      unsigned NumElements = DataTypeTy->getNumElements();
      unsigned EltSize = DataTypeTy->getElementType()->getScalarSizeInBits();
      return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
             EltSize <= 128 && isPowerOf2_64(EltSize);
    }
    return std::nullopt;
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) const override {
    if (auto Result = isLegalNTStoreLoad(DataType, Alignment))
      return *Result;
    // Fall back to target-independent logic.
    return BaseT::isLegalNTStore(DataType, Alignment);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) const override {
    if (auto Result = isLegalNTStoreLoad(DataType, Alignment))
      return *Result;
    // Fall back to target-independent logic.
    return BaseT::isLegalNTLoad(DataType, Alignment);
  }

  InstructionCost getPartialReductionCost(
      unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
      ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
      TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
      TTI::TargetCostKind CostKind,
      std::optional<FastMathFlags> FMF) const override;

  bool enableOrderedReductions() const override { return true; }

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) const override;

  bool shouldConsiderAddressTypePromotion(
      const Instruction &I,
      bool &AllowPromotionWithoutCommonHeader) const override;

  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return false;
  }

  unsigned getGISelRematGlobalCost() const override { return 2; }

  unsigned getMinTripCountTailFoldingThreshold() const override {
    return ST->hasSVE() ? 5 : 0;
  }

  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override {
    if (ST->hasSVE())
      return IVUpdateMayOverflow
                 ? TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck
                 : TailFoldingStyle::DataAndControlFlow;

    return TailFoldingStyle::DataWithoutLaneMask;
  }

  bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override;

  unsigned getEpilogueVectorizationMinVF() const override;

  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override;

  bool supportsScalableVectors() const override {
    return ST->isSVEorStreamingSVEAvailable();
  }

  bool enableScalableVectorization() const override;

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const override;

  bool preferPredicatedReductionSelect() const override { return ST->hasSVE(); }

  /// FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext)) if the
  /// required architecture features are not present.
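  /// For example, without full FP16 support a <4 x half> add is costed as the
  /// fpext of both operands to <4 x float>, a <4 x float> add, and an fptrunc
  /// back to <4 x half>.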
  std::optional<InstructionCost> getFP16BF16PromoteCost(
      Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
      TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE,
      std::function<InstructionCost(Type *)> InstCost) const;

  InstructionCost
  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                             std::optional<FastMathFlags> FMF,
                             TTI::TargetCostKind CostKind) const override;

  InstructionCost
  getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
                           VectorType *ValTy, std::optional<FastMathFlags> FMF,
                           TTI::TargetCostKind CostKind) const override;

  InstructionCost getMulAccReductionCost(
      bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const override;

  InstructionCost
  getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
                 ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
                 VectorType *SubTp, ArrayRef<const Value *> Args = {},
                 const Instruction *CxtI = nullptr) const override;

  InstructionCost
  getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
                           bool Insert, bool Extract,
                           TTI::TargetCostKind CostKind,
                           bool ForPoisonSrc = true, ArrayRef<Value *> VL = {},
                           TTI::VectorInstrContext VIC =
                               TTI::VectorInstrContext::None) const override;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns an invalid cost.
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) const override;

  bool enableSelectOptimize() const override {
    return ST->enableSelectOptimize();
  }

  bool shouldTreatInstructionLikeSelect(const Instruction *I) const override;

  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const override {
    // We can vectorize stores of v4i8.
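    // For example, four or more i8 stores can be widened into a single 32-bit
    // store of a <4 x i8> value, so report a minimum VF of 4 in that case.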
    if (ScalarMemTy->isIntegerTy(8) && isPowerOf2_32(VF) && VF >= 4)
      return 4;

    return BaseT::getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
  }

  std::optional<unsigned> getMinPageSize() const override { return 4096; }

  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     const TargetTransformInfo::LSRCost &C2) const override;

  bool isProfitableToSinkOperands(Instruction *I,
                                  SmallVectorImpl<Use *> &Ops) const override;

  bool enableAggressiveInterleaving(bool) const override {
    return ST->enableAggressiveInterleaving();
  }
  /// @}
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H