| 1 | //===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// \file |
| 9 | /// This file defines a TargetTransformInfoImplBase conforming object specific |
| 10 | /// to the AArch64 target machine. It uses the target's detailed information to |
| 11 | /// provide more precise answers to certain TTI queries, while letting the |
| 12 | /// target independent and default TTI implementations handle the rest. |
| 13 | /// |
| 14 | //===----------------------------------------------------------------------===// |
| 15 | |
| 16 | #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H |
| 17 | #define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H |
| 18 | |
| 19 | #include "AArch64.h" |
| 20 | #include "AArch64Subtarget.h" |
| 21 | #include "AArch64TargetMachine.h" |
| 22 | #include "llvm/Analysis/TargetTransformInfo.h" |
| 23 | #include "llvm/CodeGen/BasicTTIImpl.h" |
| 24 | #include "llvm/IR/Function.h" |
| 25 | #include "llvm/IR/Intrinsics.h" |
| 26 | #include "llvm/Support/InstructionCost.h" |
| 27 | #include <cstdint> |
| 28 | #include <optional> |
| 29 | |
| 30 | namespace llvm { |
| 31 | |
| 32 | class APInt; |
| 33 | class Instruction; |
| 34 | class IntrinsicInst; |
| 35 | class Loop; |
| 36 | class SCEV; |
| 37 | class ScalarEvolution; |
| 38 | class Type; |
| 39 | class Value; |
| 40 | class VectorType; |
| 41 | |
| 42 | class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> { |
| 43 | using BaseT = BasicTTIImplBase<AArch64TTIImpl>; |
| 44 | using TTI = TargetTransformInfo; |
| 45 | |
| 46 | friend BaseT; |
| 47 | |
| 48 | const AArch64Subtarget *ST; |
| 49 | const AArch64TargetLowering *TLI; |
| 50 | |
| 51 | static const FeatureBitset InlineInverseFeatures; |
| 52 | |
| 53 | const AArch64Subtarget *getST() const { return ST; } |
| 54 | const AArch64TargetLowering *getTLI() const { return TLI; } |
| 55 | |
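| | // Classifies structured NEON/SVE load/store intrinsics (the ld2/st2, ld3/st3 |
| | // and ld4/st4 families) by the number of interleaved elements they access. |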
| 56 | enum MemIntrinsicType { |
| 57 | VECTOR_LDST_TWO_ELEMENTS, |
| 58 | VECTOR_LDST_THREE_ELEMENTS, |
| 59 | VECTOR_LDST_FOUR_ELEMENTS |
| 60 | }; |
| 61 | |
| 62 | bool isWideningInstruction(Type *DstTy, unsigned Opcode, |
| 63 | ArrayRef<const Value *> Args, |
| 64 | Type *SrcOverrideTy = nullptr) const; |
| 65 | |
| 66 | // A helper function called by 'getVectorInstrCost'. |
| 67 | // |
| 68 | // 'Val' and 'Index' are forwarded from 'getVectorInstrCost'; 'HasRealUse' |
| 69 | // indicates whether the vector instruction is available in the input IR or |
| 70 | // just imaginary in vectorizer passes. |
| 71 | /// \param ScalarUserAndIdx encodes the information about extracts from a |
| 72 | /// vector with 'Scalar' being the value being extracted, 'User' being the |
| 73 | /// user of the extract (nullptr if the user is not known before |
| 74 | /// vectorization) and 'Idx' being the extract lane. |
| 75 | InstructionCost getVectorInstrCostHelper( |
| 76 | unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, |
| 77 | bool HasRealUse, const Instruction *I = nullptr, Value *Scalar = nullptr, |
| 78 | ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx = {}) const; |
| 79 | |
| 80 | public: |
| 81 | explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F) |
| 82 | : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)), |
| 83 | TLI(ST->getTargetLowering()) {} |
| 84 | |
| 85 | bool areInlineCompatible(const Function *Caller, |
| 86 | const Function *Callee) const override; |
| 87 | |
| 88 | bool areTypesABICompatible(const Function *Caller, const Function *Callee, |
| 89 | const ArrayRef<Type *> &Types) const override; |
| 90 | |
| 91 | unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, |
| 92 | unsigned DefaultCallPenalty) const override; |
| 93 | |
| 94 | uint64_t getFeatureMask(const Function &F) const override; |
| 95 | |
| 96 | bool isMultiversionedFunction(const Function &F) const override; |
| 97 | |
| 98 | /// \name Scalar TTI Implementations |
| 99 | /// @{ |
| 100 | |
| 101 | using BaseT::getIntImmCost; |
| 102 | InstructionCost getIntImmCost(int64_t Val) const; |
| 103 | InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, |
| 104 | TTI::TargetCostKind CostKind) const override; |
| 105 | InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, |
| 106 | const APInt &Imm, Type *Ty, |
| 107 | TTI::TargetCostKind CostKind, |
| 108 | Instruction *Inst = nullptr) const override; |
| 109 | InstructionCost |
| 110 | getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, |
| 111 | Type *Ty, TTI::TargetCostKind CostKind) const override; |
| 112 | TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override; |
| 113 | |
| 114 | /// @} |
| 115 | |
| 116 | /// \name Vector TTI Implementations |
| 117 | /// @{ |
| 118 | |
| 119 | bool enableInterleavedAccessVectorization() const override { return true; } |
| 120 | |
| 121 | bool enableMaskedInterleavedAccessVectorization() const override { |
| 122 | return ST->hasSVE(); |
| 123 | } |
| 124 | |
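| | // AArch64 has 31 general-purpose registers (X0-X30) and, when NEON is |
| | // available, 32 SIMD/FP vector registers (V0-V31). |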
| 125 | unsigned getNumberOfRegisters(unsigned ClassID) const override { |
| 126 | bool Vector = (ClassID == 1); |
| 127 | if (Vector) { |
| 128 | if (ST->hasNEON()) |
| 129 | return 32; |
| 130 | return 0; |
| 131 | } |
| 132 | return 31; |
| 133 | } |
| 134 | |
| 135 | InstructionCost |
| 136 | getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, |
| 137 | TTI::TargetCostKind CostKind) const override; |
| 138 | |
| 139 | std::optional<Instruction *> |
| 140 | instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override; |
| 141 | |
| 142 | std::optional<Value *> simplifyDemandedVectorEltsIntrinsic( |
| 143 | InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, |
| 144 | APInt &UndefElts2, APInt &UndefElts3, |
| 145 | std::function<void(Instruction *, unsigned, APInt, APInt &)> |
| 146 | SimplifyAndSetOp) const override; |
| 147 | |
| 148 | TypeSize |
| 149 | getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override; |
| 150 | |
| 151 | unsigned getMinVectorRegisterBitWidth() const override { |
| 152 | return ST->getMinVectorRegisterBitWidth(); |
| 153 | } |
| 154 | |
| 155 | std::optional<unsigned> getVScaleForTuning() const override { |
| 156 | return ST->getVScaleForTuning(); |
| 157 | } |
| 158 | |
| 159 | bool isVScaleKnownToBeAPowerOfTwo() const override { return true; } |
| 160 | |
| 161 | bool shouldMaximizeVectorBandwidth( |
| 162 | TargetTransformInfo::RegisterKind K) const override; |
| 163 | |
| 164 | /// Try to return an estimate cost factor that can be used as a multiplier |
| 165 | /// when scalarizing an operation for a vector with ElementCount \p VF. |
| 166 | /// For scalable vectors this currently takes the most pessimistic view based |
| 167 | /// upon the maximum possible value for vscale. |
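| | /// For example (illustrative only): for a scalable \p VF with a known minimum |
| | /// of 4 and a tuning vscale of 2, this returns 4 * 2 = 8. |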
| 168 | unsigned getMaxNumElements(ElementCount VF) const { |
| 169 | if (!VF.isScalable()) |
| 170 | return VF.getFixedValue(); |
| 171 | |
| 172 | return VF.getKnownMinValue() * ST->getVScaleForTuning(); |
| 173 | } |
| 174 | |
| 175 | unsigned getMaxInterleaveFactor(ElementCount VF) const override; |
| 176 | |
| 177 | bool prefersVectorizedAddressing() const override; |
| 178 | |
| 179 | InstructionCost |
| 180 | getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, |
| 181 | unsigned AddressSpace, |
| 182 | TTI::TargetCostKind CostKind) const override; |
| 183 | |
| 184 | InstructionCost |
| 185 | getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, |
| 186 | bool VariableMask, Align Alignment, |
| 187 | TTI::TargetCostKind CostKind, |
| 188 | const Instruction *I = nullptr) const override; |
| 189 | |
| 190 | bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, |
| 191 | Type *Src) const; |
| 192 | |
| 193 | InstructionCost |
| 194 | getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, |
| 195 | TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, |
| 196 | const Instruction *I = nullptr) const override; |
| 197 | |
| 198 | InstructionCost |
| 199 | getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, |
| 200 | unsigned Index, |
| 201 | TTI::TargetCostKind CostKind) const override; |
| 202 | |
| 203 | InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, |
| 204 | const Instruction *I = nullptr) const override; |
| 205 | |
| 206 | InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, |
| 207 | TTI::TargetCostKind CostKind, |
| 208 | unsigned Index, const Value *Op0, |
| 209 | const Value *Op1) const override; |
| 210 | |
| 211 | /// \param ScalarUserAndIdx encodes the information about extracts from a |
| 212 | /// vector with 'Scalar' being the value being extracted, 'User' being the |
| 213 | /// user of the extract (nullptr if the user is not known before |
| 214 | /// vectorization) and 'Idx' being the extract lane. |
| 215 | InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, |
| 216 | TTI::TargetCostKind CostKind, |
| 217 | unsigned Index, Value *Scalar, |
| 218 | ArrayRef<std::tuple<Value *, User *, int>> |
| 219 | ScalarUserAndIdx) const override; |
| 220 | |
| 221 | InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, |
| 222 | TTI::TargetCostKind CostKind, |
| 223 | unsigned Index) const override; |
| 224 | |
| 225 | InstructionCost |
| 226 | getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, |
| 227 | TTI::TargetCostKind CostKind) const override; |
| 228 | |
| 229 | InstructionCost |
| 230 | getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, |
| 231 | TTI::TargetCostKind CostKind) const; |
| 232 | |
| 233 | InstructionCost getSpliceCost(VectorType *Tp, int Index, |
| 234 | TTI::TargetCostKind CostKind) const; |
| 235 | |
| 236 | InstructionCost getArithmeticInstrCost( |
| 237 | unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, |
| 238 | TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, |
| 239 | TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, |
| 240 | ArrayRef<const Value *> Args = {}, |
| 241 | const Instruction *CxtI = nullptr) const override; |
| 242 | |
| 243 | InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, |
| 244 | const SCEV *Ptr) const override; |
| 245 | |
| 246 | InstructionCost getCmpSelInstrCost( |
| 247 | unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, |
| 248 | TTI::TargetCostKind CostKind, |
| 249 | TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, |
| 250 | TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, |
| 251 | const Instruction *I = nullptr) const override; |
| 252 | |
| 253 | TTI::MemCmpExpansionOptions |
| 254 | enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override; |
| 255 | bool useNeonVector(const Type *Ty) const; |
| 256 | |
| 257 | InstructionCost getMemoryOpCost( |
| 258 | unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, |
| 259 | TTI::TargetCostKind CostKind, |
| 260 | TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None}, |
| 261 | const Instruction *I = nullptr) const override; |
| 262 | |
| 263 | InstructionCost |
| 264 | getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const override; |
| 265 | |
| 266 | void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, |
| 267 | TTI::UnrollingPreferences &UP, |
| 268 | OptimizationRemarkEmitter *ORE) const override; |
| 269 | |
| 270 | void getPeelingPreferences(Loop *L, ScalarEvolution &SE, |
| 271 | TTI::PeelingPreferences &PP) const override; |
| 272 | |
| 273 | Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, |
| 274 | Type *ExpectedType) const override; |
| 275 | |
| 276 | bool getTgtMemIntrinsic(IntrinsicInst *Inst, |
| 277 | MemIntrinsicInfo &Info) const override; |
| 278 | |
| 279 | bool isElementTypeLegalForScalableVector(Type *Ty) const override { |
| 280 | if (Ty->isPointerTy()) |
| 281 | return true; |
| 282 | |
| 283 | if (Ty->isBFloatTy() && ST->hasBF16()) |
| 284 | return true; |
| 285 | |
| 286 | if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) |
| 287 | return true; |
| 288 | |
| 289 | if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) || |
| 290 | Ty->isIntegerTy(32) || Ty->isIntegerTy(64)) |
| 291 | return true; |
| 292 | |
| 293 | return false; |
| 294 | } |
| 295 | |
| 296 | bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const { |
| 297 | if (!ST->hasSVE()) |
| 298 | return false; |
| 299 | |
| 300 | // For fixed vectors, avoid scalarization if using SVE for them or the size is 128 bits. |
| 301 | if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors() && |
| 302 | DataType->getPrimitiveSizeInBits() != 128) |
| 303 | return false; // Fall back to scalarization of masked operations. |
| 304 | |
| 305 | return isElementTypeLegalForScalableVector(DataType->getScalarType()); |
| 306 | } |
| 307 | |
| 308 | bool isLegalMaskedLoad(Type *DataType, Align Alignment, |
| 309 | unsigned /*AddressSpace*/) const override { |
| 310 | return isLegalMaskedLoadStore(DataType, Alignment); |
| 311 | } |
| 312 | |
| 313 | bool isLegalMaskedStore(Type *DataType, Align Alignment, |
| 314 | unsigned /*AddressSpace*/) const override { |
| 315 | return isLegalMaskedLoadStore(DataType, Alignment); |
| 316 | } |
| 317 | |
| 318 | bool isLegalMaskedGatherScatter(Type *DataType) const { |
| 319 | if (!ST->isSVEAvailable()) |
| 320 | return false; |
| 321 | |
| 322 | // For fixed vectors, scalarize if not using SVE for them. |
| 323 | auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType); |
| 324 | if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() || |
| 325 | DataTypeFVTy->getNumElements() < 2)) |
| 326 | return false; |
| 327 | |
| 328 | return isElementTypeLegalForScalableVector(DataType->getScalarType()); |
| 329 | } |
| 330 | |
| 331 | bool isLegalMaskedGather(Type *DataType, Align Alignment) const override { |
| 332 | return isLegalMaskedGatherScatter(DataType); |
| 333 | } |
| 334 | |
| 335 | bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override { |
| 336 | return isLegalMaskedGatherScatter(DataType); |
| 337 | } |
| 338 | |
| 339 | bool isLegalBroadcastLoad(Type *ElementTy, |
| 340 | ElementCount NumElements) const override { |
| 341 | // Return true if we can generate a `ld1r` splat load instruction. |
| 342 | if (!ST->hasNEON() || NumElements.isScalable()) |
| 343 | return false; |
| 344 | switch (unsigned ElementBits = ElementTy->getScalarSizeInBits()) { |
| 345 | case 8: |
| 346 | case 16: |
| 347 | case 32: |
| 348 | case 64: { |
| 349 | // We accept total vector widths >= 64 bits for {8,16,32,64}-bit elements. |
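| | // E.g. an 8 x i8 or 2 x f32 splat (64 bits total) qualifies, while a |
| | // 2 x i8 splat (16 bits) does not. |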
| 350 | unsigned VectorBits = NumElements.getFixedValue() * ElementBits; |
| 351 | return VectorBits >= 64; |
| 352 | } |
| 353 | } |
| 354 | return false; |
| 355 | } |
| 356 | |
| 357 | bool isLegalNTStoreLoad(Type *DataType, Align Alignment) const { |
| 358 | // NOTE: The logic below is mostly geared towards LV, which calls it with |
| 359 | // vectors with 2 elements. We might want to improve that, if other |
| 360 | // users show up. |
| 361 | // Nontemporal vector loads/stores can be directly lowered to LDNP/STNP, if |
| 362 | // the vector can be halved so that each half fits into a register. That's |
| 363 | // the case if the element type fits into a register and the number of |
| 364 | // elements is a power of 2 > 1. |
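| | // For example, a nontemporal <4 x i32> store can be split into two 64-bit |
| | // halves and emitted as a single STNP of two D registers. |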
| 365 | if (auto *DataTypeTy = dyn_cast<FixedVectorType>(DataType)) { |
| 366 | unsigned NumElements = DataTypeTy->getNumElements(); |
| 367 | unsigned EltSize = DataTypeTy->getElementType()->getScalarSizeInBits(); |
| 368 | return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 && |
| 369 | EltSize <= 128 && isPowerOf2_64(EltSize); |
| 370 | } |
| 371 | return BaseT::isLegalNTStore(DataType, Alignment); |
| 372 | } |
| 373 | |
| 374 | bool isLegalNTStore(Type *DataType, Align Alignment) const override { |
| 375 | return isLegalNTStoreLoad(DataType, Alignment); |
| 376 | } |
| 377 | |
| 378 | bool isLegalNTLoad(Type *DataType, Align Alignment) const override { |
| 379 | // Only supports little-endian targets. |
| 380 | if (ST->isLittleEndian()) |
| 381 | return isLegalNTStoreLoad(DataType, Alignment); |
| 382 | return BaseT::isLegalNTLoad(DataType, Alignment); |
| 383 | } |
| 384 | |
| 385 | InstructionCost getPartialReductionCost( |
| 386 | unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, |
| 387 | ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, |
| 388 | TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp, |
| 389 | TTI::TargetCostKind CostKind) const override; |
| 390 | |
| 391 | bool enableOrderedReductions() const override { return true; } |
| 392 | |
| 393 | InstructionCost getInterleavedMemoryOpCost( |
| 394 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, |
| 395 | Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, |
| 396 | bool UseMaskForCond = false, bool UseMaskForGaps = false) const override; |
| 397 | |
| 398 | bool shouldConsiderAddressTypePromotion( |
| 399 | const Instruction &I, |
| 400 | bool &AllowPromotionWithoutCommonHeader) const override; |
| 401 | |
| 402 | bool shouldExpandReduction(const IntrinsicInst *II) const override { |
| 403 | return false; |
| 404 | } |
| 405 | |
| 406 | unsigned getGISelRematGlobalCost() const override { return 2; } |
| 407 | |
| 408 | unsigned getMinTripCountTailFoldingThreshold() const override { |
| 409 | return ST->hasSVE() ? 5 : 0; |
| 410 | } |
| 411 | |
| 412 | TailFoldingStyle |
| 413 | getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override { |
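| | // With SVE the loop tail can be folded into a predicated vector body. If |
| | // the IV update may overflow, extra control flow is needed to keep the |
| | // lane mask correct (see TailFoldingStyle for the exact semantics). |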
| 414 | if (ST->hasSVE()) |
| 415 | return IVUpdateMayOverflow |
| 416 | ? TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck |
| 417 | : TailFoldingStyle::DataAndControlFlow; |
| 418 | |
| 419 | return TailFoldingStyle::DataWithoutLaneMask; |
| 420 | } |
| 421 | |
| 422 | bool preferFixedOverScalableIfEqualCost() const override; |
| 423 | |
| 424 | unsigned getEpilogueVectorizationMinVF() const override; |
| 425 | |
| 426 | bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override; |
| 427 | |
| 428 | bool supportsScalableVectors() const override { |
| 429 | return ST->isSVEorStreamingSVEAvailable(); |
| 430 | } |
| 431 | |
| 432 | bool enableScalableVectorization() const override; |
| 433 | |
| 434 | bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, |
| 435 | ElementCount VF) const override; |
| 436 | |
| 437 | bool preferPredicatedReductionSelect() const override { return ST->hasSVE(); } |
| 438 | |
| 439 | InstructionCost |
| 440 | getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, |
| 441 | std::optional<FastMathFlags> FMF, |
| 442 | TTI::TargetCostKind CostKind) const override; |
| 443 | |
| 444 | InstructionCost |
| 445 | getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, |
| 446 | VectorType *ValTy, std::optional<FastMathFlags> FMF, |
| 447 | TTI::TargetCostKind CostKind) const override; |
| 448 | |
| 449 | InstructionCost getMulAccReductionCost( |
| 450 | bool IsUnsigned, Type *ResTy, VectorType *Ty, |
| 451 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const override; |
| 452 | |
| 453 | InstructionCost |
| 454 | getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, |
| 455 | ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index, |
| 456 | VectorType *SubTp, ArrayRef<const Value *> Args = {}, |
| 457 | const Instruction *CxtI = nullptr) const override; |
| 458 | |
| 459 | InstructionCost getScalarizationOverhead( |
| 460 | VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, |
| 461 | TTI::TargetCostKind CostKind, bool ForPoisonSrc = true, |
| 462 | ArrayRef<Value *> VL = {}) const override; |
| 463 | |
| 464 | /// Return the cost of the scaling factor used in the addressing |
| 465 | /// mode represented by AM for this target, for a load/store |
| 466 | /// of the specified type. |
| 467 | /// If the AM is supported, the return value must be >= 0. |
| 468 | /// If the AM is not supported, it returns an invalid cost. |
| 469 | InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, |
| 470 | StackOffset BaseOffset, bool HasBaseReg, |
| 471 | int64_t Scale, |
| 472 | unsigned AddrSpace) const override; |
| 473 | |
| 474 | bool enableSelectOptimize() const override { |
| 475 | return ST->enableSelectOptimize(); |
| 476 | } |
| 477 | |
| 478 | bool shouldTreatInstructionLikeSelect(const Instruction *I) const override; |
| 479 | |
| 480 | unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, |
| 481 | Type *ScalarValTy) const override { |
| 482 | // We can vectorize store v4i8. |
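| | // (four i8 values written as a single 32-bit unit), so use a minimum VF of |
| | // 4 for i8 stores instead of the generic default. |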
| 483 | if (ScalarMemTy->isIntegerTy(8) && isPowerOf2_32(VF) && VF >= 4) |
| 484 | return 4; |
| 485 | |
| 486 | return BaseT::getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy); |
| 487 | } |
| 488 | |
| 489 | std::optional<unsigned> getMinPageSize() const override { return 4096; } |
| 490 | |
| 491 | bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, |
| 492 | const TargetTransformInfo::LSRCost &C2) const override; |
| 493 | |
| 494 | bool isProfitableToSinkOperands(Instruction *I, |
| 495 | SmallVectorImpl<Use *> &Ops) const override; |
| 496 | /// @} |
| 497 | }; |
| 498 | |
| 499 | } // end namespace llvm |
| 500 | |
| 501 | #endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H |
| 502 | |