//===-- X86TargetTransformInfo.h - X86 specific TTI -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfoImplBase conforming object specific
/// to the X86 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target-independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H

#include "X86TargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include <optional>

namespace llvm {

class InstCombiner;

class X86TTIImpl final : public BasicTTIImplBase<X86TTIImpl> {
  typedef BasicTTIImplBase<X86TTIImpl> BaseT;
  typedef TargetTransformInfo TTI;
  friend BaseT;
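
  // Editorial note (hedged, not in the original header): BasicTTIImplBase
  // uses CRTP, so BaseT calls back into X86TTIImpl via a static_cast of
  // `this`; the friend declaration above is what lets the base class reach
  // the private getST()/getTLI() accessors below.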

  const X86Subtarget *ST;
  const X86TargetLowering *TLI;

  const X86Subtarget *getST() const { return ST; }
  const X86TargetLowering *getTLI() const { return TLI; }

  const FeatureBitset InlineFeatureIgnoreList = {
      // This indicates the CPU is 64-bit capable, not that we are in 64-bit
      // mode.
      X86::FeatureX86_64,

      // These features don't have any intrinsic or ABI effect.
      X86::FeatureNOPL,
      X86::FeatureCX16,
      X86::FeatureLAHFSAHF64,

      // Some older targets can be set up to fold unaligned loads.
      X86::FeatureSSEUnalignedMem,
      // Codegen control options.
      X86::TuningFast11ByteNOP,
      X86::TuningFast15ByteNOP,
      X86::TuningFastBEXTR,
      X86::TuningFastHorizontalOps,
      X86::TuningFastLZCNT,
      X86::TuningFastScalarFSQRT,
      X86::TuningFastSHLDRotate,
      X86::TuningFastScalarShiftMasks,
      X86::TuningFastVectorShiftMasks,
      X86::TuningFastVariableCrossLaneShuffle,
      X86::TuningFastVariablePerLaneShuffle,
      X86::TuningFastVectorFSQRT,
      X86::TuningLEAForSP,
      X86::TuningLEAUsesAG,
      X86::TuningLZCNTFalseDeps,
      X86::TuningBranchFusion,
      X86::TuningMacroFusion,
      X86::TuningPadShortFunctions,
      X86::TuningPOPCNTFalseDeps,
      X86::TuningMULCFalseDeps,
      X86::TuningPERMFalseDeps,
      X86::TuningRANGEFalseDeps,
      X86::TuningGETMANTFalseDeps,
      X86::TuningMULLQFalseDeps,
      X86::TuningSlow3OpsLEA,
      X86::TuningSlowDivide32,
      X86::TuningSlowDivide64,
      X86::TuningSlowIncDec,
      X86::TuningSlowLEA,
      X86::TuningSlowPMADDWD,
      X86::TuningSlowPMULLD,
      X86::TuningSlowSHLD,
      X86::TuningSlowTwoMemOps,
      X86::TuningSlowUAMem16,
      X86::TuningPreferMaskRegisters,
      X86::TuningInsertVZEROUPPER,
      X86::TuningUseSLMArithCosts,
      X86::TuningUseGLMDivSqrtCosts,
      X86::TuningNoDomainDelay,
      X86::TuningNoDomainDelayMov,
      X86::TuningNoDomainDelayShuffle,
      X86::TuningNoDomainDelayBlend,
      X86::TuningPreferShiftShuffle,
      X86::TuningFastImmVectorShift,
      X86::TuningFastDPWSSD,

      // Perf-tuning flags.
      X86::TuningFastGather,
      X86::TuningSlowUAMem32,
      X86::TuningAllowLight256Bit,

      // Based on whether the user set the -mprefer-vector-width command-line
      // option.
      X86::TuningPrefer128Bit,
      X86::TuningPrefer256Bit,

      // CPU name enums. These just follow the CPU string.
      X86::ProcIntelAtom
  };
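
  // Illustrative sketch (an assumption, not copied from the implementation):
  // the intent of the list above is that areInlineCompatible() masks these
  // bits out of both feature sets before comparing caller and callee,
  // roughly:
  //
  //   FeatureBitset CallerBits =
  //       ST->getFeatureBits() & ~InlineFeatureIgnoreList;
  //   FeatureBitset CalleeBits =
  //       CalleeST->getFeatureBits() & ~InlineFeatureIgnoreList;  // CalleeST:
  //                                        // hypothetical callee subtarget
  //   return (CallerBits & CalleeBits) == CalleeBits;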

public:
  explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  /// \name Scalar TTI Implementations
  /// @{
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
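
  // Hedged usage example (editorial, not from the original header): a
  // transform can ask whether a population count of a given bit width is
  // cheap before forming llvm.ctpop, e.g.
  //
  //   if (TTI.getPopcntSupport(32) == TTI::PSK_FastHardware)
  //     /* emit @llvm.ctpop.i32 instead of a bit-twiddling sequence */;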

  /// @}

  /// \name Cache TTI Implementation
  /// @{
  std::optional<unsigned> getCacheSize(
      TargetTransformInfo::CacheLevel Level) const override;
  std::optional<unsigned> getCacheAssociativity(
      TargetTransformInfo::CacheLevel Level) const override;
  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(unsigned ClassID) const override;
  bool hasConditionalLoadStoreForType(Type *Ty, bool IsStore) const override;
  TypeSize
  getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override;
  unsigned getLoadStoreVecRegBitWidth(unsigned AS) const override;
  unsigned getMaxInterleaveFactor(ElementCount VF) const override;
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = {},
      const Instruction *CxtI = nullptr) const override;
  InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
                                  unsigned Opcode1,
                                  const SmallBitVector &OpcodeMask,
                                  TTI::TargetCostKind CostKind) const override;

  InstructionCost
  getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
                 ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
                 VectorType *SubTp, ArrayRef<const Value *> Args = {},
                 const Instruction *CxtI = nullptr) const override;
  InstructionCost
  getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                   TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
                   const Instruction *I = nullptr) const override;
  InstructionCost getCmpSelInstrCost(
      unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override;
  using BaseT::getVectorInstrCost;
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, const Value *Op0,
                                     const Value *Op1) const override;
  InstructionCost getScalarizationOverhead(
      VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
      TTI::TargetCostKind CostKind, bool ForPoisonSrc = true,
      ArrayRef<Value *> VL = {}) const override;
  InstructionCost
  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                            const APInt &DemandedDstElts,
                            TTI::TargetCostKind CostKind) const override;
  InstructionCost getMemoryOpCost(
      unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override;
  InstructionCost
  getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                        unsigned AddressSpace,
                        TTI::TargetCostKind CostKind) const override;
  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I) const override;
  InstructionCost
  getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
                       const TTI::PointersChainInfo &Info, Type *AccessTy,
                       TTI::TargetCostKind CostKind) const override;
  InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
                                            const SCEV *Ptr) const override;

  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;
  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const override;
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const override;

  unsigned getAtomicMemIntrinsicMaxElementSize() const override;

  InstructionCost
  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                        TTI::TargetCostKind CostKind) const override;

  InstructionCost
  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                             std::optional<FastMathFlags> FMF,
                             TTI::TargetCostKind CostKind) const override;

  InstructionCost getMinMaxCost(Intrinsic::ID IID, Type *Ty,
                                TTI::TargetCostKind CostKind,
                                FastMathFlags FMF) const;

  InstructionCost
  getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
                         TTI::TargetCostKind CostKind) const override;

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) const override;
  InstructionCost getInterleavedMemoryOpCostAVX512(
      unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
      ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind, bool UseMaskForCond = false,
      bool UseMaskForGaps = false) const;

  InstructionCost getIntImmCost(int64_t) const;

  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind) const override;

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) const override;

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const override;
  InstructionCost
  getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                      Type *Ty, TTI::TargetCostKind CostKind) const override;
  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns an invalid cost.
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) const override;
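
  // Hedged illustration (editorial, not from the original header): for an x86
  // load such as `movl 16(%rdi,%rcx,4), %eax`, the addressing mode being
  // costed has HasBaseReg = true, Scale = 4, and BaseOffset = 16; the hook
  // reports how expensive that scaling factor is on the current subtarget.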

  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     const TargetTransformInfo::LSRCost &C2) const override;
  bool canMacroFuseCmp() const override;
  bool isLegalMaskedLoad(Type *DataType, Align Alignment,
                         unsigned AddressSpace) const override;
  bool isLegalMaskedStore(Type *DataType, Align Alignment,
                          unsigned AddressSpace) const override;
  bool isLegalNTLoad(Type *DataType, Align Alignment) const override;
  bool isLegalNTStore(Type *DataType, Align Alignment) const override;
  bool isLegalBroadcastLoad(Type *ElementTy,
                            ElementCount NumElements) const override;
  bool forceScalarizeMaskedGather(VectorType *VTy,
                                  Align Alignment) const override;
  bool forceScalarizeMaskedScatter(VectorType *VTy,
                                   Align Alignment) const override {
    return forceScalarizeMaskedGather(VTy, Alignment);
  }
  bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment) const;
  bool isLegalMaskedGather(Type *DataType, Align Alignment) const override;
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override;
  bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const override;
  bool isLegalMaskedCompressStore(Type *DataType,
                                  Align Alignment) const override;
  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const override;
  bool hasDivRemOp(Type *DataType, bool IsSigned) const override;
  bool isExpensiveToSpeculativelyExecute(const Instruction *I) const override;
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const override;
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override;
  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const override;

  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
    return ST->getMaxInlineSizeThreshold();
  }

  TTI::MemCmpExpansionOptions
  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
  bool preferAlternateOpcodeVectorization() const override { return false; }
  bool prefersVectorizedAddressing() const override;
  bool supportsEfficientVectorElementLoadStore() const override;
  bool enableInterleavedAccessVectorization() const override;

  InstructionCost getBranchMispredictPenalty() const override;

  bool isProfitableToSinkOperands(Instruction *I,
                                  SmallVectorImpl<Use *> &Ops) const override;

  bool isVectorShiftByScalarCheap(Type *Ty) const override;

  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const override;

private:
  bool supportsGather() const;
  InstructionCost getGSVectorCost(unsigned Opcode,
                                  TTI::TargetCostKind CostKind, Type *DataTy,
                                  const Value *Ptr, Align Alignment,
                                  unsigned AddressSpace) const;

  int getGatherOverhead() const;
  int getScatterOverhead() const;

  /// @}
};

} // end namespace llvm

#endif