| 1 | //===-- X86TargetTransformInfo.h - X86 specific TTI -------------*- C++ -*-===// | 
|---|
| 2 | // | 
|---|
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|---|
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|---|
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|---|
| 6 | // | 
|---|
| 7 | //===----------------------------------------------------------------------===// | 
|---|
| 8 | /// \file | 
|---|
| 9 | /// This file defines a TargetTransformInfoImplBase conforming object specific to the | 
|---|
| 10 | /// X86 target machine. It uses the target's detailed information to | 
|---|
| 11 | /// provide more precise answers to certain TTI queries, while letting the | 
|---|
| 12 | /// target independent and default TTI implementations handle the rest. | 
|---|
| 13 | /// | 
|---|
| 14 | //===----------------------------------------------------------------------===// | 
|---|
| 15 |  | 
|---|
| 16 | #ifndef LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H | 
|---|
| 17 | #define LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H | 
|---|
| 18 |  | 
|---|
| 19 | #include "X86TargetMachine.h" | 
|---|
| 20 | #include "llvm/Analysis/TargetTransformInfo.h" | 
|---|
| 21 | #include "llvm/CodeGen/BasicTTIImpl.h" | 
|---|
| 22 | #include <optional> | 
|---|
| 23 |  | 
|---|
| 24 | namespace llvm { | 
|---|
| 25 |  | 
|---|
| 26 | class InstCombiner; | 
|---|
| 27 | /// X86 TTI implementation layered on BasicTTIImplBase via CRTP; most hooks below are only declared in this header. | 
|---|
| 28 | class X86TTIImpl final : public BasicTTIImplBase<X86TTIImpl> { | 
|---|
| 29 | using BaseT = BasicTTIImplBase<X86TTIImpl>; | 
|---|
| 30 | using TTI = TargetTransformInfo; | 
|---|
| 31 | friend BaseT; | 
|---|
| 32 |  | 
|---|
| 33 | const X86Subtarget *ST; // Set from TM->getSubtargetImpl(F) in the constructor. | 
|---|
| 34 | const X86TargetLowering *TLI; // Set from ST->getTargetLowering() in the constructor. | 
|---|
| 35 |  | 
|---|
| 36 | const X86Subtarget *getST() const { return ST; } | 
|---|
| 37 | const X86TargetLowering *getTLI() const { return TLI; } | 
|---|
| 38 | /// Feature bits on the inline ignore list. NOTE(review): presumably consumed by areInlineCompatible - confirm in the .cpp. | 
|---|
| 39 | const FeatureBitset InlineFeatureIgnoreList = { | 
|---|
| 40 | // This indicates the CPU is 64 bit capable not that we are in 64-bit | 
|---|
| 41 | // mode. | 
|---|
| 42 | X86::FeatureX86_64, | 
|---|
| 43 |  | 
|---|
| 44 | // These features don't have any intrinsics or ABI effect. | 
|---|
| 45 | X86::FeatureNOPL, | 
|---|
| 46 | X86::FeatureCX16, | 
|---|
| 47 | X86::FeatureLAHFSAHF64, | 
|---|
| 48 |  | 
|---|
| 49 | // Some older targets can be set up to fold unaligned loads. | 
|---|
| 50 | X86::FeatureSSEUnalignedMem, | 
|---|
| 51 |  | 
|---|
| 52 | // Codegen control options. | 
|---|
| 53 | X86::TuningFast11ByteNOP, | 
|---|
| 54 | X86::TuningFast15ByteNOP, | 
|---|
| 55 | X86::TuningFastBEXTR, | 
|---|
| 56 | X86::TuningFastHorizontalOps, | 
|---|
| 57 | X86::TuningFastLZCNT, | 
|---|
| 58 | X86::TuningFastScalarFSQRT, | 
|---|
| 59 | X86::TuningFastSHLDRotate, | 
|---|
| 60 | X86::TuningFastScalarShiftMasks, | 
|---|
| 61 | X86::TuningFastVectorShiftMasks, | 
|---|
| 62 | X86::TuningFastVariableCrossLaneShuffle, | 
|---|
| 63 | X86::TuningFastVariablePerLaneShuffle, | 
|---|
| 64 | X86::TuningFastVectorFSQRT, | 
|---|
| 65 | X86::TuningLEAForSP, | 
|---|
| 66 | X86::TuningLEAUsesAG, | 
|---|
| 67 | X86::TuningLZCNTFalseDeps, | 
|---|
| 68 | X86::TuningBranchFusion, | 
|---|
| 69 | X86::TuningMacroFusion, | 
|---|
| 70 | X86::TuningPadShortFunctions, | 
|---|
| 71 | X86::TuningPOPCNTFalseDeps, | 
|---|
| 72 | X86::TuningMULCFalseDeps, | 
|---|
| 73 | X86::TuningPERMFalseDeps, | 
|---|
| 74 | X86::TuningRANGEFalseDeps, | 
|---|
| 75 | X86::TuningGETMANTFalseDeps, | 
|---|
| 76 | X86::TuningMULLQFalseDeps, | 
|---|
| 77 | X86::TuningSlow3OpsLEA, | 
|---|
| 78 | X86::TuningSlowDivide32, | 
|---|
| 79 | X86::TuningSlowDivide64, | 
|---|
| 80 | X86::TuningSlowIncDec, | 
|---|
| 81 | X86::TuningSlowLEA, | 
|---|
| 82 | X86::TuningSlowPMADDWD, | 
|---|
| 83 | X86::TuningSlowPMULLD, | 
|---|
| 84 | X86::TuningSlowSHLD, | 
|---|
| 85 | X86::TuningSlowTwoMemOps, | 
|---|
| 86 | X86::TuningSlowUAMem16, | 
|---|
| 87 | X86::TuningPreferMaskRegisters, | 
|---|
| 88 | X86::TuningInsertVZEROUPPER, | 
|---|
| 89 | X86::TuningUseSLMArithCosts, | 
|---|
| 90 | X86::TuningUseGLMDivSqrtCosts, | 
|---|
| 91 | X86::TuningNoDomainDelay, | 
|---|
| 92 | X86::TuningNoDomainDelayMov, | 
|---|
| 93 | X86::TuningNoDomainDelayShuffle, | 
|---|
| 94 | X86::TuningNoDomainDelayBlend, | 
|---|
| 95 | X86::TuningPreferShiftShuffle, | 
|---|
| 96 | X86::TuningFastImmVectorShift, | 
|---|
| 97 | X86::TuningFastDPWSSD, | 
|---|
| 98 |  | 
|---|
| 99 | // Perf-tuning flags. | 
|---|
| 100 | X86::TuningFastGather, | 
|---|
| 101 | X86::TuningSlowUAMem32, | 
|---|
| 102 | X86::TuningAllowLight256Bit, | 
|---|
| 103 |  | 
|---|
| 104 | // Based on whether user set the -mprefer-vector-width command line. | 
|---|
| 105 | X86::TuningPrefer128Bit, | 
|---|
| 106 | X86::TuningPrefer256Bit, | 
|---|
| 107 |  | 
|---|
| 108 | // CPU name enums. These just follow CPU string. | 
|---|
| 109 | X86::ProcIntelAtom | 
|---|
| 110 | }; | 
|---|
| 111 |  | 
|---|
| 112 | public: | 
|---|
| 113 | explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F) | 
|---|
| 114 | : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)), | 
|---|
| 115 | TLI(ST->getTargetLowering()) {} | 
|---|
| 116 |  | 
|---|
| 117 | /// \name Scalar TTI Implementations | 
|---|
| 118 | /// @{ | 
|---|
| 119 | TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override; | 
|---|
| 120 |  | 
|---|
| 121 | /// @} | 
|---|
| 122 |  | 
|---|
| 123 | /// \name Cache TTI Implementation | 
|---|
| 124 | /// @{ | 
|---|
| 125 | std::optional<unsigned> getCacheSize( | 
|---|
| 126 | TargetTransformInfo::CacheLevel Level) const override; | 
|---|
| 127 | std::optional<unsigned> getCacheAssociativity( | 
|---|
| 128 | TargetTransformInfo::CacheLevel Level) const override; | 
|---|
| 129 | /// @} | 
|---|
| 130 |  | 
|---|
| 131 | /// \name Vector TTI Implementations | 
|---|
| 132 | /// @{ | 
|---|
| 133 |  | 
|---|
| 134 | unsigned getNumberOfRegisters(unsigned ClassID) const override; | 
|---|
| 135 | bool hasConditionalLoadStoreForType(Type *Ty, bool IsStore) const override; | 
|---|
| 136 | TypeSize | 
|---|
| 137 | getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override; | 
|---|
| 138 | unsigned getLoadStoreVecRegBitWidth(unsigned AS) const override; | 
|---|
| 139 | unsigned getMaxInterleaveFactor(ElementCount VF) const override; | 
|---|
| 140 | InstructionCost getArithmeticInstrCost( | 
|---|
| 141 | unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, | 
|---|
| 142 | TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, | 
|---|
| 143 | TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, | 
|---|
| 144 | ArrayRef<const Value *> Args = {}, | 
|---|
| 145 | const Instruction *CxtI = nullptr) const override; | 
|---|
| 146 | InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, | 
|---|
| 147 | unsigned Opcode1, | 
|---|
| 148 | const SmallBitVector &OpcodeMask, | 
|---|
| 149 | TTI::TargetCostKind CostKind) const override; | 
|---|
| 150 |  | 
|---|
| 151 | InstructionCost | 
|---|
| 152 | getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, | 
|---|
| 153 | ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index, | 
|---|
| 154 | VectorType *SubTp, ArrayRef<const Value *> Args = {}, | 
|---|
| 155 | const Instruction *CxtI = nullptr) const override; | 
|---|
| 156 | InstructionCost | 
|---|
| 157 | getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, | 
|---|
| 158 | TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, | 
|---|
| 159 | const Instruction *I = nullptr) const override; | 
|---|
| 160 | InstructionCost getCmpSelInstrCost( | 
|---|
| 161 | unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, | 
|---|
| 162 | TTI::TargetCostKind CostKind, | 
|---|
| 163 | TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, | 
|---|
| 164 | TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, | 
|---|
| 165 | const Instruction *I = nullptr) const override; | 
|---|
| 166 | using BaseT::getVectorInstrCost; | 
|---|
| 167 | InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, | 
|---|
| 168 | TTI::TargetCostKind CostKind, | 
|---|
| 169 | unsigned Index, const Value *Op0, | 
|---|
| 170 | const Value *Op1) const override; | 
|---|
| 171 | InstructionCost getScalarizationOverhead( | 
|---|
| 172 | VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, | 
|---|
| 173 | TTI::TargetCostKind CostKind, bool ForPoisonSrc = true, | 
|---|
| 174 | ArrayRef<Value *> VL = {}) const override; | 
|---|
| 175 | InstructionCost | 
|---|
| 176 | getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, | 
|---|
| 177 | const APInt &DemandedDstElts, | 
|---|
| 178 | TTI::TargetCostKind CostKind) const override; | 
|---|
| 179 | InstructionCost getMemoryOpCost( | 
|---|
| 180 | unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, | 
|---|
| 181 | TTI::TargetCostKind CostKind, | 
|---|
| 182 | TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None}, | 
|---|
| 183 | const Instruction *I = nullptr) const override; | 
|---|
| 184 | InstructionCost | 
|---|
| 185 | getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, | 
|---|
| 186 | unsigned AddressSpace, | 
|---|
| 187 | TTI::TargetCostKind CostKind) const override; | 
|---|
| 188 | InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, | 
|---|
| 189 | const Value *Ptr, bool VariableMask, | 
|---|
| 190 | Align Alignment, | 
|---|
| 191 | TTI::TargetCostKind CostKind, | 
|---|
| 192 | const Instruction *I) const override; | 
|---|
| 193 | InstructionCost | 
|---|
| 194 | getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base, | 
|---|
| 195 | const TTI::PointersChainInfo &Info, Type *AccessTy, | 
|---|
| 196 | TTI::TargetCostKind CostKind) const override; | 
|---|
| 197 | InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, | 
|---|
| 198 | const SCEV *Ptr) const override; | 
|---|
| 199 |  | 
|---|
| 200 | std::optional<Instruction *> | 
|---|
| 201 | instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override; | 
|---|
| 202 | std::optional<Value *> | 
|---|
| 203 | simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, | 
|---|
| 204 | APInt DemandedMask, KnownBits &Known, | 
|---|
| 205 | bool &KnownBitsComputed) const override; | 
|---|
| 206 | std::optional<Value *> simplifyDemandedVectorEltsIntrinsic( | 
|---|
| 207 | InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, | 
|---|
| 208 | APInt &UndefElts2, APInt &UndefElts3, | 
|---|
| 209 | std::function<void(Instruction *, unsigned, APInt, APInt &)> | 
|---|
| 210 | SimplifyAndSetOp) const override; | 
|---|
| 211 |  | 
|---|
| 212 | unsigned getAtomicMemIntrinsicMaxElementSize() const override; | 
|---|
| 213 |  | 
|---|
| 214 | InstructionCost | 
|---|
| 215 | getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, | 
|---|
| 216 | TTI::TargetCostKind CostKind) const override; | 
|---|
| 217 |  | 
|---|
| 218 | InstructionCost | 
|---|
| 219 | getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, | 
|---|
| 220 | std::optional<FastMathFlags> FMF, | 
|---|
| 221 | TTI::TargetCostKind CostKind) const override; | 
|---|
| 222 |  | 
|---|
| 223 | InstructionCost getMinMaxCost(Intrinsic::ID IID, Type *Ty, | 
|---|
| 224 | TTI::TargetCostKind CostKind, | 
|---|
| 225 | FastMathFlags FMF) const; // X86-only helper: not an override. | 
|---|
| 226 |  | 
|---|
| 227 | InstructionCost | 
|---|
| 228 | getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, | 
|---|
| 229 | TTI::TargetCostKind CostKind) const override; | 
|---|
| 230 |  | 
|---|
| 231 | InstructionCost getInterleavedMemoryOpCost( | 
|---|
| 232 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, | 
|---|
| 233 | Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, | 
|---|
| 234 | bool UseMaskForCond = false, bool UseMaskForGaps = false) const override; | 
|---|
| 235 | InstructionCost getInterleavedMemoryOpCostAVX512( | 
|---|
| 236 | unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, | 
|---|
| 237 | ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace, | 
|---|
| 238 | TTI::TargetCostKind CostKind, bool UseMaskForCond = false, | 
|---|
| 239 | bool UseMaskForGaps = false) const; // X86-only helper: not an override. | 
|---|
| 240 |  | 
|---|
| 241 | InstructionCost getIntImmCost(int64_t) const; // Non-override overload taking a plain 64-bit value. | 
|---|
| 242 |  | 
|---|
| 243 | InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, | 
|---|
| 244 | TTI::TargetCostKind CostKind) const override; | 
|---|
| 245 |  | 
|---|
| 246 | InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, | 
|---|
| 247 | const Instruction *I = nullptr) const override; | 
|---|
| 248 |  | 
|---|
| 249 | InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, | 
|---|
| 250 | const APInt &Imm, Type *Ty, | 
|---|
| 251 | TTI::TargetCostKind CostKind, | 
|---|
| 252 | Instruction *Inst = nullptr) const override; | 
|---|
| 253 | InstructionCost | 
|---|
| 254 | getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, | 
|---|
| 255 | Type *Ty, TTI::TargetCostKind CostKind) const override; | 
|---|
| 256 | /// Return the cost of the scaling factor used in the addressing | 
|---|
| 257 | /// mode represented by AM for this target, for a load/store | 
|---|
| 258 | /// of the specified type. | 
|---|
| 259 | /// If the AM is supported, the return value must be >= 0. | 
|---|
| 260 | /// If the AM is not supported, it returns an invalid cost. | 
|---|
| 261 | InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, | 
|---|
| 262 | StackOffset BaseOffset, bool HasBaseReg, | 
|---|
| 263 | int64_t Scale, | 
|---|
| 264 | unsigned AddrSpace) const override; | 
|---|
| 265 |  | 
|---|
| 266 | bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, | 
|---|
| 267 | const TargetTransformInfo::LSRCost &C2) const override; | 
|---|
| 268 | bool canMacroFuseCmp() const override; | 
|---|
| 269 | bool isLegalMaskedLoad(Type *DataType, Align Alignment, | 
|---|
| 270 | unsigned AddressSpace) const override; | 
|---|
| 271 | bool isLegalMaskedStore(Type *DataType, Align Alignment, | 
|---|
| 272 | unsigned AddressSpace) const override; | 
|---|
| 273 | bool isLegalNTLoad(Type *DataType, Align Alignment) const override; | 
|---|
| 274 | bool isLegalNTStore(Type *DataType, Align Alignment) const override; | 
|---|
| 275 | bool isLegalBroadcastLoad(Type *ElementTy, | 
|---|
| 276 | ElementCount NumElements) const override; | 
|---|
| 277 | bool forceScalarizeMaskedGather(VectorType *VTy, | 
|---|
| 278 | Align Alignment) const override; | 
|---|
| 279 | bool forceScalarizeMaskedScatter(VectorType *VTy, | 
|---|
| 280 | Align Alignment) const override { | 
|---|
| 281 | return forceScalarizeMaskedGather(VTy, Alignment); // Scatter reuses the gather decision. | 
|---|
| 282 | } | 
|---|
| 283 | bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment) const; // X86-only helper: not an override. | 
|---|
| 284 | bool isLegalMaskedGather(Type *DataType, Align Alignment) const override; | 
|---|
| 285 | bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override; | 
|---|
| 286 | bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const override; | 
|---|
| 287 | bool isLegalMaskedCompressStore(Type *DataType, | 
|---|
| 288 | Align Alignment) const override; | 
|---|
| 289 | bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, | 
|---|
| 290 | const SmallBitVector &OpcodeMask) const override; | 
|---|
| 291 | bool hasDivRemOp(Type *DataType, bool IsSigned) const override; | 
|---|
| 292 | bool isExpensiveToSpeculativelyExecute(const Instruction *I) const override; | 
|---|
| 293 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const override; | 
|---|
| 294 | bool areInlineCompatible(const Function *Caller, | 
|---|
| 295 | const Function *Callee) const override; | 
|---|
| 296 | bool areTypesABICompatible(const Function *Caller, const Function *Callee, | 
|---|
| 297 | const ArrayRef<Type *> &Type) const override; | 
|---|
| 298 |  | 
|---|
| 299 | uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override { | 
|---|
| 300 | return ST->getMaxInlineSizeThreshold(); // Threshold is supplied by the subtarget. | 
|---|
| 301 | } | 
|---|
| 302 |  | 
|---|
| 303 | TTI::MemCmpExpansionOptions | 
|---|
| 304 | enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override; | 
|---|
| 305 | bool preferAlternateOpcodeVectorization() const override { return false; } | 
|---|
| 306 | bool prefersVectorizedAddressing() const override; | 
|---|
| 307 | bool supportsEfficientVectorElementLoadStore() const override; | 
|---|
| 308 | bool enableInterleavedAccessVectorization() const override; | 
|---|
| 309 |  | 
|---|
| 310 | InstructionCost getBranchMispredictPenalty() const override; | 
|---|
| 311 |  | 
|---|
| 312 | bool isProfitableToSinkOperands(Instruction *I, | 
|---|
| 313 | SmallVectorImpl<Use *> &Ops) const override; | 
|---|
| 314 |  | 
|---|
| 315 | bool isVectorShiftByScalarCheap(Type *Ty) const override; | 
|---|
| 316 |  | 
|---|
| 317 | unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, | 
|---|
| 318 | Type *ScalarValTy) const override; | 
|---|
| 319 |  | 
|---|
| 320 | private: | 
|---|
| 321 | bool supportsGather() const; | 
|---|
| 322 | InstructionCost getGSVectorCost(unsigned Opcode, TTI::TargetCostKind CostKind, | 
|---|
| 323 | Type *DataTy, const Value *Ptr, | 
|---|
| 324 | Align Alignment, unsigned AddressSpace) const; | 
|---|
| 325 |  | 
|---|
| 326 | int getGatherOverhead() const; | 
|---|
| 327 | int getScatterOverhead() const; | 
|---|
| 328 |  | 
|---|
| 329 | /// @} | 
|---|
| 330 | }; | 
|---|
| 331 |  | 
|---|
| 332 | } // end namespace llvm | 
|---|
| 333 |  | 
|---|
| 334 | #endif | 
|---|
| 335 |  | 
|---|