//===-- X86TargetTransformInfo.h - X86 specific TTI -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object
/// specific to the X86 target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target independent and default TTI implementations handle the
/// rest.
///
//===----------------------------------------------------------------------===//
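//
// Usage sketch (illustrative only, not part of the upstream header): passes
// query the generic TargetTransformInfo interface, and on x86 the target
// machine plugs this class in behind it. The surrounding pass context below is
// hypothetical; the TTI calls themselves are the real interface.
//
//   // Inside some FunctionPass-style run(F, FAM):
//   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
//   InstructionCost Cost = TTI.getArithmeticInstrCost(
//       Instruction::Mul,
//       FixedVectorType::get(Type::getInt32Ty(F.getContext()), 8),
//       TargetTransformInfo::TCK_RecipThroughput);
//   // When compiling for an x86 triple, this call dispatches to
//   // X86TTIImpl::getArithmeticInstrCost declared below.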

#ifndef LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H

#include "X86TargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include <optional>

namespace llvm {

class InstCombiner;

class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
  typedef BasicTTIImplBase<X86TTIImpl> BaseT;
  typedef TargetTransformInfo TTI;
  friend BaseT;

  const X86Subtarget *ST;
  const X86TargetLowering *TLI;

  const X86Subtarget *getST() const { return ST; }
  const X86TargetLowering *getTLI() const { return TLI; }

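  /// Subtarget features that are masked off when comparing the caller's and
  /// callee's feature bits in areInlineCompatible(). As the comments below
  /// note, these control tuning and codegen behavior only (no intrinsic or
  /// ABI impact), so a mismatch between caller and callee should not by
  /// itself block inlining.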
  const FeatureBitset InlineFeatureIgnoreList = {
      // This indicates the CPU is 64-bit capable, not that we are in 64-bit
      // mode.
      X86::FeatureX86_64,

      // These features don't have any intrinsics or ABI effect.
      X86::FeatureNOPL,
      X86::FeatureCX16,
      X86::FeatureLAHFSAHF64,

      // Some older targets can be set up to fold unaligned loads.
      X86::FeatureSSEUnalignedMem,

      // Codegen control options.
      X86::TuningFast11ByteNOP,
      X86::TuningFast15ByteNOP,
      X86::TuningFastBEXTR,
      X86::TuningFastHorizontalOps,
      X86::TuningFastLZCNT,
      X86::TuningFastScalarFSQRT,
      X86::TuningFastSHLDRotate,
      X86::TuningFastScalarShiftMasks,
      X86::TuningFastVectorShiftMasks,
      X86::TuningFastVariableCrossLaneShuffle,
      X86::TuningFastVariablePerLaneShuffle,
      X86::TuningFastVectorFSQRT,
      X86::TuningLEAForSP,
      X86::TuningLEAUsesAG,
      X86::TuningLZCNTFalseDeps,
      X86::TuningBranchFusion,
      X86::TuningMacroFusion,
      X86::TuningPadShortFunctions,
      X86::TuningPOPCNTFalseDeps,
      X86::TuningMULCFalseDeps,
      X86::TuningPERMFalseDeps,
      X86::TuningRANGEFalseDeps,
      X86::TuningGETMANTFalseDeps,
      X86::TuningMULLQFalseDeps,
      X86::TuningSlow3OpsLEA,
      X86::TuningSlowDivide32,
      X86::TuningSlowDivide64,
      X86::TuningSlowIncDec,
      X86::TuningSlowLEA,
      X86::TuningSlowPMADDWD,
      X86::TuningSlowPMULLD,
      X86::TuningSlowSHLD,
      X86::TuningSlowTwoMemOps,
      X86::TuningSlowUAMem16,
      X86::TuningPreferMaskRegisters,
      X86::TuningInsertVZEROUPPER,
      X86::TuningUseSLMArithCosts,
      X86::TuningUseGLMDivSqrtCosts,
      X86::TuningNoDomainDelay,
      X86::TuningNoDomainDelayMov,
      X86::TuningNoDomainDelayShuffle,
      X86::TuningNoDomainDelayBlend,
      X86::TuningPreferShiftShuffle,
      X86::TuningFastImmVectorShift,
      X86::TuningFastDPWSSD,

      // Perf-tuning flags.
      X86::TuningFastGather,
      X86::TuningSlowUAMem32,
      X86::TuningAllowLight256Bit,

      // Based on whether the user set the -mprefer-vector-width command line
      // option.
      X86::TuningPrefer128Bit,
      X86::TuningPrefer256Bit,

      // CPU name enums. These just follow the CPU string.
      X86::ProcIntelAtom
  };

public:
  explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  /// \name Scalar TTI Implementations
  /// @{
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

  /// @}

  /// \name Cache TTI Implementation
  /// @{
  std::optional<unsigned> getCacheSize(
      TargetTransformInfo::CacheLevel Level) const override;
  std::optional<unsigned> getCacheAssociativity(
      TargetTransformInfo::CacheLevel Level) const override;
  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(unsigned ClassID) const;
  bool hasConditionalLoadStoreForType(Type *Ty = nullptr) const;
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AS) const;
  unsigned getMaxInterleaveFactor(ElementCount VF);
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = std::nullopt,
      const Instruction *CxtI = nullptr);
  InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
                                  unsigned Opcode1,
                                  const SmallBitVector &OpcodeMask,
                                  TTI::TargetCostKind CostKind) const;

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = std::nullopt,
                                 const Instruction *CxtI = nullptr);
  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);
  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);
  using BaseT::getVectorInstrCost;
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0, Value *Op1);
  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TTI::TargetCostKind CostKind);
  InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
                                            int VF,
                                            const APInt &DemandedDstElts,
                                            TTI::TargetCostKind CostKind);
  InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                  unsigned AddressSpace, TTI::TargetCostKind CostKind,
                  TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
                  const Instruction *I = nullptr);
  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind);
  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I);
  InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                       const Value *Base,
                                       const TTI::PointersChainInfo &Info,
                                       Type *AccessTy,
                                       TTI::TargetCostKind CostKind);
  InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
                                            const SCEV *Ptr);

  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;
  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const;
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  unsigned getAtomicMemIntrinsicMaxElementSize() const;

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);

  InstructionCost getMinMaxCost(Intrinsic::ID IID, Type *Ty,
                                TTI::TargetCostKind CostKind,
                                FastMathFlags FMF);

  InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
                                         FastMathFlags FMF,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);
  InstructionCost getInterleavedMemoryOpCostAVX512(
      unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
      ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind, bool UseMaskForCond = false,
      bool UseMaskForGaps = false);

  InstructionCost getIntImmCost(int64_t);

  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind);
  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, the return value is negative.
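  /// For example (illustrative), a load such as `movl (%rdi,%rsi,4), %eax`
  /// uses base %rdi, index %rsi and scale 4; x86 addressing encodes scales of
  /// 1, 2, 4 and 8 directly, so an AM of that shape is expected to report a
  /// non-negative (supported) cost here.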
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) const;

  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     const TargetTransformInfo::LSRCost &C2);
  bool canMacroFuseCmp();
  bool isLegalMaskedLoad(Type *DataType, Align Alignment);
  bool isLegalMaskedStore(Type *DataType, Align Alignment);
  bool isLegalNTLoad(Type *DataType, Align Alignment);
  bool isLegalNTStore(Type *DataType, Align Alignment);
  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const;
  bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment);
  bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
    return forceScalarizeMaskedGather(VTy, Alignment);
  }
  bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment);
  bool isLegalMaskedGather(Type *DataType, Align Alignment);
  bool isLegalMaskedScatter(Type *DataType, Align Alignment);
  bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment);
  bool isLegalMaskedCompressStore(Type *DataType, Align Alignment);
  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const;
  bool hasDivRemOp(Type *DataType, bool IsSigned);
  bool isExpensiveToSpeculativelyExecute(const Instruction *I);
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty);
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;
  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const;

  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const {
    return ST->getMaxInlineSizeThreshold();
  }

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const;
  bool prefersVectorizedAddressing() const;
  bool supportsEfficientVectorElementLoadStore() const;
  bool enableInterleavedAccessVectorization();

  InstructionCost getBranchMispredictPenalty() const;

private:
  bool supportsGather() const;
  InstructionCost getGSVectorCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                  Type *DataTy, const Value *Ptr,
                                  Align Alignment, unsigned AddressSpace);

  int getGatherOverhead() const;
  int getScatterOverhead() const;

  /// @}
};

} // end namespace llvm

#endif