//===-- X86TargetTransformInfo.h - X86 specific TTI -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfoImplBase conforming object specific
/// to the X86 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
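/// As a minimal illustrative sketch (the enclosing pass, FAM, and F are
/// assumed context, not part of this file), clients normally reach these
/// hooks through the generic TargetTransformInfo analysis rather than
/// through X86TTIImpl directly:
/// \code
///   // Inside a new-pass-manager function pass:
///   const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
///   if (TTI.getPopcntSupport(32) == TargetTransformInfo::PSK_FastHardware) {
///     // A ctpop-based sequence is cheap on this subtarget.
///   }
/// \endcode
///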
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H

#include "X86TargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include <optional>

namespace llvm {

class InstCombiner;

class X86TTIImpl final : public BasicTTIImplBase<X86TTIImpl> {
  typedef BasicTTIImplBase<X86TTIImpl> BaseT;
  typedef TargetTransformInfo TTI;
  friend BaseT;

  const X86Subtarget *ST;
  const X86TargetLowering *TLI;

  const X86Subtarget *getST() const { return ST; }
  const X86TargetLowering *getTLI() const { return TLI; }

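  /// Subtarget features that are ignored when deciding whether a callee can
  /// be inlined into a caller with a different feature set (see
  /// areInlineCompatible). They affect tuning and code generation only, not
  /// the ABI or the set of available intrinsics, so a mismatch between caller
  /// and callee should not block inlining.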
  const FeatureBitset InlineFeatureIgnoreList = {
      // This indicates that the CPU is 64-bit capable, not that we are in
      // 64-bit mode.
      X86::FeatureX86_64,

      // These features don't have any intrinsics or ABI effect.
      X86::FeatureNOPL,
      X86::FeatureCX16,
      X86::FeatureLAHFSAHF64,

      // Some older targets can be set up to fold unaligned loads.
      X86::FeatureSSEUnalignedMem,

      // Codegen control options.
      X86::TuningFast11ByteNOP,
      X86::TuningFast15ByteNOP,
      X86::TuningFastBEXTR,
      X86::TuningFastHorizontalOps,
      X86::TuningFastLZCNT,
      X86::TuningFastScalarFSQRT,
      X86::TuningFastSHLDRotate,
      X86::TuningFastScalarShiftMasks,
      X86::TuningFastVectorShiftMasks,
      X86::TuningFastVariableCrossLaneShuffle,
      X86::TuningFastVariablePerLaneShuffle,
      X86::TuningFastVectorFSQRT,
      X86::TuningLEAForSP,
      X86::TuningLEAUsesAG,
      X86::TuningLZCNTFalseDeps,
      X86::TuningBranchFusion,
      X86::TuningMacroFusion,
      X86::TuningPadShortFunctions,
      X86::TuningPOPCNTFalseDeps,
      X86::TuningMULCFalseDeps,
      X86::TuningPERMFalseDeps,
      X86::TuningRANGEFalseDeps,
      X86::TuningGETMANTFalseDeps,
      X86::TuningMULLQFalseDeps,
      X86::TuningSlow3OpsLEA,
      X86::TuningSlowDivide32,
      X86::TuningSlowDivide64,
      X86::TuningSlowIncDec,
      X86::TuningSlowLEA,
      X86::TuningSlowPMADDWD,
      X86::TuningSlowPMULLD,
      X86::TuningSlowSHLD,
      X86::TuningSlowTwoMemOps,
      X86::TuningSlowUAMem16,
      X86::TuningPreferMaskRegisters,
      X86::TuningInsertVZEROUPPER,
      X86::TuningUseSLMArithCosts,
      X86::TuningUseGLMDivSqrtCosts,
      X86::TuningNoDomainDelay,
      X86::TuningNoDomainDelayMov,
      X86::TuningNoDomainDelayShuffle,
      X86::TuningNoDomainDelayBlend,
      X86::TuningPreferShiftShuffle,
      X86::TuningFastImmVectorShift,
      X86::TuningFastDPWSSD,

      // Perf-tuning flags.
      X86::TuningFastGather,
      X86::TuningSlowUAMem32,
      X86::TuningAllowLight256Bit,

      // Based on whether the user set the -mprefer-vector-width command-line
      // option.
      X86::TuningPrefer128Bit,
      X86::TuningPrefer256Bit,

      // CPU name enums. These just follow the CPU string.
      X86::ProcIntelAtom
  };

public:
  explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  /// \name Scalar TTI Implementations
  /// @{
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;

  /// @}

  /// \name Cache TTI Implementation
  /// @{
  std::optional<unsigned> getCacheSize(
      TargetTransformInfo::CacheLevel Level) const override;
  std::optional<unsigned> getCacheAssociativity(
      TargetTransformInfo::CacheLevel Level) const override;
  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(unsigned ClassID) const override;
  bool hasConditionalLoadStoreForType(Type *Ty, bool IsStore) const override;
  TypeSize
  getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override;
  unsigned getLoadStoreVecRegBitWidth(unsigned AS) const override;
  unsigned getMaxInterleaveFactor(ElementCount VF) const override;
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = {},
      const Instruction *CxtI = nullptr) const override;
  InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
                                  unsigned Opcode1,
                                  const SmallBitVector &OpcodeMask,
                                  TTI::TargetCostKind CostKind) const override;

  InstructionCost
  getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
                 ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
                 VectorType *SubTp, ArrayRef<const Value *> Args = {},
                 const Instruction *CxtI = nullptr) const override;
  InstructionCost
  getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                   TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
                   const Instruction *I = nullptr) const override;
  InstructionCost getCmpSelInstrCost(
      unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override;
  using BaseT::getVectorInstrCost;
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, const Value *Op0,
                                     const Value *Op1) const override;
  InstructionCost getScalarizationOverhead(
      VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
      TTI::TargetCostKind CostKind, bool ForPoisonSrc = true,
      ArrayRef<Value *> VL = {}) const override;
  InstructionCost
  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                            const APInt &DemandedDstElts,
                            TTI::TargetCostKind CostKind) const override;
  InstructionCost getMemoryOpCost(
      unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override;
  InstructionCost
  getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                        unsigned AddressSpace,
                        TTI::TargetCostKind CostKind) const override;
  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I) const override;
  InstructionCost
  getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
                       const TTI::PointersChainInfo &Info, Type *AccessTy,
                       TTI::TargetCostKind CostKind) const override;
  InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
                                            const SCEV *Ptr) const override;

  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;
  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const override;
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const override;

  unsigned getAtomicMemIntrinsicMaxElementSize() const override;

  InstructionCost
  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                        TTI::TargetCostKind CostKind) const override;

  InstructionCost
  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                             std::optional<FastMathFlags> FMF,
                             TTI::TargetCostKind CostKind) const override;

  InstructionCost getMinMaxCost(Intrinsic::ID IID, Type *Ty,
                                TTI::TargetCostKind CostKind,
                                FastMathFlags FMF) const;

  InstructionCost
  getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
                         TTI::TargetCostKind CostKind) const override;

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) const override;
  InstructionCost getInterleavedMemoryOpCostAVX512(
      unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
      ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind, bool UseMaskForCond = false,
      bool UseMaskForGaps = false) const;

  InstructionCost getIntImmCost(int64_t) const;

  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind) const override;

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) const override;

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const override;
  InstructionCost
  getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                      Type *Ty, TTI::TargetCostKind CostKind) const override;
  /// Return the cost of the scaling factor used in the addressing mode
  /// described by the decomposed parameters (BaseGV + BaseOffset +
  /// HasBaseReg * BaseReg + Scale * IndexReg) for this target, for a
  /// load/store of the specified type.
  /// If the addressing mode is supported, the return value is a valid,
  /// non-negative cost; otherwise an invalid cost is returned.
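  /// As a purely illustrative example (not taken from this interface), an x86
  /// address such as [rbx + 4*rcx + 8] would correspond to HasBaseReg = true,
  /// Scale = 4 and BaseOffset = 8.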
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) const override;

  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     const TargetTransformInfo::LSRCost &C2) const override;
  bool canMacroFuseCmp() const override;
  bool isLegalMaskedLoad(Type *DataType, Align Alignment,
                         unsigned AddressSpace) const override;
  bool isLegalMaskedStore(Type *DataType, Align Alignment,
                          unsigned AddressSpace) const override;
  bool isLegalNTLoad(Type *DataType, Align Alignment) const override;
  bool isLegalNTStore(Type *DataType, Align Alignment) const override;
  bool isLegalBroadcastLoad(Type *ElementTy,
                            ElementCount NumElements) const override;
  bool forceScalarizeMaskedGather(VectorType *VTy,
                                  Align Alignment) const override;
  bool forceScalarizeMaskedScatter(VectorType *VTy,
                                   Align Alignment) const override {
    return forceScalarizeMaskedGather(VTy, Alignment);
  }
  bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment) const;
  bool isLegalMaskedGather(Type *DataType, Align Alignment) const override;
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override;
  bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const override;
  bool isLegalMaskedCompressStore(Type *DataType,
                                  Align Alignment) const override;
  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const override;
  bool hasDivRemOp(Type *DataType, bool IsSigned) const override;
  bool isExpensiveToSpeculativelyExecute(const Instruction *I) const override;
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const override;
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override;
  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const override;

  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
    return ST->getMaxInlineSizeThreshold();
  }

  TTI::MemCmpExpansionOptions
  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
  bool preferAlternateOpcodeVectorization() const override { return false; }
  bool prefersVectorizedAddressing() const override;
  bool supportsEfficientVectorElementLoadStore() const override;
  bool enableInterleavedAccessVectorization() const override;

  InstructionCost getBranchMispredictPenalty() const override;

  bool isProfitableToSinkOperands(Instruction *I,
                                  SmallVectorImpl<Use *> &Ops) const override;

  bool isVectorShiftByScalarCheap(Type *Ty) const override;

  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const override;

private:
  bool supportsGather() const;
  InstructionCost getGSVectorCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                  Type *DataTy, const Value *Ptr,
                                  Align Alignment, unsigned AddressSpace) const;

  int getGatherOverhead() const;
  int getScatterOverhead() const;

  /// @}
};

} // end namespace llvm

#endif