1 | //===- RISCVTargetTransformInfo.h - RISC-V specific TTI ---------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// This file defines a TargetTransformInfo::Concept conforming object specific |
10 | /// to the RISC-V target machine. It uses the target's detailed information to |
11 | /// provide more precise answers to certain TTI queries, while letting the |
12 | /// target independent and default TTI implementations handle the rest. |
13 | /// |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #ifndef LLVM_LIB_TARGET_RISCV_RISCVTARGETTRANSFORMINFO_H |
17 | #define LLVM_LIB_TARGET_RISCV_RISCVTARGETTRANSFORMINFO_H |
18 | |
19 | #include "RISCVSubtarget.h" |
20 | #include "RISCVTargetMachine.h" |
21 | #include "llvm/Analysis/IVDescriptors.h" |
22 | #include "llvm/Analysis/TargetTransformInfo.h" |
23 | #include "llvm/CodeGen/BasicTTIImpl.h" |
24 | #include "llvm/IR/Function.h" |
25 | #include <optional> |
26 | |
27 | namespace llvm { |
28 | |
29 | class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> { |
30 | using BaseT = BasicTTIImplBase<RISCVTTIImpl>; |
31 | using TTI = TargetTransformInfo; |
32 | |
33 | friend BaseT; |
34 | |
35 | const RISCVSubtarget *ST; |
36 | const RISCVTargetLowering *TLI; |
37 | |
38 | const RISCVSubtarget *getST() const { return ST; } |
39 | const RISCVTargetLowering *getTLI() const { return TLI; } |
40 | |
41 | /// This function returns an estimate for VL to be used in VL based terms |
42 | /// of the cost model. For fixed length vectors, this is simply the |
43 | /// vector length. For scalable vectors, we return results consistent |
44 | /// with getVScaleForTuning under the assumption that clients are also |
  /// using that when comparing costs between scalar and vector representations.
  /// This unfortunately means that we can both undershoot and overshoot
47 | /// the true cost significantly if getVScaleForTuning is wildly off for the |
48 | /// actual target hardware. |
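  /// For example (illustrative, not exact): a fixed <4 x i32> is estimated at
  /// 4 lanes, while a scalable <vscale x 4 x i32> with getVScaleForTuning()
  /// == 2 is estimated at 4 * 2 = 8 lanes.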
49 | unsigned getEstimatedVLFor(VectorType *Ty); |
50 | |
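  /// Return the cost of the sequence of RISC-V opcodes \p OpCodes when
  /// operating on type \p VT for the given cost kind.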
51 | InstructionCost getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT, |
52 | TTI::TargetCostKind CostKind); |
53 | |
54 | /// Return the cost of accessing a constant pool entry of the specified |
55 | /// type. |
56 | InstructionCost getConstantPoolLoadCost(Type *Ty, |
57 | TTI::TargetCostKind CostKind); |
58 | public: |
59 | explicit RISCVTTIImpl(const RISCVTargetMachine *TM, const Function &F) |
60 | : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)), |
61 | TLI(ST->getTargetLowering()) {} |
62 | |
63 | bool areInlineCompatible(const Function *Caller, |
64 | const Function *Callee) const; |
65 | |
66 | /// Return the cost of materializing an immediate for a value operand of |
67 | /// a store instruction. |
68 | InstructionCost getStoreImmCost(Type *VecTy, TTI::OperandValueInfo OpInfo, |
69 | TTI::TargetCostKind CostKind); |
70 | |
71 | InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, |
72 | TTI::TargetCostKind CostKind); |
73 | InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, |
74 | const APInt &Imm, Type *Ty, |
75 | TTI::TargetCostKind CostKind, |
76 | Instruction *Inst = nullptr); |
77 | InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, |
78 | const APInt &Imm, Type *Ty, |
79 | TTI::TargetCostKind CostKind); |
80 | |
81 | /// \name EVL Support for predicated vectorization. |
  /// Whether the target supports the %evl parameter of VP intrinsics efficiently
83 | /// in hardware, for the given opcode and type/alignment. (see LLVM Language |
84 | /// Reference - "Vector Predication Intrinsics", |
85 | /// https://llvm.org/docs/LangRef.html#vector-predication-intrinsics and |
86 | /// "IR-level VP intrinsics", |
87 | /// https://llvm.org/docs/Proposals/VectorPredication.html#ir-level-vp-intrinsics). |
88 | /// \param Opcode the opcode of the instruction checked for predicated version |
89 | /// support. |
  /// \param DataType the type of the instruction with the \p Opcode checked for
  /// predication support.
92 | /// \param Alignment the alignment for memory access operation checked for |
93 | /// predicated version support. |
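  ///
  /// For example (assuming the V extension is enabled), a llvm.vp.load of
  /// <vscale x 4 x i32> with natural element alignment typically lowers to a
  /// single VL-predicated vle32.v, so it would be reported as supported here.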
94 | bool hasActiveVectorLength(unsigned Opcode, Type *DataType, |
95 | Align Alignment) const; |
96 | |
97 | TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth); |
98 | |
99 | bool shouldExpandReduction(const IntrinsicInst *II) const; |
100 | bool supportsScalableVectors() const { return ST->hasVInstructions(); } |
101 | bool enableOrderedReductions() const { return true; } |
102 | bool enableScalableVectorization() const { return ST->hasVInstructions(); } |
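  // Data-style tail folding predicates the loop's data operations with an
  // active-lane mask; the DataWithoutLaneMask variant computes that mask
  // without the llvm.get.active.lane.mask intrinsic.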
103 | TailFoldingStyle |
104 | getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const { |
105 | return ST->hasVInstructions() ? TailFoldingStyle::Data |
106 | : TailFoldingStyle::DataWithoutLaneMask; |
107 | } |
108 | std::optional<unsigned> getMaxVScale() const; |
109 | std::optional<unsigned> getVScaleForTuning() const; |
110 | |
111 | TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const; |
112 | |
113 | unsigned getRegUsageForType(Type *Ty); |
114 | |
115 | unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const; |
116 | |
117 | bool preferEpilogueVectorization() const { |
118 | // Epilogue vectorization is usually unprofitable - tail folding or |
    // a smaller VF would have been better. This is a blunt hammer - we
120 | // should re-examine this once vectorization is better tuned. |
121 | return false; |
122 | } |
123 | |
124 | InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, |
125 | Align Alignment, unsigned AddressSpace, |
126 | TTI::TargetCostKind CostKind); |
127 | |
128 | InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs, |
129 | const Value *Base, |
130 | const TTI::PointersChainInfo &Info, |
131 | Type *AccessTy, |
132 | TTI::TargetCostKind CostKind); |
133 | |
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);
137 | |
138 | void getPeelingPreferences(Loop *L, ScalarEvolution &SE, |
139 | TTI::PeelingPreferences &PP); |
140 | |
141 | unsigned getMinVectorRegisterBitWidth() const { |
142 | return ST->useRVVForFixedLengthVectors() ? 16 : 0; |
143 | } |
144 | |
145 | InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, |
146 | ArrayRef<int> Mask, |
147 | TTI::TargetCostKind CostKind, int Index, |
148 | VectorType *SubTp, |
149 | ArrayRef<const Value *> Args = std::nullopt, |
150 | const Instruction *CxtI = nullptr); |
151 | |
152 | InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, |
153 | TTI::TargetCostKind CostKind); |
154 | |
155 | InstructionCost getInterleavedMemoryOpCost( |
156 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, |
157 | Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, |
158 | bool UseMaskForCond = false, bool UseMaskForGaps = false); |
159 | |
160 | InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, |
161 | const Value *Ptr, bool VariableMask, |
162 | Align Alignment, |
163 | TTI::TargetCostKind CostKind, |
164 | const Instruction *I); |
165 | |
166 | InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, |
167 | const Value *Ptr, bool VariableMask, |
168 | Align Alignment, |
169 | TTI::TargetCostKind CostKind, |
170 | const Instruction *I); |
171 | |
172 | InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, |
173 | TTI::CastContextHint CCH, |
174 | TTI::TargetCostKind CostKind, |
175 | const Instruction *I = nullptr); |
176 | |
177 | InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, |
178 | FastMathFlags FMF, |
179 | TTI::TargetCostKind CostKind); |
180 | |
181 | InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, |
182 | std::optional<FastMathFlags> FMF, |
183 | TTI::TargetCostKind CostKind); |
184 | |
185 | InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, |
186 | Type *ResTy, VectorType *ValTy, |
187 | FastMathFlags FMF, |
188 | TTI::TargetCostKind CostKind); |
189 | |
190 | InstructionCost |
191 | getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, |
192 | unsigned AddressSpace, TTI::TargetCostKind CostKind, |
                  TTI::OperandValueInfo OpdInfo = {TTI::OK_AnyValue, TTI::OP_None},
194 | const Instruction *I = nullptr); |
195 | |
196 | InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, |
197 | CmpInst::Predicate VecPred, |
198 | TTI::TargetCostKind CostKind, |
199 | const Instruction *I = nullptr); |
200 | |
201 | InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, |
202 | const Instruction *I = nullptr); |
203 | |
204 | using BaseT::getVectorInstrCost; |
205 | InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, |
206 | TTI::TargetCostKind CostKind, |
207 | unsigned Index, Value *Op0, Value *Op1); |
208 | |
209 | InstructionCost getArithmeticInstrCost( |
210 | unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, |
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
213 | ArrayRef<const Value *> Args = std::nullopt, |
214 | const Instruction *CxtI = nullptr); |
215 | |
216 | bool isElementTypeLegalForScalableVector(Type *Ty) const { |
    return TLI->isLegalElementTypeForRVV(TLI->getValueType(DL, Ty));
218 | } |
219 | |
220 | bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) { |
221 | if (!ST->hasVInstructions()) |
222 | return false; |
223 | |
    EVT DataTypeVT = TLI->getValueType(DL, DataType);
225 | |
226 | // Only support fixed vectors if we know the minimum vector size. |
227 | if (DataTypeVT.isFixedLengthVector() && !ST->useRVVForFixedLengthVectors()) |
228 | return false; |
229 | |
230 | EVT ElemType = DataTypeVT.getScalarType(); |
231 | if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize()) |
232 | return false; |
233 | |
    return TLI->isLegalElementTypeForRVV(ElemType);
  }
237 | |
238 | bool isLegalMaskedLoad(Type *DataType, Align Alignment) { |
239 | return isLegalMaskedLoadStore(DataType, Alignment); |
240 | } |
241 | bool isLegalMaskedStore(Type *DataType, Align Alignment) { |
242 | return isLegalMaskedLoadStore(DataType, Alignment); |
243 | } |
244 | |
245 | bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment) { |
246 | if (!ST->hasVInstructions()) |
247 | return false; |
248 | |
    EVT DataTypeVT = TLI->getValueType(DL, DataType);
250 | |
251 | // Only support fixed vectors if we know the minimum vector size. |
252 | if (DataTypeVT.isFixedLengthVector() && !ST->useRVVForFixedLengthVectors()) |
253 | return false; |
254 | |
255 | EVT ElemType = DataTypeVT.getScalarType(); |
256 | if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize()) |
257 | return false; |
258 | |
    return TLI->isLegalElementTypeForRVV(ElemType);
260 | } |
261 | |
262 | bool isLegalMaskedGather(Type *DataType, Align Alignment) { |
263 | return isLegalMaskedGatherScatter(DataType, Alignment); |
264 | } |
265 | bool isLegalMaskedScatter(Type *DataType, Align Alignment) { |
266 | return isLegalMaskedGatherScatter(DataType, Alignment); |
267 | } |
268 | |
269 | bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) { |
270 | // Scalarize masked gather for RV64 if EEW=64 indices aren't supported. |
271 | return ST->is64Bit() && !ST->hasVInstructionsI64(); |
272 | } |
273 | |
274 | bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) { |
275 | // Scalarize masked scatter for RV64 if EEW=64 indices aren't supported. |
276 | return ST->is64Bit() && !ST->hasVInstructionsI64(); |
277 | } |
278 | |
279 | bool isLegalStridedLoadStore(Type *DataType, Align Alignment) { |
    EVT DataTypeVT = TLI->getValueType(DL, DataType);
    return TLI->isLegalStridedLoadStore(DataTypeVT, Alignment);
282 | } |
283 | |
284 | bool isLegalMaskedCompressStore(Type *DataTy, Align Alignment); |
285 | |
286 | bool isVScaleKnownToBeAPowerOfTwo() const { |
287 | return TLI->isVScaleKnownToBeAPowerOfTwo(); |
288 | } |
289 | |
290 | /// \returns How the target needs this vector-predicated operation to be |
291 | /// transformed. |
292 | TargetTransformInfo::VPLegalization |
293 | getVPLegalizationStrategy(const VPIntrinsic &PI) const { |
294 | using VPLegalization = TargetTransformInfo::VPLegalization; |
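    // RVV has no reduction-multiply instruction, so vp.reduce.mul is handled
    // natively only for i1 element types. That intrinsic on wider elements,
    // and all VP operations without the V extension, are instead converted to
    // their unpredicated form.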
295 | if (!ST->hasVInstructions() || |
296 | (PI.getIntrinsicID() == Intrinsic::vp_reduce_mul && |
        cast<VectorType>(PI.getArgOperand(1)->getType())
298 | ->getElementType() |
299 | ->getIntegerBitWidth() != 1)) |
300 | return VPLegalization(VPLegalization::Discard, VPLegalization::Convert); |
301 | return VPLegalization(VPLegalization::Legal, VPLegalization::Legal); |
302 | } |
303 | |
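  // Scalable reductions are only vectorized when the element type is legal
  // for RVV and the recurrence kind has a matching RVV lowering.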
304 | bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, |
305 | ElementCount VF) const { |
306 | if (!VF.isScalable()) |
307 | return true; |
308 | |
309 | Type *Ty = RdxDesc.getRecurrenceType(); |
    if (!TLI->isLegalElementTypeForRVV(TLI->getValueType(DL, Ty)))
311 | return false; |
312 | |
313 | switch (RdxDesc.getRecurrenceKind()) { |
314 | case RecurKind::Add: |
315 | case RecurKind::FAdd: |
316 | case RecurKind::And: |
317 | case RecurKind::Or: |
318 | case RecurKind::Xor: |
319 | case RecurKind::SMin: |
320 | case RecurKind::SMax: |
321 | case RecurKind::UMin: |
322 | case RecurKind::UMax: |
323 | case RecurKind::FMin: |
324 | case RecurKind::FMax: |
325 | case RecurKind::FMulAdd: |
326 | case RecurKind::IAnyOf: |
327 | case RecurKind::FAnyOf: |
328 | return true; |
329 | default: |
330 | return false; |
331 | } |
332 | } |
333 | |
334 | unsigned getMaxInterleaveFactor(ElementCount VF) { |
335 | // Don't interleave if the loop has been vectorized with scalable vectors. |
336 | if (VF.isScalable()) |
337 | return 1; |
338 | // If the loop will not be vectorized, don't interleave the loop. |
    // Let the regular unroller handle it instead.
340 | return VF.isScalar() ? 1 : ST->getMaxInterleaveFactor(); |
341 | } |
342 | |
343 | bool enableInterleavedAccessVectorization() { return true; } |
344 | |
345 | enum RISCVRegisterClass { GPRRC, FPRRC, VRRC }; |
346 | unsigned getNumberOfRegisters(unsigned ClassID) const { |
347 | switch (ClassID) { |
348 | case RISCVRegisterClass::GPRRC: |
349 | // 31 = 32 GPR - x0 (zero register) |
350 | // FIXME: Should we exclude fixed registers like SP, TP or GP? |
351 | return 31; |
352 | case RISCVRegisterClass::FPRRC: |
353 | if (ST->hasStdExtF()) |
354 | return 32; |
355 | return 0; |
356 | case RISCVRegisterClass::VRRC: |
357 | // Although there are 32 vector registers, v0 is special in that it is the |
358 | // only register that can be used to hold a mask. |
359 | // FIXME: Should we conservatively return 31 as the number of usable |
360 | // vector registers? |
361 | return ST->hasVInstructions() ? 32 : 0; |
362 | } |
    llvm_unreachable("unknown register class");
364 | } |
365 | |
366 | unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const { |
367 | if (Vector) |
368 | return RISCVRegisterClass::VRRC; |
369 | if (!Ty) |
370 | return RISCVRegisterClass::GPRRC; |
371 | |
372 | Type *ScalarTy = Ty->getScalarType(); |
373 | if ((ScalarTy->isHalfTy() && ST->hasStdExtZfhmin()) || |
374 | (ScalarTy->isFloatTy() && ST->hasStdExtF()) || |
375 | (ScalarTy->isDoubleTy() && ST->hasStdExtD())) { |
376 | return RISCVRegisterClass::FPRRC; |
377 | } |
378 | |
379 | return RISCVRegisterClass::GPRRC; |
380 | } |
381 | |
382 | const char *getRegisterClassName(unsigned ClassID) const { |
383 | switch (ClassID) { |
384 | case RISCVRegisterClass::GPRRC: |
385 | return "RISCV::GPRRC" ; |
386 | case RISCVRegisterClass::FPRRC: |
387 | return "RISCV::FPRRC" ; |
388 | case RISCVRegisterClass::VRRC: |
389 | return "RISCV::VRRC" ; |
390 | } |
391 | llvm_unreachable("unknown register class" ); |
392 | } |
393 | |
394 | bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, |
395 | const TargetTransformInfo::LSRCost &C2); |
396 | |
397 | bool shouldFoldTerminatingConditionAfterLSR() const { |
398 | return true; |
399 | } |
400 | |
401 | std::optional<unsigned> getMinPageSize() const { return 4096; } |
402 | }; |
403 | |
404 | } // end namespace llvm |
405 | |
406 | #endif // LLVM_LIB_TARGET_RISCV_RISCVTARGETTRANSFORMINFO_H |
407 | |