1//===- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7/// \file
8/// This file implements a TargetTransformInfo analysis pass specific to the
9/// Hexagon target machine. It uses the target's detailed information to provide
10/// more precise answers to certain TTI queries, while letting the target
11/// independent and default TTI implementations handle the rest.
12///
13//===----------------------------------------------------------------------===//
14
15#include "HexagonTargetTransformInfo.h"
16#include "HexagonSubtarget.h"
17#include "llvm/Analysis/TargetTransformInfo.h"
18#include "llvm/CodeGen/ValueTypes.h"
19#include "llvm/IR/InstrTypes.h"
20#include "llvm/IR/Instructions.h"
21#include "llvm/IR/User.h"
22#include "llvm/Support/Casting.h"
23#include "llvm/Support/CommandLine.h"
24#include "llvm/Transforms/Utils/LoopPeel.h"
25#include "llvm/Transforms/Utils/UnrollLoop.h"
26
27using namespace llvm;
28
29#define DEBUG_TYPE "hexagontti"
30
31static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(Val: false),
32 cl::Hidden, cl::desc("Enable loop vectorizer for HVX"));
33
34cl::opt<bool> HexagonAllowScatterGatherHVX(
35 "hexagon-allow-scatter-gather-hvx", cl::init(Val: false), cl::Hidden,
36 cl::desc("Allow auto-generation of HVX scatter-gather"));
37
38static cl::opt<bool> EnableV68FloatAutoHVX(
39 "force-hvx-float", cl::Hidden,
40 cl::desc("Enable auto-vectorization of floatint point types on v68."));
41
42static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables",
43 cl::init(Val: true), cl::Hidden,
44 cl::desc("Control lookup table emission on Hexagon target"));
45
46static cl::opt<bool> HexagonMaskedVMem("hexagon-masked-vmem", cl::init(Val: true),
47 cl::Hidden, cl::desc("Enable masked loads/stores for HVX"));
48
// Fixed multiplier that inflates the vectorization cost of floating point
// operations. It is a crude stand-in: ideally the cost model would be
// expressed in cycles instead.
static constexpr unsigned FloatFactor = 4;
53
54bool HexagonTTIImpl::useHVX() const {
55 return ST.useHVXOps() && HexagonAutoHVX;
56}
57
58bool HexagonTTIImpl::isHVXVectorType(Type *Ty) const {
59 auto *VecTy = dyn_cast<VectorType>(Val: Ty);
60 if (!VecTy)
61 return false;
62 if (!ST.isTypeForHVX(VecTy))
63 return false;
64 if (ST.useHVXV69Ops() || !VecTy->getElementType()->isFloatingPointTy())
65 return true;
66 return ST.useHVXV68Ops() && EnableV68FloatAutoHVX;
67}
68
69unsigned HexagonTTIImpl::getTypeNumElements(Type *Ty) const {
70 if (auto *VTy = dyn_cast<FixedVectorType>(Val: Ty))
71 return VTy->getNumElements();
72 assert((Ty->isIntegerTy() || Ty->isFloatingPointTy()) &&
73 "Expecting scalar type");
74 return 1;
75}
76
77TargetTransformInfo::PopcntSupportKind
78HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
79 // Return fast hardware support as every input < 64 bits will be promoted
80 // to 64 bits.
81 return TargetTransformInfo::PSK_FastHardware;
82}
83
84// The Hexagon target can unroll loops with run-time trip counts.
85void HexagonTTIImpl::getUnrollingPreferences(
86 Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
87 OptimizationRemarkEmitter *ORE) const {
88 UP.Runtime = UP.Partial = true;
89}
90
91void HexagonTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
92 TTI::PeelingPreferences &PP) const {
93 BaseT::getPeelingPreferences(L, SE, PP);
94 // Only try to peel innermost loops with small runtime trip counts.
95 if (L && L->isInnermost() && canPeel(L) &&
96 SE.getSmallConstantTripCount(L) == 0 &&
97 SE.getSmallConstantMaxTripCount(L) > 0 &&
98 SE.getSmallConstantMaxTripCount(L) <= 5) {
99 PP.PeelCount = 2;
100 }
101}
102
103TTI::AddressingModeKind
104HexagonTTIImpl::getPreferredAddressingMode(const Loop *L,
105 ScalarEvolution *SE) const {
106 return TTI::AMK_PostIndexed;
107}
108
109/// --- Vector TTI begin ---
110
111unsigned HexagonTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
112 bool Vector = ClassID == 1;
113 if (Vector)
114 return useHVX() ? 32 : 0;
115 return 32;
116}
117
118unsigned HexagonTTIImpl::getMaxInterleaveFactor(ElementCount VF) const {
119 return useHVX() ? 2 : 1;
120}
121
122TypeSize
123HexagonTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
124 switch (K) {
125 case TargetTransformInfo::RGK_Scalar:
126 return TypeSize::getFixed(ExactSize: 32);
127 case TargetTransformInfo::RGK_FixedWidthVector:
128 return TypeSize::getFixed(ExactSize: getMinVectorRegisterBitWidth());
129 case TargetTransformInfo::RGK_ScalableVector:
130 return TypeSize::getScalable(MinimumSize: 0);
131 }
132
133 llvm_unreachable("Unsupported register kind");
134}
135
136unsigned HexagonTTIImpl::getMinVectorRegisterBitWidth() const {
137 return useHVX() ? ST.getVectorLength()*8 : 32;
138}
139
140ElementCount HexagonTTIImpl::getMinimumVF(unsigned ElemWidth,
141 bool IsScalable) const {
142 assert(!IsScalable && "Scalable VFs are not supported for Hexagon");
143 return ElementCount::getFixed(MinVal: (8 * ST.getVectorLength()) / ElemWidth);
144}
145
146InstructionCost
147HexagonTTIImpl::getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
148 TTI::TargetCostKind CostKind) const {
149 return BaseT::getCallInstrCost(F, RetTy, Tys, CostKind);
150}
151
152InstructionCost
153HexagonTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
154 TTI::TargetCostKind CostKind) const {
155 if (ICA.getID() == Intrinsic::bswap) {
156 std::pair<InstructionCost, MVT> LT =
157 getTypeLegalizationCost(Ty: ICA.getReturnType());
158 return LT.first + 2;
159 }
160 return BaseT::getIntrinsicInstrCost(ICA, CostKind);
161}
162
163InstructionCost
164HexagonTTIImpl::getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
165 const SCEV *S,
166 TTI::TargetCostKind CostKind) const {
167 return 0;
168}
169
170InstructionCost HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
171 Align Alignment,
172 unsigned AddressSpace,
173 TTI::TargetCostKind CostKind,
174 TTI::OperandValueInfo OpInfo,
175 const Instruction *I) const {
176 assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
177 // TODO: Handle other cost kinds.
178 if (CostKind != TTI::TCK_RecipThroughput)
179 return 1;
180
181 if (Opcode == Instruction::Store)
182 return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
183 CostKind, OpInfo, I);
184
185 if (Src->isVectorTy()) {
186 VectorType *VecTy = cast<VectorType>(Val: Src);
187 unsigned VecWidth = VecTy->getPrimitiveSizeInBits().getFixedValue();
188 if (isHVXVectorType(Ty: VecTy)) {
189 unsigned RegWidth =
190 getRegisterBitWidth(K: TargetTransformInfo::RGK_FixedWidthVector)
191 .getFixedValue();
192 assert(RegWidth && "Non-zero vector register width expected");
193 // Cost of HVX loads.
194 if (VecWidth % RegWidth == 0)
195 return VecWidth / RegWidth;
196 // Cost of constructing HVX vector from scalar loads
197 const Align RegAlign(RegWidth / 8);
198 if (Alignment > RegAlign)
199 Alignment = RegAlign;
200 unsigned AlignWidth = 8 * Alignment.value();
201 unsigned NumLoads = alignTo(Value: VecWidth, Align: AlignWidth) / AlignWidth;
202 return 3 * NumLoads;
203 }
204
205 // Non-HVX vectors.
206 // Add extra cost for floating point types.
207 unsigned Cost =
208 VecTy->getElementType()->isFloatingPointTy() ? FloatFactor : 1;
209
210 // At this point unspecified alignment is considered as Align(1).
211 const Align BoundAlignment = std::min(a: Alignment, b: Align(8));
212 unsigned AlignWidth = 8 * BoundAlignment.value();
213 unsigned NumLoads = alignTo(Value: VecWidth, Align: AlignWidth) / AlignWidth;
214 if (Alignment == Align(4) || Alignment == Align(8))
215 return Cost * NumLoads;
216 // Loads of less than 32 bits will need extra inserts to compose a vector.
217 assert(BoundAlignment <= Align(8));
218 unsigned LogA = Log2(A: BoundAlignment);
219 return (3 - LogA) * Cost * NumLoads;
220 }
221
222 return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
223 OpInfo, I);
224}
225
226InstructionCost
227HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy,
228 VectorType *SrcTy, ArrayRef<int> Mask,
229 TTI::TargetCostKind CostKind, int Index,
230 VectorType *SubTp, ArrayRef<const Value *> Args,
231 const Instruction *CxtI) const {
232 return 1;
233}
234
235InstructionCost HexagonTTIImpl::getInterleavedMemoryOpCost(
236 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
237 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
238 bool UseMaskForCond, bool UseMaskForGaps) const {
239 if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps)
240 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
241 Alignment, AddressSpace,
242 CostKind,
243 UseMaskForCond, UseMaskForGaps);
244 return getMemoryOpCost(Opcode, Src: VecTy, Alignment, AddressSpace, CostKind);
245}
246
247InstructionCost HexagonTTIImpl::getCmpSelInstrCost(
248 unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
249 TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
250 TTI::OperandValueInfo Op2Info, const Instruction *I) const {
251 if (ValTy->isVectorTy() && CostKind == TTI::TCK_RecipThroughput) {
252 if (!isHVXVectorType(Ty: ValTy) && ValTy->isFPOrFPVectorTy())
253 return InstructionCost::getMax();
254 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty: ValTy);
255 if (Opcode == Instruction::FCmp)
256 return LT.first + FloatFactor * getTypeNumElements(Ty: ValTy);
257 }
258 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
259 Op1Info, Op2Info, I);
260}
261
262InstructionCost HexagonTTIImpl::getArithmeticInstrCost(
263 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
264 TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
265 ArrayRef<const Value *> Args, const Instruction *CxtI) const {
266 // TODO: Handle more cost kinds.
267 if (CostKind != TTI::TCK_RecipThroughput)
268 return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info: Op1Info,
269 Opd2Info: Op2Info, Args, CxtI);
270
271 if (Ty->isVectorTy()) {
272 if (!isHVXVectorType(Ty) && Ty->isFPOrFPVectorTy())
273 return InstructionCost::getMax();
274 std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
275 if (LT.second.isFloatingPoint())
276 return LT.first + FloatFactor * getTypeNumElements(Ty);
277 }
278 return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info: Op1Info, Opd2Info: Op2Info,
279 Args, CxtI);
280}
281
282InstructionCost HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
283 Type *SrcTy,
284 TTI::CastContextHint CCH,
285 TTI::TargetCostKind CostKind,
286 const Instruction *I) const {
287 auto isNonHVXFP = [this] (Type *Ty) {
288 return Ty->isVectorTy() && !isHVXVectorType(Ty) && Ty->isFPOrFPVectorTy();
289 };
290 if (isNonHVXFP(SrcTy) || isNonHVXFP(DstTy))
291 return InstructionCost::getMax();
292
293 if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) {
294 unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(Ty: SrcTy) : 0;
295 unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(Ty: DstTy) : 0;
296
297 std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(Ty: SrcTy);
298 std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(Ty: DstTy);
299 InstructionCost Cost =
300 std::max(a: SrcLT.first, b: DstLT.first) + FloatFactor * (SrcN + DstN);
301 // TODO: Allow non-throughput costs that aren't binary.
302 if (CostKind != TTI::TCK_RecipThroughput)
303 return Cost == 0 ? 0 : 1;
304 return Cost;
305 }
306 return 1;
307}
308
309InstructionCost HexagonTTIImpl::getVectorInstrCost(
310 unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
311 const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC) const {
312 Type *ElemTy = Val->isVectorTy() ? cast<VectorType>(Val)->getElementType()
313 : Val;
314 if (Opcode == Instruction::InsertElement) {
315 // Need two rotations for non-zero index.
316 unsigned Cost = (Index != 0) ? 2 : 0;
317 if (ElemTy->isIntegerTy(Bitwidth: 32))
318 return Cost;
319 // If it's not a 32-bit value, there will need to be an extract.
320 return Cost + getVectorInstrCost(Opcode: Instruction::ExtractElement, Val, CostKind,
321 Index, Op0, Op1, VIC);
322 }
323
324 if (Opcode == Instruction::ExtractElement)
325 return 2;
326
327 return 1;
328}
329
330bool HexagonTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
331 switch (II->getIntrinsicID()) {
332 case Intrinsic::vector_reduce_add:
333 return false;
334 }
335 return true;
336}
337
338bool HexagonTTIImpl::isLegalMaskedStore(Type *DataType, Align /*Alignment*/,
339 unsigned /*AddressSpace*/,
340 TTI::MaskKind /*MaskKind*/) const {
341 // This function is called from scalarize-masked-mem-intrin, which runs
342 // in pre-isel. Use ST directly instead of calling isHVXVectorType.
343 return HexagonMaskedVMem && ST.isTypeForHVX(VecTy: DataType);
344}
345
346bool HexagonTTIImpl::isLegalMaskedLoad(Type *DataType, Align /*Alignment*/,
347 unsigned /*AddressSpace*/,
348 TTI::MaskKind /*MaskKind*/) const {
349 // This function is called from scalarize-masked-mem-intrin, which runs
350 // in pre-isel. Use ST directly instead of calling isHVXVectorType.
351 return HexagonMaskedVMem && ST.isTypeForHVX(VecTy: DataType);
352}
353
354bool HexagonTTIImpl::isLegalMaskedGather(Type *Ty, Align Alignment) const {
355 // For now assume we can not deal with all HVX datatypes.
356 if (!Ty->isVectorTy() || !ST.isTypeForHVX(VecTy: Ty) ||
357 !HexagonAllowScatterGatherHVX)
358 return false;
359 // This must be in sync with HexagonVectorCombine pass.
360 switch (Ty->getScalarSizeInBits()) {
361 case 8:
362 return (getTypeNumElements(Ty) == 128);
363 case 16:
364 if (getTypeNumElements(Ty) == 64 || getTypeNumElements(Ty) == 32)
365 return (Alignment >= 2);
366 break;
367 case 32:
368 if (getTypeNumElements(Ty) == 32)
369 return (Alignment >= 4);
370 break;
371 default:
372 break;
373 }
374 return false;
375}
376
377bool HexagonTTIImpl::isLegalMaskedScatter(Type *Ty, Align Alignment) const {
378 if (!Ty->isVectorTy() || !ST.isTypeForHVX(VecTy: Ty) ||
379 !HexagonAllowScatterGatherHVX)
380 return false;
381 // This must be in sync with HexagonVectorCombine pass.
382 switch (Ty->getScalarSizeInBits()) {
383 case 8:
384 return (getTypeNumElements(Ty) == 128);
385 case 16:
386 if (getTypeNumElements(Ty) == 64)
387 return (Alignment >= 2);
388 break;
389 case 32:
390 if (getTypeNumElements(Ty) == 32)
391 return (Alignment >= 4);
392 break;
393 default:
394 break;
395 }
396 return false;
397}
398
399bool HexagonTTIImpl::forceScalarizeMaskedGather(VectorType *VTy,
400 Align Alignment) const {
401 return !isLegalMaskedGather(Ty: VTy, Alignment);
402}
403
404bool HexagonTTIImpl::forceScalarizeMaskedScatter(VectorType *VTy,
405 Align Alignment) const {
406 return !isLegalMaskedScatter(Ty: VTy, Alignment);
407}
408
409/// --- Vector TTI end ---
410
411unsigned HexagonTTIImpl::getPrefetchDistance() const {
412 return ST.getL1PrefetchDistance();
413}
414
415unsigned HexagonTTIImpl::getCacheLineSize() const {
416 return ST.getL1CacheLineSize();
417}
418
419InstructionCost
420HexagonTTIImpl::getInstructionCost(const User *U,
421 ArrayRef<const Value *> Operands,
422 TTI::TargetCostKind CostKind) const {
423 auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {
424 if (!CI->isIntegerCast())
425 return false;
426 // Only extensions from an integer type shorter than 32-bit to i32
427 // can be folded into the load.
428 const DataLayout &DL = getDataLayout();
429 unsigned SBW = DL.getTypeSizeInBits(Ty: CI->getSrcTy());
430 unsigned DBW = DL.getTypeSizeInBits(Ty: CI->getDestTy());
431 if (DBW != 32 || SBW >= DBW)
432 return false;
433
434 const LoadInst *LI = dyn_cast<const LoadInst>(Val: CI->getOperand(i_nocapture: 0));
435 // Technically, this code could allow multiple uses of the load, and
436 // check if all the uses are the same extension operation, but this
437 // should be sufficient for most cases.
438 return LI && LI->hasOneUse();
439 };
440
441 if (const CastInst *CI = dyn_cast<const CastInst>(Val: U))
442 if (isCastFoldedIntoLoad(CI))
443 return TargetTransformInfo::TCC_Free;
444 return BaseT::getInstructionCost(U, Operands, CostKind);
445}
446
447bool HexagonTTIImpl::shouldBuildLookupTables() const {
448 return EmitLookupTables;
449}
450