1 | //===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file defines the WebAssembly-specific TargetTransformInfo |
11 | /// implementation. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "WebAssemblyTargetTransformInfo.h" |
16 | #include "llvm/CodeGen/CostTable.h" |
17 | #include "llvm/Support/Debug.h" |
18 | using namespace llvm; |
19 | |
20 | #define DEBUG_TYPE "wasmtti" |
21 | |
22 | TargetTransformInfo::PopcntSupportKind |
23 | WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const { |
24 | assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2" ); |
25 | return TargetTransformInfo::PSK_FastHardware; |
26 | } |
27 | |
28 | unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const { |
29 | unsigned Result = BaseT::getNumberOfRegisters(ClassID); |
30 | |
31 | // For SIMD, use at least 16 registers, as a rough guess. |
32 | bool Vector = (ClassID == 1); |
33 | if (Vector) |
34 | Result = std::max(a: Result, b: 16u); |
35 | |
36 | return Result; |
37 | } |
38 | |
39 | TypeSize WebAssemblyTTIImpl::getRegisterBitWidth( |
40 | TargetTransformInfo::RegisterKind K) const { |
41 | switch (K) { |
42 | case TargetTransformInfo::RGK_Scalar: |
43 | return TypeSize::getFixed(ExactSize: 64); |
44 | case TargetTransformInfo::RGK_FixedWidthVector: |
45 | return TypeSize::getFixed(ExactSize: getST()->hasSIMD128() ? 128 : 64); |
46 | case TargetTransformInfo::RGK_ScalableVector: |
47 | return TypeSize::getScalable(MinimumSize: 0); |
48 | } |
49 | |
50 | llvm_unreachable("Unsupported register kind" ); |
51 | } |
52 | |
53 | InstructionCost WebAssemblyTTIImpl::getArithmeticInstrCost( |
54 | unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, |
55 | TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, |
56 | ArrayRef<const Value *> Args, |
57 | const Instruction *CxtI) { |
58 | |
59 | InstructionCost Cost = |
60 | BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost( |
61 | Opcode, Ty, CostKind, Opd1Info: Op1Info, Opd2Info: Op2Info); |
62 | |
63 | if (auto *VTy = dyn_cast<VectorType>(Val: Ty)) { |
64 | switch (Opcode) { |
65 | case Instruction::LShr: |
66 | case Instruction::AShr: |
67 | case Instruction::Shl: |
68 | // SIMD128's shifts currently only accept a scalar shift count. For each |
69 | // element, we'll need to extract, op, insert. The following is a rough |
70 | // approximation. |
71 | if (!Op2Info.isUniform()) |
72 | Cost = |
73 | cast<FixedVectorType>(Val: VTy)->getNumElements() * |
74 | (TargetTransformInfo::TCC_Basic + |
75 | getArithmeticInstrCost(Opcode, Ty: VTy->getElementType(), CostKind) + |
76 | TargetTransformInfo::TCC_Basic); |
77 | break; |
78 | } |
79 | } |
80 | return Cost; |
81 | } |
82 | |
83 | InstructionCost |
84 | WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, |
85 | TTI::TargetCostKind CostKind, |
86 | unsigned Index, Value *Op0, Value *Op1) { |
87 | InstructionCost Cost = BasicTTIImplBase::getVectorInstrCost( |
88 | Opcode, Val, CostKind, Index, Op0, Op1); |
89 | |
90 | // SIMD128's insert/extract currently only take constant indices. |
91 | if (Index == -1u) |
92 | return Cost + 25 * TargetTransformInfo::TCC_Expensive; |
93 | |
94 | return Cost; |
95 | } |
96 | |
97 | TTI::ReductionShuffle WebAssemblyTTIImpl::getPreferredExpandedReductionShuffle( |
98 | const IntrinsicInst *II) const { |
99 | |
100 | switch (II->getIntrinsicID()) { |
101 | default: |
102 | break; |
103 | case Intrinsic::vector_reduce_fadd: |
104 | return TTI::ReductionShuffle::Pairwise; |
105 | } |
106 | return TTI::ReductionShuffle::SplitHalf; |
107 | } |
108 | |
109 | bool WebAssemblyTTIImpl::areInlineCompatible(const Function *Caller, |
110 | const Function *Callee) const { |
111 | // Allow inlining only when the Callee has a subset of the Caller's |
112 | // features. In principle, we should be able to inline regardless of any |
113 | // features because WebAssembly supports features at module granularity, not |
114 | // function granularity, but without this restriction it would be possible for |
115 | // a module to "forget" about features if all the functions that used them |
116 | // were inlined. |
117 | const TargetMachine &TM = getTLI()->getTargetMachine(); |
118 | |
119 | const FeatureBitset &CallerBits = |
120 | TM.getSubtargetImpl(*Caller)->getFeatureBits(); |
121 | const FeatureBitset &CalleeBits = |
122 | TM.getSubtargetImpl(*Callee)->getFeatureBits(); |
123 | |
124 | return (CallerBits & CalleeBits) == CalleeBits; |
125 | } |
126 | |
127 | void WebAssemblyTTIImpl::( |
128 | Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, |
129 | OptimizationRemarkEmitter *ORE) const { |
130 | // Scan the loop: don't unroll loops with calls. This is a standard approach |
131 | // for most (all?) targets. |
132 | for (BasicBlock *BB : L->blocks()) |
133 | for (Instruction &I : *BB) |
134 | if (isa<CallInst>(Val: I) || isa<InvokeInst>(Val: I)) |
135 | if (const Function *F = cast<CallBase>(Val&: I).getCalledFunction()) |
136 | if (isLoweredToCall(F)) |
137 | return; |
138 | |
139 | // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of |
140 | // the various microarchitectures that use the BasicTTI implementation and |
141 | // has been selected through heuristics across multiple cores and runtimes. |
142 | UP.Partial = UP.Runtime = UP.UpperBound = true; |
143 | UP.PartialThreshold = 30; |
144 | |
145 | // Avoid unrolling when optimizing for size. |
146 | UP.OptSizeThreshold = 0; |
147 | UP.PartialOptSizeThreshold = 0; |
148 | |
149 | // Set number of instructions optimized when "back edge" |
150 | // becomes "fall through" to default value of 2. |
151 | UP.BEInsns = 2; |
152 | } |
153 | |
154 | bool WebAssemblyTTIImpl::supportsTailCalls() const { |
155 | return getST()->hasTailCall(); |
156 | } |
157 | |