//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the WebAssembly-specific TargetTransformInfo
/// implementation.
///
//===----------------------------------------------------------------------===//

#include "WebAssemblyTargetTransformInfo.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/Support/Debug.h"
using namespace llvm;

#define DEBUG_TYPE "wasmtti"

TargetTransformInfo::PopcntSupportKind
WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
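  // WebAssembly has single-instruction popcounts (i32.popcnt / i64.popcnt),
  // so report popcount as fast hardware support.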
  return TargetTransformInfo::PSK_FastHardware;
}

unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
  unsigned Result = BaseT::getNumberOfRegisters(ClassID);

  // For SIMD, use at least 16 registers, as a rough guess.
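  // In the default TTI register-class scheme, ClassID 0 is the scalar class
  // and ClassID 1 is the vector class.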
  bool Vector = (ClassID == 1);
  if (Vector)
    Result = std::max(Result, 16u);

  return Result;
}

TypeSize WebAssemblyTTIImpl::getRegisterBitWidth(
    TargetTransformInfo::RegisterKind K) const {
  switch (K) {
  case TargetTransformInfo::RGK_Scalar:
    return TypeSize::getFixed(64);
  case TargetTransformInfo::RGK_FixedWidthVector:
    return TypeSize::getFixed(getST()->hasSIMD128() ? 128 : 64);
  case TargetTransformInfo::RGK_ScalableVector:
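    // WebAssembly has no scalable vectors.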
    return TypeSize::getScalable(0);
  }

  llvm_unreachable("Unsupported register kind");
}

InstructionCost WebAssemblyTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
    ArrayRef<const Value *> Args, const Instruction *CxtI) {

  InstructionCost Cost =
      BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
          Opcode, Ty, CostKind, Op1Info, Op2Info);

  if (auto *VTy = dyn_cast<VectorType>(Ty)) {
    switch (Opcode) {
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::Shl:
      // SIMD128's shifts currently only accept a scalar shift count. For each
      // element, we'll need to extract, op, insert. The following is a rough
      // approximation.
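      // For example, a non-uniform shift of a <4 x i32> is costed as
      // 4 * (extract + scalar shift + insert).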
      if (!Op2Info.isUniform())
        Cost =
            cast<FixedVectorType>(VTy)->getNumElements() *
            (TargetTransformInfo::TCC_Basic +
             getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
             TargetTransformInfo::TCC_Basic);
      break;
    }
  }
  return Cost;
}

InstructionCost
WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                       TTI::TargetCostKind CostKind,
                                       unsigned Index, Value *Op0, Value *Op1) {
  InstructionCost Cost = BasicTTIImplBase::getVectorInstrCost(
      Opcode, Val, CostKind, Index, Op0, Op1);

  // SIMD128's insert/extract currently only take constant indices.
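  // An Index of -1 means the index is not a compile-time constant, so charge
  // a large rough penalty for the (presumably scalarized) lowering.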
  if (Index == -1u)
    return Cost + 25 * TargetTransformInfo::TCC_Expensive;

  return Cost;
}

TTI::ReductionShuffle WebAssemblyTTIImpl::getPreferredExpandedReductionShuffle(
    const IntrinsicInst *II) const {

  switch (II->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::vector_reduce_fadd:
    return TTI::ReductionShuffle::Pairwise;
  }
  return TTI::ReductionShuffle::SplitHalf;
}

bool WebAssemblyTTIImpl::areInlineCompatible(const Function *Caller,
                                             const Function *Callee) const {
  // Allow inlining only when the Callee has a subset of the Caller's
  // features. In principle, we should be able to inline regardless of any
  // features because WebAssembly supports features at module granularity, not
  // function granularity, but without this restriction it would be possible
  // for a module to "forget" about features if all the functions that used
  // them were inlined.
  const TargetMachine &TM = getTLI()->getTargetMachine();

  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  return (CallerBits & CalleeBits) == CalleeBits;
}

void WebAssemblyTTIImpl::getUnrollingPreferences(
    Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
    OptimizationRemarkEmitter *ORE) const {
  // Scan the loop: don't unroll loops with calls. This is a standard approach
  // for most (all?) targets.
  for (BasicBlock *BB : L->blocks())
    for (Instruction &I : *BB)
      if (isa<CallInst>(I) || isa<InvokeInst>(I))
        if (const Function *F = cast<CallBase>(I).getCalledFunction())
          if (isLoweredToCall(F))
            return;

  // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
  // the various microarchitectures that use the BasicTTI implementation and
  // has been selected through heuristics across multiple cores and runtimes.
  UP.Partial = UP.Runtime = UP.UpperBound = true;
  UP.PartialThreshold = 30;

  // Avoid unrolling when optimizing for size.
  UP.OptSizeThreshold = 0;
  UP.PartialOptSizeThreshold = 0;
  // Number of instructions assumed to be optimized away when the loop's back
  // edge becomes a fall-through; keep the default value of 2.
  UP.BEInsns = 2;
}

bool WebAssemblyTTIImpl::supportsTailCalls() const {
  return getST()->hasTailCall();
}