//===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file describes a TargetTransformInfoImplBase conforming object
/// specific to the NVPTX target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target-independent and default TTI implementations handle the
/// rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H

#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXUtilities.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/TargetLowering.h"
#include <optional>

namespace llvm {

class NVPTXTTIImpl final : public BasicTTIImplBase<NVPTXTTIImpl> {
  typedef BasicTTIImplBase<NVPTXTTIImpl> BaseT;
  typedef TargetTransformInfo TTI;
  friend BaseT;

  const NVPTXSubtarget *ST;
  const NVPTXTargetLowering *TLI;

  const NVPTXSubtarget *getST() const { return ST; }
  const NVPTXTargetLowering *getTLI() const { return TLI; }

public:
  explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl()),
        TLI(ST->getTargetLowering()) {}

  bool hasBranchDivergence(const Function *F = nullptr) const override {
    return true;
  }

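  // Values that may differ between threads (for example reads of
  // threadIdx/laneid, atomic operations, loads from generic or local memory,
  // and call results) are treated as divergent; see the implementation in
  // NVPTXTargetTransformInfo.cpp.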
  bool isSourceOfDivergence(const Value *V) const override;

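  // The generic address space is NVPTX's "flat" address space: a generic
  // pointer may refer to memory in any of the concrete address spaces, and
  // passes such as InferAddressSpaces use this hook to know which casts they
  // may try to eliminate.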
  unsigned getFlatAddressSpace() const override {
    return AddressSpace::ADDRESS_SPACE_GENERIC;
  }

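  // PTX only allows initializers on variables in the global and const state
  // spaces, so shared, local, and param variables cannot carry a meaningful
  // (non-undef) initializer.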
  bool
  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
    return AS != AddressSpace::ADDRESS_SPACE_SHARED &&
           AS != AddressSpace::ADDRESS_SPACE_LOCAL && AS != ADDRESS_SPACE_PARAM;
  }

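  // Try to simplify NVVM intrinsic calls during instcombine, for example by
  // mapping them to equivalent target-generic intrinsics or instructions;
  // implemented in NVPTXTargetTransformInfo.cpp.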
  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;

  // Loads and stores can be vectorized if the alignment is at least as big as
  // the load/store we want to vectorize.
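  // For example, combining four i32 loads into a single 16-byte vector load
  // requires the chain to be at least 16-byte aligned.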
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const override {
    return Alignment >= ChainSizeInBytes;
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const override {
    return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace);
  }

  // NVPTX exposes an effectively unlimited number of virtual registers, but
  // the actual machine does not. We conservatively return 1 here, which is
  // just enough to enable the vectorizers but disables heuristics based on
  // the number of registers.
  // FIXME: Return a more reasonable number, while keeping an eye on
  // LoopVectorizer's unrolling heuristics.
  unsigned getNumberOfRegisters(unsigned ClassID) const override { return 1; }

  // Only <2 x half> should be vectorized, so always return 32 for the vector
  // register size.
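  // A <2 x half> value fits in a single 32-bit register (packed f16x2 in
  // PTX), which is why 32 is the right answer here.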
  TypeSize
  getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override {
    return TypeSize::getFixed(32);
  }
  unsigned getMinVectorRegisterBitWidth() const override { return 32; }

  // We don't want to prevent inlining because of target-cpu and -features
  // attributes that were added to newer versions of LLVM/Clang: there are no
  // incompatible functions in PTX; ptxas will report errors in such cases.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override {
    return true;
  }
  // Increase the inlining cost threshold by a factor of 11, reflecting that
  // calls are particularly expensive in NVPTX.
  unsigned getInliningThresholdMultiplier() const override { return 11; }

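  // NVPTX-specific user/instruction cost modelling; implemented in
  // NVPTXTargetTransformInfo.cpp.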
  InstructionCost
  getInstructionCost(const User *U, ArrayRef<const Value *> Operands,
                     TTI::TargetCostKind CostKind) const override;

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = {},
      const Instruction *CxtI = nullptr) const override;

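  // Model the cost of materializing vector elements. Small vectors that NVPTX
  // packs into a single 32-bit register (v2f16, v2bf16, v2i16, v4i8) are
  // cheaper to assemble than the generic scalarization model assumes.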
  InstructionCost getScalarizationOverhead(
      VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract,
      TTI::TargetCostKind CostKind, bool ForPoisonSrc = true,
      ArrayRef<Value *> VL = {}) const override {
    if (!InTy->getElementCount().isFixed())
      return InstructionCost::getInvalid();

    auto VT = getTLI()->getValueType(DL, InTy);
    auto NumElements = InTy->getElementCount().getFixedValue();
    InstructionCost Cost = 0;
    if (Insert && !VL.empty()) {
      bool AllConstant = all_of(seq(NumElements), [&](int Idx) {
        return !DemandedElts[Idx] || isa<Constant>(VL[Idx]);
      });
      if (AllConstant) {
        Cost += TTI::TCC_Free;
        Insert = false;
      }
    }
    if (Insert && Isv2x16VT(VT)) {
      // Can be built in a single mov
      Cost += 1;
      Insert = false;
    }
    if (Insert && VT == MVT::v4i8) {
      Cost += 3; // 3 x PRMT
      for (auto Idx : seq(NumElements))
        if (DemandedElts[Idx])
          Cost += 1; // zext operand to i32
      Insert = false;
    }
    return Cost + BaseT::getScalarizationOverhead(InTy, DemandedElts, Insert,
                                                  Extract, CostKind,
                                                  ForPoisonSrc, VL);
  }

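  // Loop unrolling and peeling tuning: NVPTX enables partial and runtime
  // unrolling with a reduced threshold, since small loops are often unrolled
  // by the PTX-to-SASS compiler anyway; see NVPTXTargetTransformInfo.cpp.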
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) const override;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) const override;

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const override {
    // Volatile loads/stores are only supported for shared and global address
    // spaces, or for generic AS that maps to them.
    if (!(AddrSpace == llvm::ADDRESS_SPACE_GENERIC ||
          AddrSpace == llvm::ADDRESS_SPACE_GLOBAL ||
          AddrSpace == llvm::ADDRESS_SPACE_SHARED))
      return false;

    switch (I->getOpcode()) {
    default:
      return false;
    case Instruction::Load:
    case Instruction::Store:
      return true;
    }
  }

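  // For InferAddressSpaces: report which operands of the intrinsic IID are
  // generic ("flat") pointers that may be rewritten to a more specific
  // address space (for example the llvm.nvvm.isspacep.* intrinsics).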
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const override;

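  // Bit width of the widest vector load/store the load/store vectorizer
  // should form for the given address space.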
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override;

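  // For InferAddressSpaces: rewrite intrinsic II, replacing its generic
  // pointer operand OldV with NewV, whose address space is now known; for
  // isspacep-style queries this typically folds to a constant.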
  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const override;
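  // Address space a value can be assumed to live in even when its type says
  // generic (for example, allocas are in the local address space); used by
  // InferAddressSpaces.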
  unsigned getAssumedAddrSpace(const Value *V) const override;

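  // Report the launch bounds attached to the kernel F (such as maxntid-style
  // limits) as (name, value) pairs.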
  void collectKernelLaunchBounds(
      const Function &F,
      SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;
};

} // end namespace llvm

#endif