//===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfoImplBase conforming object specific
/// to the NVPTX target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H

#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXUtilities.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/TargetLowering.h"
#include <optional>

namespace llvm {

class NVPTXTTIImpl final : public BasicTTIImplBase<NVPTXTTIImpl> {
  typedef BasicTTIImplBase<NVPTXTTIImpl> BaseT;
  typedef TargetTransformInfo TTI;
  friend BaseT;

  const NVPTXSubtarget *ST;
  const NVPTXTargetLowering *TLI;

  const NVPTXSubtarget *getST() const { return ST; }
  const NVPTXTargetLowering *getTLI() const { return TLI; }

public:
  explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl()),
        TLI(ST->getTargetLowering()) {}

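  // PTX programs are executed by many threads in parallel, and control flow is
  // not guaranteed to be uniform across them, so report branch divergence.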
  bool hasBranchDivergence(const Function *F = nullptr) const override {
    return true;
  }

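  // Reports values whose contents may differ between threads (e.g. thread-ID
  // reads); implemented in NVPTXTargetTransformInfo.cpp.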
  bool isSourceOfDivergence(const Value *V) const override;

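  // The generic address space can alias every other NVPTX address space, so it
  // acts as the "flat" address space for address-space inference.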
  unsigned getFlatAddressSpace() const override {
    return AddressSpace::ADDRESS_SPACE_GENERIC;
  }

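  // PTX variables in shared, local, and param memory cannot carry meaningful
  // initializers, so only other address spaces may have non-undef ones.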
  bool
  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
    return AS != AddressSpace::ADDRESS_SPACE_SHARED &&
           AS != AddressSpace::ADDRESS_SPACE_LOCAL && AS != ADDRESS_SPACE_PARAM;
  }

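  // Target-specific InstCombine folds for NVPTX intrinsics; implemented in
  // NVPTXTargetTransformInfo.cpp.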
  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;

  // Loads and stores can be vectorized if the alignment is at least as big as
  // the load/store we want to vectorize.
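  // For example, a 16-byte chain (such as <4 x float>) requires an alignment
  // of at least 16 bytes.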
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const override {
    return Alignment >= ChainSizeInBytes;
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const override {
    return isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AddrSpace);
  }

  // NVPTX has infinite registers of all kinds, but the actual machine doesn't.
  // We conservatively return 1 here which is just enough to enable the
  // vectorizers but disables heuristics based on the number of registers.
  // FIXME: Return a more reasonable number, while keeping an eye on
  // LoopVectorizer's unrolling heuristics.
  unsigned getNumberOfRegisters(unsigned ClassID) const override { return 1; }

  // Only <2 x half> should be vectorized, so always return 32 for the vector
  // register size.
  TypeSize
  getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override {
    return TypeSize::getFixed(32);
  }
  unsigned getMinVectorRegisterBitWidth() const override { return 32; }

  // We don't want to prevent inlining because of target-cpu and -features
  // attributes that were added to newer versions of LLVM/Clang: There are
  // no incompatible functions in PTX, ptxas will throw errors in such cases.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override {
    return true;
  }

  // Increase the inlining cost threshold by a factor of 11, reflecting that
  // calls are particularly expensive in NVPTX.
  unsigned getInliningThresholdMultiplier() const override { return 11; }

  InstructionCost
  getInstructionCost(const User *U, ArrayRef<const Value *> Operands,
                     TTI::TargetCostKind CostKind) const override;

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = {},
      const Instruction *CxtI = nullptr) const override;

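  // Estimate the cost of materializing small NVPTX vectors: inserting only
  // constant elements is free, a 2 x 16-bit vector can be built with a single
  // mov, and a v4i8 is assembled from zero-extended bytes via PRMT.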
  InstructionCost getScalarizationOverhead(
      VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract,
      TTI::TargetCostKind CostKind, bool ForPoisonSrc = true,
      ArrayRef<Value *> VL = {}) const override {
    if (!InTy->getElementCount().isFixed())
      return InstructionCost::getInvalid();

    auto VT = getTLI()->getValueType(DL, InTy);
    auto NumElements = InTy->getElementCount().getFixedValue();
    InstructionCost Cost = 0;
    if (Insert && !VL.empty()) {
      bool AllConstant = all_of(seq(NumElements), [&](int Idx) {
        return !DemandedElts[Idx] || isa<Constant>(VL[Idx]);
      });
      if (AllConstant) {
        Cost += TTI::TCC_Free;
        Insert = false;
      }
    }
    if (Insert && Isv2x16VT(VT)) {
      // Can be built in a single mov
      Cost += 1;
      Insert = false;
    }
    if (Insert && VT == MVT::v4i8) {
      Cost += 3; // 3 x PRMT
      for (auto Idx : seq(NumElements))
        if (DemandedElts[Idx])
          Cost += 1; // zext operand to i32
      Insert = false;
    }
    return Cost + BaseT::getScalarizationOverhead(InTy, DemandedElts, Insert,
                                                  Extract, CostKind,
                                                  ForPoisonSrc, VL);
  }

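  // Loop unrolling and peeling heuristics are implemented in
  // NVPTXTargetTransformInfo.cpp.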
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) const override;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) const override;

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const override {
    // Volatile loads/stores are only supported for shared and global address
    // spaces, or for generic AS that maps to them.
    if (!(AddrSpace == llvm::ADDRESS_SPACE_GENERIC ||
          AddrSpace == llvm::ADDRESS_SPACE_GLOBAL ||
          AddrSpace == llvm::ADDRESS_SPACE_SHARED))
      return false;

    switch (I->getOpcode()) {
    default:
      return false;
    case Instruction::Load:
    case Instruction::Store:
      return true;
    }
  }

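  // Together with rewriteIntrinsicWithAddressSpace below, this lets the
  // InferAddressSpaces pass rewrite NVPTX intrinsics whose pointer operands
  // use the generic (flat) address space.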
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const override;

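  // Widest register width the load/store vectorizer may use for the given
  // address space; implemented in NVPTXTargetTransformInfo.cpp.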
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override;

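  // Remaining address-space inference hooks: rewrite an intrinsic when one of
  // its pointer operands changes address space, and report the address space a
  // value can safely be assumed to be in.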
  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const override;
  unsigned getAssumedAddrSpace(const Value *V) const override;

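  // Report kernel launch-bound attributes (e.g. maximum threads per block) as
  // (name, value) pairs.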
  void collectKernelLaunchBounds(
      const Function &F,
      SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;
};

} // end namespace llvm

#endif