NVPTXSubtarget.cpp source code [llvm_projects/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp]

1	//===- NVPTXSubtarget.cpp - NVPTX Subtarget Information -------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the NVPTX specific subclass of TargetSubtarget.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "NVPTXSubtarget.h"
14	#include "NVPTXSelectionDAGInfo.h"
15	#include "NVPTXTargetMachine.h"
16	#include "llvm/Support/ErrorHandling.h"
17	#include "llvm/Support/FormatVariadic.h"
18
19	using namespace llvm;
20
21	#define DEBUG_TYPE "nvptx-subtarget"
22
23	#define GET_SUBTARGETINFO_ENUM
24	#define GET_SUBTARGETINFO_TARGET_DESC
25	#define GET_SUBTARGETINFO_CTOR
26	#include "NVPTXGenSubtargetInfo.inc"
27
28	static cl::opt<bool>
29	NoF16Math("nvptx-no-f16-math", cl::Hidden,
30	cl::desc ("NVPTX Specific: Disable generation of f16 math ops."),
31	cl::init(Val: false));
32
33	static cl::opt<bool> NoF32x2("nvptx-no-f32x2", cl::Hidden,
34	cl::desc ("NVPTX Specific: Disable generation of "
35	"f32x2 instructions and registers."),
36	cl::init(Val: false));
37
// FullSmVersion encoding helpers: SM * 10 + suffix offset
// (0 = base, 2 = 'f', 3 = 'a').
//
// For example SM(90) == 900, SMF(100) == 1002 and SMA(90) == 903. All three
// are constexpr so they can be used directly as switch case labels.

// Encodes a base target: the SM version shifted left by one decimal digit.
static constexpr unsigned SM(unsigned Version) { return Version * 10; }

// Encodes the 'f'-suffixed variant of an SM version (suffix offset 2).
static constexpr unsigned SMF(unsigned Version) { return SM(Version) + 2; }

// Encodes the 'a'-suffixed variant of an SM version (suffix offset 3).
static constexpr unsigned SMA(unsigned Version) { return SM(Version) + 3; }
43
44	// Pin the vtable to this file.
45	void NVPTXSubtarget::anchor() {}
46
47	// Returns the minimum PTX version required for a given SM target.
48	// This must be kept in sync with the "Supported Targets" column of the
49	// "PTX Release History" table in the PTX ISA documentation:
50	// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history
51	//
52	// Note: LLVM's minimum supported PTX version is 3.2 (see FeaturePTX in
53	// NVPTX.td), so older SMs that supported earlier PTX versions instead use 3.2
54	// as their effective minimum.
55	static unsigned getMinPTXVersionForSM(unsigned FullSmVersion) {
56	switch (FullSmVersion) {
57	case SM(Version: `20`):
58	case SM(Version: `21`):
59	case SM(Version: `30`):
60	case SM(Version: `35`):
61	return `32`;
62	case SM(Version: `32`):
63	case SM(Version: `50`):
64	return `40`;
65	case SM(Version: `37`):
66	case SM(Version: `52`):
67	return `41`;
68	case SM(Version: `53`):
69	return `42`;
70	case SM(Version: `60`):
71	case SM(Version: `61`):
72	case SM(Version: `62`):
73	return `50`;
74	case SM(Version: `70`):
75	return `60`;
76	case SM(Version: `72`):
77	return `61`;
78	case SM(Version: `75`):
79	return `63`;
80	case SM(Version: `80`):
81	return `70`;
82	case SM(Version: `86`):
83	return `71`;
84	case SM(Version: `87`):
85	return `74`;
86	case SM(Version: `89`):
87	case SM(Version: `90`):
88	return `78`;
89	case SMA(Version: `90`):
90	return `80`;
91	case SM(Version: `100`):
92	case SMA(Version: `100`):
93	case SM(Version: `101`):
94	case SMA(Version: `101`):
95	return `86`;
96	case SM(Version: `120`):
97	case SMA(Version: `120`):
98	return `87`;
99	case SMF(Version: `100`):
100	case SMF(Version: `101`):
101	case SM(Version: `103`):
102	case SMF(Version: `103`):
103	case SMA(Version: `103`):
104	case SMF(Version: `120`):
105	case SM(Version: `121`):
106	case SMF(Version: `121`):
107	case SMA(Version: `121`):
108	return `88`;
109	case SM(Version: `88`):
110	case SM(Version: `110`):
111	case SMF(Version: `110`):
112	case SMA(Version: `110`):
113	return `90`;
114	default:
115	llvm_unreachable("Unknown SM version");
116	}
117	}
118
119	NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
120	StringRef FS) {
121	TargetName = std::string (CPU);
122
123	ParseSubtargetFeatures(CPU: getTargetName(), /TuneCPU=/getTargetName(), FS);
124
125	// Re-map SM version numbers, SmVersion carries the regular SMs which do
126	// have relative order, while FullSmVersion allows distinguishing sm_90 from
127	// sm_90a, which would not* be a subset of sm_91.*
128	SmVersion = getSmVersion();
129
130	unsigned MinPTX = getMinPTXVersionForSM(FullSmVersion);
131
132	if (PTXVersion == `0`) {
133	// User didn't request a specific PTX version; use the minimum for this SM.
134	PTXVersion = MinPTX;
135	} else if (PTXVersion < MinPTX) {
136	// User explicitly requested an insufficient PTX version.
137	reportFatalUsageError(
138	reason: formatv(Fmt: "PTX version {0}.{1} does not support target '{2}'. "
139	"Minimum required PTX version is {3}.{4}. "
140	"Either remove the PTX version to use the default, "
141	"or increase it to at least {3}.{4}.",
142	Vals: PTXVersion / `10`, Vals: PTXVersion % `10`, Vals: getTargetName(), Vals: MinPTX / `10`,
143	Vals: MinPTX % `10`));
144	}
145
146	return *this;
147	}
148
149	NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
150	const std::string &FS,
151	const NVPTXTargetMachine &TM)
152	: NVPTXGenSubtargetInfo (TT, CPU, /TuneCPU/ CPU, FS), PTXVersion(`0`),
153	FullSmVersion(`200`), SmVersion(getSmVersion()),
154	InstrInfo (initializeSubtargetDependencies(CPU, FS)), TLInfo (TM, *this) {
155	TSInfo = std::make_unique<NVPTXSelectionDAGInfo>();
156	}
157
158	NVPTXSubtarget::~NVPTXSubtarget() = default;
159
160	const SelectionDAGTargetInfo NVPTXSubtarget::getSelectionDAGInfo() const* {
161	return TSInfo.get();
162	}
163
164	bool NVPTXSubtarget::hasPTXWithFamilySMs(unsigned PTXVersion,
165	ArrayRef<unsigned> SMVersions) const {
166	unsigned PTXVer = getPTXVersion();
167	if (!hasFamilySpecificFeatures() \|\| PTXVer < PTXVersion)
168	return false;
169
170	unsigned SMVer = getSmVersion();
171	return llvm::any_of(Range&: SMVersions, P: [&](unsigned SM) {
172	// sm_101 is a different family, never group it with sm_10x.
173	if (SMVer == `101` \|\| SM == `101`)
174	return SMVer == SM &&
175	// PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not
176	// supported.
177	!(PTXVer >= `90` && SMVer == `101`);
178
179	return getSmFamilyVersion() == SM / `10` && SMVer >= SM;
180	});
181	}
182
183	bool NVPTXSubtarget::hasPTXWithAccelSMs(unsigned PTXVersion,
184	ArrayRef<unsigned> SMVersions) const {
185	unsigned PTXVer = getPTXVersion();
186	if (!hasArchAccelFeatures() \|\| PTXVer < PTXVersion)
187	return false;
188
189	unsigned SMVer = getSmVersion();
190	return llvm::any_of(Range&: SMVersions, P: [&](unsigned SM) {
191	return SMVer == SM &&
192	// PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not
193	// supported.
194	!(PTXVer >= `90` && SMVer == `101`);
195	});
196	}
197
198	bool NVPTXSubtarget::allowFP16Math() const {
199	return hasFP16Math() && NoF16Math == false;
200	}
201
202	bool NVPTXSubtarget::hasF32x2Instructions() const {
203	return SmVersion >= `100` && PTXVersion >= `86` && !NoF32x2;
204	}
205
206	bool NVPTXSubtarget::hasNativeBF16Support(int Opcode) const {
207	if (!hasBF16Math())
208	return false;
209
210	switch (Opcode) {
211	// Several BF16 instructions are available on sm_90 only.
212	case ISD::FADD:
213	case ISD::FMUL:
214	case ISD::FSUB:
215	case ISD::SELECT:
216	case ISD::SELECT_CC:
217	case ISD::SETCC:
218	case ISD::FEXP2:
219	case ISD::FCEIL:
220	case ISD::FFLOOR:
221	case ISD::FNEARBYINT:
222	case ISD::FRINT:
223	case ISD::FROUNDEVEN:
224	case ISD::FTRUNC:
225	return getSmVersion() >= `90` && getPTXVersion() >= `78`;
226	// Several BF16 instructions are available on sm_80 only.
227	case ISD::FMINNUM:
228	case ISD::FMAXNUM:
229	case ISD::FMAXNUM_IEEE:
230	case ISD::FMINNUM_IEEE:
231	case ISD::FMAXIMUM:
232	case ISD::FMINIMUM:
233	return getSmVersion() >= `80` && getPTXVersion() >= `70`;
234	}
235	return true;
236	}
237
238	void NVPTXSubtarget::failIfClustersUnsupported(
239	std::string const &FailureMessage) const {
240	if (hasClusters())
241	return;
242
243	report_fatal_error(reason: formatv(
244	Fmt: "NVPTX SM architecture \"{}\" and PTX version \"{}\" do not support {}. "
245	"Requires SM >= 90 and PTX >= 78.",
246	Vals: getFullSmVersion(), Vals: PTXVersion, Vals: FailureMessage));
247	}
248

Browse the source code of llvm_projects/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp