1//===- NVPTXSubtarget.cpp - NVPTX Subtarget Information -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the NVPTX specific subclass of TargetSubtarget.
10//
11//===----------------------------------------------------------------------===//
12
13#include "NVPTXSubtarget.h"
14#include "NVPTXSelectionDAGInfo.h"
15#include "NVPTXTargetMachine.h"
16#include "llvm/Support/ErrorHandling.h"
17#include "llvm/Support/FormatVariadic.h"
18
19using namespace llvm;
20
21#define DEBUG_TYPE "nvptx-subtarget"
22
23#define GET_SUBTARGETINFO_ENUM
24#define GET_SUBTARGETINFO_TARGET_DESC
25#define GET_SUBTARGETINFO_CTOR
26#include "NVPTXGenSubtargetInfo.inc"
27
28static cl::opt<bool>
29 NoF16Math("nvptx-no-f16-math", cl::Hidden,
30 cl::desc("NVPTX Specific: Disable generation of f16 math ops."),
31 cl::init(Val: false));
32
33static cl::opt<bool> NoF32x2("nvptx-no-f32x2", cl::Hidden,
34 cl::desc("NVPTX Specific: Disable generation of "
35 "f32x2 instructions and registers."),
36 cl::init(Val: false));
37
// Helpers that build FullSmVersion values. The numeric SM version occupies the
// upper decimal digits and the last digit encodes the architecture suffix:
//   SM(N)  -> N * 10      (plain sm_N)
//   SMF(N) -> N * 10 + 2  (sm_Nf)
//   SMA(N) -> N * 10 + 3  (sm_Na)
static constexpr unsigned SM(unsigned Version) { return 10 * Version; }
static constexpr unsigned SMF(unsigned Version) { return 10 * Version + 2; }
static constexpr unsigned SMA(unsigned Version) { return 10 * Version + 3; }
43
// Pin the vtable to this file.
// The body is intentionally empty; the function exists only so that it has an
// out-of-line definition in this translation unit.
// NOTE(review): presumably anchor() is declared virtual in NVPTXSubtarget.h so
// that it acts as the class's key function -- confirm against the header.
void NVPTXSubtarget::anchor() {}
46
47// Returns the minimum PTX version required for a given SM target.
48// This must be kept in sync with the "Supported Targets" column of the
49// "PTX Release History" table in the PTX ISA documentation:
50// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history
51//
52// Note: LLVM's minimum supported PTX version is 3.2 (see FeaturePTX in
53// NVPTX.td), so older SMs that supported earlier PTX versions instead use 3.2
54// as their effective minimum.
55static unsigned getMinPTXVersionForSM(unsigned FullSmVersion) {
56 switch (FullSmVersion) {
57 case SM(Version: 20):
58 case SM(Version: 21):
59 case SM(Version: 30):
60 case SM(Version: 35):
61 return 32;
62 case SM(Version: 32):
63 case SM(Version: 50):
64 return 40;
65 case SM(Version: 37):
66 case SM(Version: 52):
67 return 41;
68 case SM(Version: 53):
69 return 42;
70 case SM(Version: 60):
71 case SM(Version: 61):
72 case SM(Version: 62):
73 return 50;
74 case SM(Version: 70):
75 return 60;
76 case SM(Version: 72):
77 return 61;
78 case SM(Version: 75):
79 return 63;
80 case SM(Version: 80):
81 return 70;
82 case SM(Version: 86):
83 return 71;
84 case SM(Version: 87):
85 return 74;
86 case SM(Version: 89):
87 case SM(Version: 90):
88 return 78;
89 case SMA(Version: 90):
90 return 80;
91 case SM(Version: 100):
92 case SMA(Version: 100):
93 case SM(Version: 101):
94 case SMA(Version: 101):
95 return 86;
96 case SM(Version: 120):
97 case SMA(Version: 120):
98 return 87;
99 case SMF(Version: 100):
100 case SMF(Version: 101):
101 case SM(Version: 103):
102 case SMF(Version: 103):
103 case SMA(Version: 103):
104 case SMF(Version: 120):
105 case SM(Version: 121):
106 case SMF(Version: 121):
107 case SMA(Version: 121):
108 return 88;
109 case SM(Version: 88):
110 case SM(Version: 110):
111 case SMF(Version: 110):
112 case SMA(Version: 110):
113 return 90;
114 default:
115 llvm_unreachable("Unknown SM version");
116 }
117}
118
119NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
120 StringRef FS) {
121 TargetName = std::string(CPU);
122
123 ParseSubtargetFeatures(CPU: getTargetName(), /*TuneCPU=*/getTargetName(), FS);
124
125 // Re-map SM version numbers, SmVersion carries the regular SMs which do
126 // have relative order, while FullSmVersion allows distinguishing sm_90 from
127 // sm_90a, which would *not* be a subset of sm_91.
128 SmVersion = getSmVersion();
129
130 unsigned MinPTX = getMinPTXVersionForSM(FullSmVersion);
131
132 if (PTXVersion == 0) {
133 // User didn't request a specific PTX version; use the minimum for this SM.
134 PTXVersion = MinPTX;
135 } else if (PTXVersion < MinPTX) {
136 // User explicitly requested an insufficient PTX version.
137 reportFatalUsageError(
138 reason: formatv(Fmt: "PTX version {0}.{1} does not support target '{2}'. "
139 "Minimum required PTX version is {3}.{4}. "
140 "Either remove the PTX version to use the default, "
141 "or increase it to at least {3}.{4}.",
142 Vals: PTXVersion / 10, Vals: PTXVersion % 10, Vals: getTargetName(), Vals: MinPTX / 10,
143 Vals: MinPTX % 10));
144 }
145
146 return *this;
147}
148
// Constructs the subtarget for triple TT, CPU name CPU (also passed as the
// tune CPU) and feature string FS.
//
// The initializer order is significant: PTXVersion is seeded with 0 ("no
// version requested") and FullSmVersion with 200 (the SM(20) encoding) before
// initializeSubtargetDependencies() runs as part of initializing InstrInfo.
// That call parses CPU/FS, refreshes SmVersion and resolves PTXVersion, so the
// SmVersion value computed in the initializer list is only provisional.
// NOTE(review): this relies on the members being declared in this same order
// in NVPTXSubtarget.h -- confirm before reordering anything here.
NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
                               const std::string &FS,
                               const NVPTXTargetMachine &TM)
    : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0),
      FullSmVersion(200), SmVersion(getSmVersion()),
      InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) {
  // The SelectionDAG info object is created last, in the constructor body.
  TSInfo = std::make_unique<NVPTXSelectionDAGInfo>();
}
157
// Defaulted destructor, defined out of line in this file.
// NOTE(review): presumably kept here (rather than in the header) so that
// TSInfo is destroyed where NVPTXSelectionDAGInfo is a complete type -- confirm.
NVPTXSubtarget::~NVPTXSubtarget() = default;
159
160const SelectionDAGTargetInfo *NVPTXSubtarget::getSelectionDAGInfo() const {
161 return TSInfo.get();
162}
163
164bool NVPTXSubtarget::hasPTXWithFamilySMs(unsigned PTXVersion,
165 ArrayRef<unsigned> SMVersions) const {
166 unsigned PTXVer = getPTXVersion();
167 if (!hasFamilySpecificFeatures() || PTXVer < PTXVersion)
168 return false;
169
170 unsigned SMVer = getSmVersion();
171 return llvm::any_of(Range&: SMVersions, P: [&](unsigned SM) {
172 // sm_101 is a different family, never group it with sm_10x.
173 if (SMVer == 101 || SM == 101)
174 return SMVer == SM &&
175 // PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not
176 // supported.
177 !(PTXVer >= 90 && SMVer == 101);
178
179 return getSmFamilyVersion() == SM / 10 && SMVer >= SM;
180 });
181}
182
183bool NVPTXSubtarget::hasPTXWithAccelSMs(unsigned PTXVersion,
184 ArrayRef<unsigned> SMVersions) const {
185 unsigned PTXVer = getPTXVersion();
186 if (!hasArchAccelFeatures() || PTXVer < PTXVersion)
187 return false;
188
189 unsigned SMVer = getSmVersion();
190 return llvm::any_of(Range&: SMVersions, P: [&](unsigned SM) {
191 return SMVer == SM &&
192 // PTX 9.0 and later renamed sm_101 to sm_110, so sm_101 is not
193 // supported.
194 !(PTXVer >= 90 && SMVer == 101);
195 });
196}
197
198bool NVPTXSubtarget::allowFP16Math() const {
199 return hasFP16Math() && NoF16Math == false;
200}
201
202bool NVPTXSubtarget::hasF32x2Instructions() const {
203 return SmVersion >= 100 && PTXVersion >= 86 && !NoF32x2;
204}
205
206bool NVPTXSubtarget::hasNativeBF16Support(int Opcode) const {
207 if (!hasBF16Math())
208 return false;
209
210 switch (Opcode) {
211 // Several BF16 instructions are available on sm_90 only.
212 case ISD::FADD:
213 case ISD::FMUL:
214 case ISD::FSUB:
215 case ISD::SELECT:
216 case ISD::SELECT_CC:
217 case ISD::SETCC:
218 case ISD::FEXP2:
219 case ISD::FCEIL:
220 case ISD::FFLOOR:
221 case ISD::FNEARBYINT:
222 case ISD::FRINT:
223 case ISD::FROUNDEVEN:
224 case ISD::FTRUNC:
225 return getSmVersion() >= 90 && getPTXVersion() >= 78;
226 // Several BF16 instructions are available on sm_80 only.
227 case ISD::FMINNUM:
228 case ISD::FMAXNUM:
229 case ISD::FMAXNUM_IEEE:
230 case ISD::FMINNUM_IEEE:
231 case ISD::FMAXIMUM:
232 case ISD::FMINIMUM:
233 return getSmVersion() >= 80 && getPTXVersion() >= 70;
234 }
235 return true;
236}
237
238void NVPTXSubtarget::failIfClustersUnsupported(
239 std::string const &FailureMessage) const {
240 if (hasClusters())
241 return;
242
243 report_fatal_error(reason: formatv(
244 Fmt: "NVPTX SM architecture \"{}\" and PTX version \"{}\" do not support {}. "
245 "Requires SM >= 90 and PTX >= 78.",
246 Vals: getFullSmVersion(), Vals: PTXVersion, Vals: FailureMessage));
247}
248