#include "clang/Basic/Cuda.h"

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/VersionTuple.h"

#include <algorithm>
#include <iterator>
#include <limits>
#include <string>
7
8namespace clang {
9
// One row of the CUDA version lookup table: ties together the printable
// version string, the CudaVersion enumerator, and the numeric version tuple
// so the string/enum/tuple conversion helpers below can share a single table.
struct CudaVersionMapEntry {
  const char *Name;            // Printable version string, e.g. "11.8".
  CudaVersion Version;         // Matching CudaVersion enumerator.
  llvm::VersionTuple TVersion; // Numeric (major, minor) form of the version.
};
// Expands to one table entry for CUDA <major>.<minor>: stringizes the pieces
// into the name and token-pastes the CudaVersion::CUDA_<major><minor>
// enumerator. (No comments inside the macro — it spans continuation lines.)
#define CUDA_ENTRY(major, minor) \
  { \
    #major "." #minor, CudaVersion::CUDA_##major##minor, \
    llvm::VersionTuple(major, minor) \
  }
20
21static const CudaVersionMapEntry CudaNameVersionMap[] = {
22 CUDA_ENTRY(7, 0),
23 CUDA_ENTRY(7, 5),
24 CUDA_ENTRY(8, 0),
25 CUDA_ENTRY(9, 0),
26 CUDA_ENTRY(9, 1),
27 CUDA_ENTRY(9, 2),
28 CUDA_ENTRY(10, 0),
29 CUDA_ENTRY(10, 1),
30 CUDA_ENTRY(10, 2),
31 CUDA_ENTRY(11, 0),
32 CUDA_ENTRY(11, 1),
33 CUDA_ENTRY(11, 2),
34 CUDA_ENTRY(11, 3),
35 CUDA_ENTRY(11, 4),
36 CUDA_ENTRY(11, 5),
37 CUDA_ENTRY(11, 6),
38 CUDA_ENTRY(11, 7),
39 CUDA_ENTRY(11, 8),
40 CUDA_ENTRY(12, 0),
41 CUDA_ENTRY(12, 1),
42 CUDA_ENTRY(12, 2),
43 CUDA_ENTRY(12, 3),
44 CUDA_ENTRY(12, 4),
45 CUDA_ENTRY(12, 5),
46 {.Name: "", .Version: CudaVersion::NEW, .TVersion: llvm::VersionTuple(std::numeric_limits<int>::max())},
47 {.Name: "unknown", .Version: CudaVersion::UNKNOWN, .TVersion: {}} // End of list tombstone.
48};
49#undef CUDA_ENTRY
50
51const char *CudaVersionToString(CudaVersion V) {
52 for (auto *I = CudaNameVersionMap; I->Version != CudaVersion::UNKNOWN; ++I)
53 if (I->Version == V)
54 return I->Name;
55
56 return CudaVersionToString(V: CudaVersion::UNKNOWN);
57}
58
59CudaVersion CudaStringToVersion(const llvm::Twine &S) {
60 std::string VS = S.str();
61 for (auto *I = CudaNameVersionMap; I->Version != CudaVersion::UNKNOWN; ++I)
62 if (I->Name == VS)
63 return I->Version;
64 return CudaVersion::UNKNOWN;
65}
66
67CudaVersion ToCudaVersion(llvm::VersionTuple Version) {
68 for (auto *I = CudaNameVersionMap; I->Version != CudaVersion::UNKNOWN; ++I)
69 if (I->TVersion == Version)
70 return I->Version;
71 return CudaVersion::UNKNOWN;
72}
73
namespace {
// One row of the offload-arch lookup table: pairs an OffloadArch enumerator
// with its canonical name and the virtual architecture string it lowers to.
struct OffloadArchToStringMap {
  OffloadArch arch;              // Enumerator for this architecture.
  const char *arch_name;         // e.g. "sm_80" or "gfx906".
  const char *virtual_arch_name; // e.g. "compute_80" or "compute_amdgcn".
};
} // namespace
81
82#define SM2(sm, ca) {OffloadArch::SM_##sm, "sm_" #sm, ca}
83#define SM(sm) SM2(sm, "compute_" #sm)
84#define GFX(gpu) {OffloadArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn"}
85static const OffloadArchToStringMap arch_names[] = {
86 // clang-format off
87 {.arch: OffloadArch::UNUSED, .arch_name: "", .virtual_arch_name: ""},
88 SM2(20, "compute_20"), SM2(21, "compute_20"), // Fermi
89 SM(30), {.arch: OffloadArch::SM_32_, .arch_name: "sm_32", .virtual_arch_name: "compute_32"}, SM(35), SM(37), // Kepler
90 SM(50), SM(52), SM(53), // Maxwell
91 SM(60), SM(61), SM(62), // Pascal
92 SM(70), SM(72), // Volta
93 SM(75), // Turing
94 SM(80), SM(86), // Ampere
95 SM(87), // Jetson/Drive AGX Orin
96 SM(89), // Ada Lovelace
97 SM(90), // Hopper
98 SM(90a), // Hopper
99 GFX(600), // gfx600
100 GFX(601), // gfx601
101 GFX(602), // gfx602
102 GFX(700), // gfx700
103 GFX(701), // gfx701
104 GFX(702), // gfx702
105 GFX(703), // gfx703
106 GFX(704), // gfx704
107 GFX(705), // gfx705
108 GFX(801), // gfx801
109 GFX(802), // gfx802
110 GFX(803), // gfx803
111 GFX(805), // gfx805
112 GFX(810), // gfx810
113 {.arch: OffloadArch::GFX9_GENERIC, .arch_name: "gfx9-generic", .virtual_arch_name: "compute_amdgcn"},
114 GFX(900), // gfx900
115 GFX(902), // gfx902
116 GFX(904), // gfx903
117 GFX(906), // gfx906
118 GFX(908), // gfx908
119 GFX(909), // gfx909
120 GFX(90a), // gfx90a
121 GFX(90c), // gfx90c
122 GFX(940), // gfx940
123 GFX(941), // gfx941
124 GFX(942), // gfx942
125 {.arch: OffloadArch::GFX10_1_GENERIC, .arch_name: "gfx10-1-generic", .virtual_arch_name: "compute_amdgcn"},
126 GFX(1010), // gfx1010
127 GFX(1011), // gfx1011
128 GFX(1012), // gfx1012
129 GFX(1013), // gfx1013
130 {.arch: OffloadArch::GFX10_3_GENERIC, .arch_name: "gfx10-3-generic", .virtual_arch_name: "compute_amdgcn"},
131 GFX(1030), // gfx1030
132 GFX(1031), // gfx1031
133 GFX(1032), // gfx1032
134 GFX(1033), // gfx1033
135 GFX(1034), // gfx1034
136 GFX(1035), // gfx1035
137 GFX(1036), // gfx1036
138 {.arch: OffloadArch::GFX11_GENERIC, .arch_name: "gfx11-generic", .virtual_arch_name: "compute_amdgcn"},
139 GFX(1100), // gfx1100
140 GFX(1101), // gfx1101
141 GFX(1102), // gfx1102
142 GFX(1103), // gfx1103
143 GFX(1150), // gfx1150
144 GFX(1151), // gfx1151
145 GFX(1152), // gfx1152
146 {.arch: OffloadArch::GFX12_GENERIC, .arch_name: "gfx12-generic", .virtual_arch_name: "compute_amdgcn"},
147 GFX(1200), // gfx1200
148 GFX(1201), // gfx1201
149 {.arch: OffloadArch::AMDGCNSPIRV, .arch_name: "amdgcnspirv", .virtual_arch_name: "compute_amdgcn"},
150 {.arch: OffloadArch::Generic, .arch_name: "generic", .virtual_arch_name: ""},
151 // clang-format on
152};
153#undef SM
154#undef SM2
155#undef GFX
156
157const char *OffloadArchToString(OffloadArch A) {
158 auto result = std::find_if(
159 first: std::begin(arr: arch_names), last: std::end(arr: arch_names),
160 pred: [A](const OffloadArchToStringMap &map) { return A == map.arch; });
161 if (result == std::end(arr: arch_names))
162 return "unknown";
163 return result->arch_name;
164}
165
166const char *OffloadArchToVirtualArchString(OffloadArch A) {
167 auto result = std::find_if(
168 first: std::begin(arr: arch_names), last: std::end(arr: arch_names),
169 pred: [A](const OffloadArchToStringMap &map) { return A == map.arch; });
170 if (result == std::end(arr: arch_names))
171 return "unknown";
172 return result->virtual_arch_name;
173}
174
175OffloadArch StringToOffloadArch(llvm::StringRef S) {
176 auto result = std::find_if(
177 first: std::begin(arr: arch_names), last: std::end(arr: arch_names),
178 pred: [S](const OffloadArchToStringMap &map) { return S == map.arch_name; });
179 if (result == std::end(arr: arch_names))
180 return OffloadArch::UNKNOWN;
181 return result->arch;
182}
183
// Returns the oldest CUDA toolkit release that supports compiling for arch A,
// or CudaVersion::UNKNOWN for an unknown arch. Any arch not handled below is
// a programming error (llvm_unreachable).
CudaVersion MinVersionForOffloadArch(OffloadArch A) {
  if (A == OffloadArch::UNKNOWN)
    return CudaVersion::UNKNOWN;

  // AMD GPUs do not depend on CUDA versions.
  if (IsAMDOffloadArch(A))
    return CudaVersion::CUDA_70;

  switch (A) {
  case OffloadArch::SM_20:
  case OffloadArch::SM_21:
  case OffloadArch::SM_30:
  case OffloadArch::SM_32_:
  case OffloadArch::SM_35:
  case OffloadArch::SM_37:
  case OffloadArch::SM_50:
  case OffloadArch::SM_52:
  case OffloadArch::SM_53:
    // Fermi through Maxwell: supported since the oldest CUDA clang knows.
    return CudaVersion::CUDA_70;
  case OffloadArch::SM_60:
  case OffloadArch::SM_61:
  case OffloadArch::SM_62:
    return CudaVersion::CUDA_80;
  case OffloadArch::SM_70:
    return CudaVersion::CUDA_90;
  case OffloadArch::SM_72:
    return CudaVersion::CUDA_91;
  case OffloadArch::SM_75:
    return CudaVersion::CUDA_100;
  case OffloadArch::SM_80:
    return CudaVersion::CUDA_110;
  case OffloadArch::SM_86:
    return CudaVersion::CUDA_111;
  case OffloadArch::SM_87:
    return CudaVersion::CUDA_114;
  case OffloadArch::SM_89:
  case OffloadArch::SM_90:
    return CudaVersion::CUDA_118;
  case OffloadArch::SM_90a:
    return CudaVersion::CUDA_120;
  default:
    llvm_unreachable("invalid enum");
  }
}
228
// Returns the newest CUDA toolkit release that still supports arch A.
// Archs that were never dropped by a CUDA release return CudaVersion::NEW.
CudaVersion MaxVersionForOffloadArch(OffloadArch A) {
  // AMD GPUs do not depend on CUDA versions.
  if (IsAMDOffloadArch(A))
    return CudaVersion::NEW;

  switch (A) {
  case OffloadArch::UNKNOWN:
    return CudaVersion::UNKNOWN;
  case OffloadArch::SM_20:
  case OffloadArch::SM_21:
    // Fermi support was dropped after CUDA 8.
    return CudaVersion::CUDA_80;
  case OffloadArch::SM_30:
  case OffloadArch::SM_32_:
    // sm_30/sm_32 support was dropped after CUDA 10.2.
    return CudaVersion::CUDA_102;
  case OffloadArch::SM_35:
  case OffloadArch::SM_37:
    // Remaining Kepler support was dropped after CUDA 11.8.
    return CudaVersion::CUDA_118;
  default:
    return CudaVersion::NEW;
  }
}
250
251bool CudaFeatureEnabled(llvm::VersionTuple Version, CudaFeature Feature) {
252 return CudaFeatureEnabled(ToCudaVersion(Version), Feature);
253}
254
255bool CudaFeatureEnabled(CudaVersion Version, CudaFeature Feature) {
256 switch (Feature) {
257 case CudaFeature::CUDA_USES_NEW_LAUNCH:
258 return Version >= CudaVersion::CUDA_92;
259 case CudaFeature::CUDA_USES_FATBIN_REGISTER_END:
260 return Version >= CudaVersion::CUDA_101;
261 }
262 llvm_unreachable("Unknown CUDA feature.");
263}
264} // namespace clang
265