#include "clang/Basic/Cuda.h"

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/VersionTuple.h"

#include <algorithm>
#include <limits>

namespace clang {

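// One row of the CUDA version table below: a version's printable name (e.g.
// "11.8") together with its CudaVersion enumerator and its llvm::VersionTuple
// representation.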
struct CudaVersionMapEntry {
  const char *Name;
  CudaVersion Version;
  llvm::VersionTuple TVersion;
};
#define CUDA_ENTRY(major, minor)                                              \
  {                                                                           \
    #major "." #minor, CudaVersion::CUDA_##major##minor,                      \
        llvm::VersionTuple(major, minor)                                      \
  }

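// All CUDA releases known here, in ascending order. The unnamed NEW entry
// stands in for releases newer than the newest listed one, and the final
// UNKNOWN entry is a tombstone that terminates the linear scans below.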
static const CudaVersionMapEntry CudaNameVersionMap[] = {
    CUDA_ENTRY(7, 0),
    CUDA_ENTRY(7, 5),
    CUDA_ENTRY(8, 0),
    CUDA_ENTRY(9, 0),
    CUDA_ENTRY(9, 1),
    CUDA_ENTRY(9, 2),
    CUDA_ENTRY(10, 0),
    CUDA_ENTRY(10, 1),
    CUDA_ENTRY(10, 2),
    CUDA_ENTRY(11, 0),
    CUDA_ENTRY(11, 1),
    CUDA_ENTRY(11, 2),
    CUDA_ENTRY(11, 3),
    CUDA_ENTRY(11, 4),
    CUDA_ENTRY(11, 5),
    CUDA_ENTRY(11, 6),
    CUDA_ENTRY(11, 7),
    CUDA_ENTRY(11, 8),
    CUDA_ENTRY(12, 0),
    CUDA_ENTRY(12, 1),
    CUDA_ENTRY(12, 2),
    CUDA_ENTRY(12, 3),
    CUDA_ENTRY(12, 4),
    CUDA_ENTRY(12, 5),
    {"", CudaVersion::NEW,
     llvm::VersionTuple(std::numeric_limits<int>::max())},
    {"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
};
#undef CUDA_ENTRY

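// Returns the printable name of a CUDA version, e.g. "10.1", or "unknown"
// if the version is not in the table.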
const char *CudaVersionToString(CudaVersion V) {
  for (auto *I = CudaNameVersionMap; I->Version != CudaVersion::UNKNOWN; ++I)
    if (I->Version == V)
      return I->Name;

  // The scan stops at the UNKNOWN tombstone without inspecting it, so return
  // its name directly; recursing with CudaVersion::UNKNOWN here would never
  // terminate.
  return "unknown";
}

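// Parses a version string such as "11.2" into a CudaVersion; unrecognized
// strings yield CudaVersion::UNKNOWN.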
CudaVersion CudaStringToVersion(const llvm::Twine &S) {
  std::string VS = S.str();
  for (auto *I = CudaNameVersionMap; I->Version != CudaVersion::UNKNOWN; ++I)
    if (I->Name == VS)
      return I->Version;
  return CudaVersion::UNKNOWN;
}

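// Maps a raw version tuple (for example, one detected from a CUDA
// installation) to its CudaVersion. Only exact matches in the table succeed;
// anything else yields CudaVersion::UNKNOWN.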
CudaVersion ToCudaVersion(llvm::VersionTuple Version) {
  for (auto *I = CudaNameVersionMap; I->Version != CudaVersion::UNKNOWN; ++I)
    if (I->TVersion == Version)
      return I->Version;
  return CudaVersion::UNKNOWN;
}

namespace {
struct OffloadArchToStringMap {
  OffloadArch arch;
  const char *arch_name;
  const char *virtual_arch_name;
};
} // namespace

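// SM(xx) builds an NVIDIA entry mapping sm_xx to the compute_xx virtual
// architecture; SM2 is for the cases where the two names do not line up
// (e.g. sm_21 uses compute_20). GFX(yyy) builds an AMD entry; all AMD GPUs
// share the "compute_amdgcn" virtual architecture.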
#define SM2(sm, ca) {OffloadArch::SM_##sm, "sm_" #sm, ca}
#define SM(sm) SM2(sm, "compute_" #sm)
#define GFX(gpu) {OffloadArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn"}
static const OffloadArchToStringMap arch_names[] = {
    // clang-format off
    {OffloadArch::UNUSED, "", ""},
    SM2(20, "compute_20"), SM2(21, "compute_20"),      // Fermi
    SM(30), {OffloadArch::SM_32_, "sm_32", "compute_32"}, SM(35), SM(37),  // Kepler
    SM(50), SM(52), SM(53),          // Maxwell
    SM(60), SM(61), SM(62),          // Pascal
    SM(70), SM(72),                  // Volta
    SM(75),                          // Turing
    SM(80), SM(86),                  // Ampere
    SM(87),                          // Jetson/Drive AGX Orin
    SM(89),                          // Ada Lovelace
    SM(90),                          // Hopper
    SM(90a),                         // Hopper
    GFX(600),  // gfx600
    GFX(601),  // gfx601
    GFX(602),  // gfx602
    GFX(700),  // gfx700
    GFX(701),  // gfx701
    GFX(702),  // gfx702
    GFX(703),  // gfx703
    GFX(704),  // gfx704
    GFX(705),  // gfx705
    GFX(801),  // gfx801
    GFX(802),  // gfx802
    GFX(803),  // gfx803
    GFX(805),  // gfx805
    GFX(810),  // gfx810
    {OffloadArch::GFX9_GENERIC, "gfx9-generic", "compute_amdgcn"},
    GFX(900),  // gfx900
    GFX(902),  // gfx902
    GFX(904),  // gfx904
    GFX(906),  // gfx906
    GFX(908),  // gfx908
    GFX(909),  // gfx909
    GFX(90a),  // gfx90a
    GFX(90c),  // gfx90c
    GFX(940),  // gfx940
    GFX(941),  // gfx941
    GFX(942),  // gfx942
    {OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"},
    GFX(1010), // gfx1010
    GFX(1011), // gfx1011
    GFX(1012), // gfx1012
    GFX(1013), // gfx1013
    {OffloadArch::GFX10_3_GENERIC, "gfx10-3-generic", "compute_amdgcn"},
    GFX(1030), // gfx1030
    GFX(1031), // gfx1031
    GFX(1032), // gfx1032
    GFX(1033), // gfx1033
    GFX(1034), // gfx1034
    GFX(1035), // gfx1035
    GFX(1036), // gfx1036
    {OffloadArch::GFX11_GENERIC, "gfx11-generic", "compute_amdgcn"},
    GFX(1100), // gfx1100
    GFX(1101), // gfx1101
    GFX(1102), // gfx1102
    GFX(1103), // gfx1103
    GFX(1150), // gfx1150
    GFX(1151), // gfx1151
    GFX(1152), // gfx1152
    {OffloadArch::GFX12_GENERIC, "gfx12-generic", "compute_amdgcn"},
    GFX(1200), // gfx1200
    GFX(1201), // gfx1201
    {OffloadArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"},
    {OffloadArch::Generic, "generic", ""},
    // clang-format on
};
#undef SM
#undef SM2
#undef GFX

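// Returns the canonical name for an offload architecture, e.g. "sm_70" or
// "gfx906", or "unknown" if it is not in the table.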
const char *OffloadArchToString(OffloadArch A) {
  auto result = std::find_if(
      std::begin(arch_names), std::end(arch_names),
      [A](const OffloadArchToStringMap &map) { return A == map.arch; });
  if (result == std::end(arch_names))
    return "unknown";
  return result->arch_name;
}

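// Returns the virtual architecture name for A: the matching "compute_*"
// string for NVIDIA architectures, or "compute_amdgcn" for AMD GPUs.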
const char *OffloadArchToVirtualArchString(OffloadArch A) {
  auto result = std::find_if(
      std::begin(arch_names), std::end(arch_names),
      [A](const OffloadArchToStringMap &map) { return A == map.arch; });
  if (result == std::end(arch_names))
    return "unknown";
  return result->virtual_arch_name;
}

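// Parses an architecture name such as "sm_80" or "gfx1030" back into an
// OffloadArch; unrecognized names yield OffloadArch::UNKNOWN.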
OffloadArch StringToOffloadArch(llvm::StringRef S) {
  auto result = std::find_if(
      std::begin(arch_names), std::end(arch_names),
      [S](const OffloadArchToStringMap &map) { return S == map.arch_name; });
  if (result == std::end(arch_names))
    return OffloadArch::UNKNOWN;
  return result->arch;
}

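// Returns the earliest CUDA release whose toolchain can compile for the
// given offload architecture.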
CudaVersion MinVersionForOffloadArch(OffloadArch A) {
  if (A == OffloadArch::UNKNOWN)
    return CudaVersion::UNKNOWN;

  // AMD GPUs do not depend on CUDA versions.
  if (IsAMDOffloadArch(A))
    return CudaVersion::CUDA_70;

  switch (A) {
  case OffloadArch::SM_20:
  case OffloadArch::SM_21:
  case OffloadArch::SM_30:
  case OffloadArch::SM_32_:
  case OffloadArch::SM_35:
  case OffloadArch::SM_37:
  case OffloadArch::SM_50:
  case OffloadArch::SM_52:
  case OffloadArch::SM_53:
    return CudaVersion::CUDA_70;
  case OffloadArch::SM_60:
  case OffloadArch::SM_61:
  case OffloadArch::SM_62:
    return CudaVersion::CUDA_80;
  case OffloadArch::SM_70:
    return CudaVersion::CUDA_90;
  case OffloadArch::SM_72:
    return CudaVersion::CUDA_91;
  case OffloadArch::SM_75:
    return CudaVersion::CUDA_100;
  case OffloadArch::SM_80:
    return CudaVersion::CUDA_110;
  case OffloadArch::SM_86:
    return CudaVersion::CUDA_111;
  case OffloadArch::SM_87:
    return CudaVersion::CUDA_114;
  case OffloadArch::SM_89:
  case OffloadArch::SM_90:
    return CudaVersion::CUDA_118;
  case OffloadArch::SM_90a:
    return CudaVersion::CUDA_120;
  default:
    llvm_unreachable("invalid enum");
  }
}

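// Returns the most recent CUDA release that still supports the given offload
// architecture; architectures that were never dropped (and all AMD GPUs)
// report CudaVersion::NEW.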
CudaVersion MaxVersionForOffloadArch(OffloadArch A) {
  // AMD GPUs do not depend on CUDA versions.
  if (IsAMDOffloadArch(A))
    return CudaVersion::NEW;

  switch (A) {
  case OffloadArch::UNKNOWN:
    return CudaVersion::UNKNOWN;
  case OffloadArch::SM_20:
  case OffloadArch::SM_21:
    return CudaVersion::CUDA_80;
  case OffloadArch::SM_30:
  case OffloadArch::SM_32_:
    return CudaVersion::CUDA_102;
  case OffloadArch::SM_35:
  case OffloadArch::SM_37:
    return CudaVersion::CUDA_118;
  default:
    return CudaVersion::NEW;
  }
}

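// Reports whether a CUDA feature is available in the given version, supplied
// either as a raw version tuple or as a CudaVersion. For example, the new
// kernel-launch API arrived in CUDA 9.2, so
//   CudaFeatureEnabled(CudaVersion::CUDA_100,
//                      CudaFeature::CUDA_USES_NEW_LAUNCH)
// returns true.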
bool CudaFeatureEnabled(llvm::VersionTuple Version, CudaFeature Feature) {
  return CudaFeatureEnabled(ToCudaVersion(Version), Feature);
}

bool CudaFeatureEnabled(CudaVersion Version, CudaFeature Feature) {
  switch (Feature) {
  case CudaFeature::CUDA_USES_NEW_LAUNCH:
    return Version >= CudaVersion::CUDA_92;
  case CudaFeature::CUDA_USES_FATBIN_REGISTER_END:
    return Version >= CudaVersion::CUDA_101;
  }
  llvm_unreachable("Unknown CUDA feature.");
}
} // namespace clang