1//===-- AMDGPUTargetParser - Parser for AMDGPU features ---------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a target parser to recognise AMDGPU hardware features.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/TargetParser/AMDGPUTargetParser.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/ADT/StringSwitch.h"
17#include "llvm/ADT/Twine.h"
18#include "llvm/Support/ErrorHandling.h"
19#include "llvm/Support/raw_ostream.h"
20#include "llvm/TargetParser/Triple.h"
21
22using namespace llvm;
23using namespace AMDGPU;
24
25StringRef llvm::AMDGPU::getArchFamilyNameAMDGCN(GPUKind AK) {
26 StringRef ArchName = getArchNameAMDGCN(AK);
27 assert((AK >= GK_AMDGCN_GENERIC_FIRST && AK <= GK_AMDGCN_GENERIC_LAST) ==
28 ArchName.ends_with("-generic") &&
29 "Generic AMDGCN arch not classified correctly!");
30 if (AK >= GK_AMDGCN_GENERIC_FIRST && AK <= GK_AMDGCN_GENERIC_LAST) {
31 // Return the part before the first '-', e.g. "gfx9-4-generic" -> "gfx9".
32 return ArchName.take_front(N: ArchName.find(C: '-'));
33 }
34 return ArchName.empty() ? "" : ArchName.drop_back(N: 2);
35}
36
37StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) {
38 switch (AK) {
39#define AMDGCN_GPU(NAME, ENUM, ISAVERSION, FEATURES) \
40 case ENUM: \
41 return NAME;
42#include "llvm/TargetParser/AMDGPUTargetParser.def"
43 default:
44 return "";
45 }
46}
47
48StringRef llvm::AMDGPU::getArchNameR600(GPUKind AK) {
49 switch (AK) {
50#define R600_GPU(NAME, ENUM, FEATURES) \
51 case ENUM: \
52 return NAME;
53#include "llvm/TargetParser/AMDGPUTargetParser.def"
54 default:
55 return "";
56 }
57}
58
59AMDGPU::GPUKind llvm::AMDGPU::parseArchAMDGCN(StringRef CPU) {
60 return StringSwitch<AMDGPU::GPUKind>(CPU)
61#define AMDGCN_GPU(NAME, ENUM, ISAVERSION, FEATURES) .Case(NAME, ENUM)
62#define AMDGCN_GPU_ALIAS(NAME, ENUM) .Case(NAME, ENUM)
63#include "llvm/TargetParser/AMDGPUTargetParser.def"
64 .Case(S: "generic", Value: AMDGPU::GPUKind::GK_GFX600)
65 .Case(S: "generic-hsa", Value: AMDGPU::GPUKind::GK_GFX700)
66 .Default(Value: AMDGPU::GPUKind::GK_NONE);
67}
68
69AMDGPU::GPUKind llvm::AMDGPU::parseArchR600(StringRef CPU) {
70 return StringSwitch<AMDGPU::GPUKind>(CPU)
71#define R600_GPU(NAME, ENUM, FEATURES) .Case(NAME, ENUM)
72#define R600_GPU_ALIAS(NAME, ENUM) .Case(NAME, ENUM)
73#include "llvm/TargetParser/AMDGPUTargetParser.def"
74 .Default(Value: AMDGPU::GPUKind::GK_NONE);
75}
76
77unsigned AMDGPU::getArchAttrAMDGCN(GPUKind AK) {
78 switch (AK) {
79#define AMDGCN_GPU(NAME, ENUM, ISAVERSION, FEATURES) \
80 case ENUM: \
81 return FEATURES;
82#include "llvm/TargetParser/AMDGPUTargetParser.def"
83 default:
84 return FEATURE_NONE;
85 }
86}
87
88unsigned AMDGPU::getArchAttrR600(GPUKind AK) {
89 switch (AK) {
90#define R600_GPU(NAME, ENUM, FEATURES) \
91 case ENUM: \
92 return FEATURES;
93#include "llvm/TargetParser/AMDGPUTargetParser.def"
94 default:
95 return FEATURE_NONE;
96 }
97}
98
99void AMDGPU::fillValidArchListAMDGCN(SmallVectorImpl<StringRef> &Values) {
100 // XXX: Should this only report unique canonical names?
101 Values.append(IL: {
102#define AMDGCN_GPU(NAME, ENUM, ISAVERSION, FEATURES) NAME,
103#define AMDGCN_GPU_ALIAS(NAME, ENUM) NAME,
104#include "llvm/TargetParser/AMDGPUTargetParser.def"
105 });
106}
107
108void AMDGPU::fillValidArchListR600(SmallVectorImpl<StringRef> &Values) {
109 Values.append(IL: {
110#define R600_GPU(NAME, ENUM, FEATURES) NAME,
111#define R600_GPU_ALIAS(NAME, ENUM) NAME,
112#include "llvm/TargetParser/AMDGPUTargetParser.def"
113 });
114}
115
116AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
117 AMDGPU::GPUKind AK = parseArchAMDGCN(CPU: GPU);
118 if (AK == AMDGPU::GPUKind::GK_NONE) {
119 if (GPU == "generic-hsa")
120 return {.Major: 7, .Minor: 0, .Stepping: 0};
121 if (GPU == "generic")
122 return {.Major: 6, .Minor: 0, .Stepping: 0};
123 return {.Major: 0, .Minor: 0, .Stepping: 0};
124 }
125
126 switch (AK) {
127#define MAKE_ISAVERSION(A, B, C) {A, B, C}
128#define AMDGCN_GPU(NAME, ENUM, ISAVERSION, FEATURES) \
129 case ENUM: \
130 return MAKE_ISAVERSION ISAVERSION;
131#include "llvm/TargetParser/AMDGPUTargetParser.def"
132#undef MAKE_ISAVERSION
133 default:
134 return {.Major: 0, .Minor: 0, .Stepping: 0};
135 }
136}
137
138StringRef AMDGPU::getCanonicalArchName(const Triple &T, StringRef Arch) {
139 assert(T.isAMDGPU());
140 auto ProcKind = T.isAMDGCN() ? parseArchAMDGCN(CPU: Arch) : parseArchR600(CPU: Arch);
141 if (ProcKind == GK_NONE)
142 return StringRef();
143
144 return T.isAMDGCN() ? getArchNameAMDGCN(AK: ProcKind) : getArchNameR600(AK: ProcKind);
145}
146
147static std::pair<FeatureError, StringRef>
148insertWaveSizeFeature(StringRef GPU, const Triple &T,
149 const StringMap<bool> &DefaultFeatures,
150 StringMap<bool> &Features) {
151 const bool IsNullGPU = GPU.empty();
152 const bool TargetHasWave32 = DefaultFeatures.count(Key: "wavefrontsize32");
153 const bool TargetHasWave64 = DefaultFeatures.count(Key: "wavefrontsize64");
154
155 auto Wave32Itr = Features.find(Key: "wavefrontsize32");
156 auto Wave64Itr = Features.find(Key: "wavefrontsize64");
157 const bool EnableWave32 =
158 Wave32Itr != Features.end() && Wave32Itr->getValue();
159 const bool EnableWave64 =
160 Wave64Itr != Features.end() && Wave64Itr->getValue();
161 const bool DisableWave32 =
162 Wave32Itr != Features.end() && !Wave32Itr->getValue();
163 const bool DisableWave64 =
164 Wave64Itr != Features.end() && !Wave64Itr->getValue();
165
166 if (EnableWave32 && EnableWave64)
167 return {AMDGPU::INVALID_FEATURE_COMBINATION,
168 "'+wavefrontsize32' and '+wavefrontsize64' are mutually exclusive"};
169 if (DisableWave32 && DisableWave64)
170 return {AMDGPU::INVALID_FEATURE_COMBINATION,
171 "'-wavefrontsize32' and '-wavefrontsize64' are mutually exclusive"};
172
173 if (!IsNullGPU) {
174 if (TargetHasWave64) {
175 if (EnableWave32)
176 return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "+wavefrontsize32"};
177 if (DisableWave64)
178 return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "-wavefrontsize64"};
179 }
180
181 if (TargetHasWave32) {
182 if (EnableWave64)
183 return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "+wavefrontsize64"};
184 if (DisableWave32)
185 return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "-wavefrontsize32"};
186 }
187 }
188
189 // Don't assume any wavesize with an unknown subtarget.
190 // Default to wave32 if target supports both.
191 if (!IsNullGPU && !EnableWave32 && !EnableWave64 && !TargetHasWave32 &&
192 !TargetHasWave64)
193 Features.insert(KV: std::make_pair(x: "wavefrontsize32", y: true));
194
195 for (const auto &Entry : DefaultFeatures) {
196 if (!Features.count(Key: Entry.getKey()))
197 Features[Entry.getKey()] = Entry.getValue();
198 }
199
200 return {NO_ERROR, StringRef()};
201}
202
203/// Fills Features map with default values for given target GPU.
204/// \p Features contains overriding target features and this function returns
205/// default target features with entries overridden by \p Features.
206static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
207 StringMap<bool> &Features) {
208 AMDGPU::GPUKind Kind = parseArchAMDGCN(CPU: GPU);
209 switch (Kind) {
210 case GK_GFX1251:
211 Features["gfx1251-gemm-insts"] = true;
212 [[fallthrough]];
213 case GK_GFX1250:
214 Features["swmmac-gfx1200-insts"] = true;
215 Features["swmmac-gfx1250-insts"] = true;
216 [[fallthrough]];
217 case GK_GFX1310:
218 case GK_GFX13_GENERIC:
219 Features["cube-insts"] = true;
220 Features["cvt-pknorm-vop2-insts"] = true;
221 Features["lerp-inst"] = true;
222 Features["qsad-insts"] = true;
223 Features["sad-insts"] = true;
224 Features["msad-insts"] = true;
225 Features["mqsad-pk-insts"] = true;
226 Features["mqsad-insts"] = true;
227 [[fallthrough]];
228 case GK_GFX12_5_GENERIC:
229 Features["ci-insts"] = true;
230 Features["dot7-insts"] = true;
231 Features["dot8-insts"] = true;
232 Features["dl-insts"] = true;
233 Features["16-bit-insts"] = true;
234 Features["dpp"] = true;
235 Features["gfx8-insts"] = true;
236 Features["gfx9-insts"] = true;
237 Features["flat-global-insts"] = true;
238 Features["gfx10-insts"] = true;
239 Features["gfx10-3-insts"] = true;
240 Features["gfx11-insts"] = true;
241 Features["gfx12-insts"] = true;
242 Features["gfx1250-insts"] = true;
243 Features["bitop3-insts"] = true;
244 Features["prng-inst"] = true;
245 Features["tanh-insts"] = true;
246 Features["tensor-cvt-lut-insts"] = true;
247 Features["transpose-load-f4f6-insts"] = true;
248 Features["bf16-trans-insts"] = true;
249 Features["bf16-cvt-insts"] = true;
250 Features["bf16-pk-insts"] = true;
251 Features["fp8-conversion-insts"] = true;
252 Features["fp8e5m3-insts"] = true;
253 Features["permlane16-swap"] = true;
254 Features["ashr-pk-insts"] = true;
255 Features["add-min-max-insts"] = true;
256 Features["pk-add-min-max-insts"] = true;
257 Features["atomic-buffer-pk-add-bf16-inst"] = true;
258 Features["vmem-pref-insts"] = true;
259 Features["atomic-fadd-rtn-insts"] = true;
260 Features["atomic-buffer-global-pk-add-f16-insts"] = true;
261 Features["atomic-flat-pk-add-16-insts"] = true;
262 Features["atomic-global-pk-add-bf16-inst"] = true;
263 Features["atomic-ds-pk-add-16-insts"] = true;
264 Features["setprio-inc-wg-inst"] = true;
265 Features["s-wakeup-barrier-inst"] = true;
266 Features["atomic-fmin-fmax-global-f32"] = true;
267 Features["atomic-fmin-fmax-global-f64"] = true;
268 Features["wavefrontsize32"] = true;
269 Features["clusters"] = true;
270 Features["mcast-load-insts"] = true;
271 Features["asynccnt"] = true;
272 break;
273 case GK_GFX1201:
274 case GK_GFX1200:
275 case GK_GFX12_GENERIC:
276 Features["ci-insts"] = true;
277 Features["dot7-insts"] = true;
278 Features["dot8-insts"] = true;
279 Features["dot9-insts"] = true;
280 Features["dot10-insts"] = true;
281 Features["dot11-insts"] = true;
282 Features["dot12-insts"] = true;
283 Features["dl-insts"] = true;
284 Features["atomic-ds-pk-add-16-insts"] = true;
285 Features["atomic-flat-pk-add-16-insts"] = true;
286 Features["atomic-buffer-global-pk-add-f16-insts"] = true;
287 Features["atomic-buffer-pk-add-bf16-inst"] = true;
288 Features["atomic-global-pk-add-bf16-inst"] = true;
289 Features["16-bit-insts"] = true;
290 Features["dpp"] = true;
291 Features["gfx8-insts"] = true;
292 Features["gfx9-insts"] = true;
293 Features["flat-global-insts"] = true;
294 Features["gfx10-insts"] = true;
295 Features["gfx10-3-insts"] = true;
296 Features["gfx11-insts"] = true;
297 Features["gfx12-insts"] = true;
298 Features["atomic-fadd-rtn-insts"] = true;
299 Features["image-insts"] = true;
300 Features["cube-insts"] = true;
301 Features["lerp-inst"] = true;
302 Features["sad-insts"] = true;
303 Features["qsad-insts"] = true;
304 Features["msad-insts"] = true;
305 Features["mqsad-pk-insts"] = true;
306 Features["mqsad-insts"] = true;
307 Features["cvt-pknorm-vop2-insts"] = true;
308 Features["fp8-conversion-insts"] = true;
309 Features["wmma-128b-insts"] = true;
310 Features["swmmac-gfx1200-insts"] = true;
311 Features["atomic-fmin-fmax-global-f32"] = true;
312 break;
313 case GK_GFX1170:
314 case GK_GFX1171:
315 case GK_GFX1172:
316 case GK_GFX11_7_GENERIC:
317 Features["ci-insts"] = true;
318 Features["dot7-insts"] = true;
319 Features["dot8-insts"] = true;
320 Features["dot9-insts"] = true;
321 Features["dot10-insts"] = true;
322 Features["dot12-insts"] = true;
323 Features["dl-insts"] = true;
324 Features["16-bit-insts"] = true;
325 Features["dpp"] = true;
326 Features["gfx8-insts"] = true;
327 Features["gfx9-insts"] = true;
328 Features["flat-global-insts"] = true;
329 Features["gfx10-insts"] = true;
330 Features["gfx10-3-insts"] = true;
331 Features["gfx11-insts"] = true;
332 Features["atomic-fadd-rtn-insts"] = true;
333 Features["image-insts"] = true;
334 Features["cube-insts"] = true;
335 Features["lerp-inst"] = true;
336 Features["sad-insts"] = true;
337 Features["qsad-insts"] = true;
338 Features["msad-insts"] = true;
339 Features["mqsad-pk-insts"] = true;
340 Features["mqsad-insts"] = true;
341 Features["cvt-pknorm-vop2-insts"] = true;
342 Features["gws"] = true;
343 Features["dot11-insts"] = true;
344 Features["fp8-conversion-insts"] = true;
345 Features["wmma-128b-insts"] = true;
346 Features["swmmac-gfx1200-insts"] = true;
347 Features["atomic-fmin-fmax-global-f32"] = true;
348 break;
349 case GK_GFX1154:
350 case GK_GFX1153:
351 case GK_GFX1152:
352 case GK_GFX1151:
353 case GK_GFX1150:
354 case GK_GFX1103:
355 case GK_GFX1102:
356 case GK_GFX1101:
357 case GK_GFX1100:
358 case GK_GFX11_GENERIC:
359 Features["ci-insts"] = true;
360 Features["dot5-insts"] = true;
361 Features["dot7-insts"] = true;
362 Features["dot8-insts"] = true;
363 Features["dot9-insts"] = true;
364 Features["dot10-insts"] = true;
365 Features["dot12-insts"] = true;
366 Features["dl-insts"] = true;
367 Features["16-bit-insts"] = true;
368 Features["dpp"] = true;
369 Features["gfx8-insts"] = true;
370 Features["gfx9-insts"] = true;
371 Features["flat-global-insts"] = true;
372 Features["gfx10-insts"] = true;
373 Features["gfx10-3-insts"] = true;
374 Features["gfx11-insts"] = true;
375 Features["atomic-fadd-rtn-insts"] = true;
376 Features["image-insts"] = true;
377 Features["cube-insts"] = true;
378 Features["lerp-inst"] = true;
379 Features["sad-insts"] = true;
380 Features["qsad-insts"] = true;
381 Features["msad-insts"] = true;
382 Features["mqsad-pk-insts"] = true;
383 Features["mqsad-insts"] = true;
384 Features["cvt-pknorm-vop2-insts"] = true;
385 Features["gws"] = true;
386 Features["wmma-256b-insts"] = true;
387 Features["atomic-fmin-fmax-global-f32"] = true;
388 break;
389 case GK_GFX1036:
390 case GK_GFX1035:
391 case GK_GFX1034:
392 case GK_GFX1033:
393 case GK_GFX1032:
394 case GK_GFX1031:
395 case GK_GFX1030:
396 case GK_GFX10_3_GENERIC:
397 Features["ci-insts"] = true;
398 Features["dot1-insts"] = true;
399 Features["dot2-insts"] = true;
400 Features["dot5-insts"] = true;
401 Features["dot6-insts"] = true;
402 Features["dot7-insts"] = true;
403 Features["dot10-insts"] = true;
404 Features["dl-insts"] = true;
405 Features["16-bit-insts"] = true;
406 Features["dpp"] = true;
407 Features["gfx8-insts"] = true;
408 Features["gfx9-insts"] = true;
409 Features["flat-global-insts"] = true;
410 Features["gfx10-insts"] = true;
411 Features["gfx10-3-insts"] = true;
412 Features["image-insts"] = true;
413 Features["s-memrealtime"] = true;
414 Features["s-memtime-inst"] = true;
415 Features["gws"] = true;
416 Features["vmem-to-lds-load-insts"] = true;
417 Features["atomic-fmin-fmax-global-f32"] = true;
418 Features["atomic-fmin-fmax-global-f64"] = true;
419 Features["cube-insts"] = true;
420 Features["lerp-inst"] = true;
421 Features["sad-insts"] = true;
422 Features["qsad-insts"] = true;
423 Features["msad-insts"] = true;
424 Features["mqsad-pk-insts"] = true;
425 Features["mqsad-insts"] = true;
426 Features["cvt-pknorm-vop2-insts"] = true;
427 break;
428 case GK_GFX1012:
429 case GK_GFX1011:
430 Features["dot1-insts"] = true;
431 Features["dot2-insts"] = true;
432 Features["dot5-insts"] = true;
433 Features["dot6-insts"] = true;
434 Features["dot7-insts"] = true;
435 Features["dot10-insts"] = true;
436 [[fallthrough]];
437 case GK_GFX1013:
438 case GK_GFX1010:
439 case GK_GFX10_1_GENERIC:
440 Features["dl-insts"] = true;
441 Features["ci-insts"] = true;
442 Features["16-bit-insts"] = true;
443 Features["dpp"] = true;
444 Features["gfx8-insts"] = true;
445 Features["gfx9-insts"] = true;
446 Features["flat-global-insts"] = true;
447 Features["gfx10-insts"] = true;
448 Features["image-insts"] = true;
449 Features["s-memrealtime"] = true;
450 Features["s-memtime-inst"] = true;
451 Features["gws"] = true;
452 Features["vmem-to-lds-load-insts"] = true;
453 Features["atomic-fmin-fmax-global-f32"] = true;
454 Features["atomic-fmin-fmax-global-f64"] = true;
455 Features["cube-insts"] = true;
456 Features["lerp-inst"] = true;
457 Features["sad-insts"] = true;
458 Features["qsad-insts"] = true;
459 Features["msad-insts"] = true;
460 Features["mqsad-pk-insts"] = true;
461 Features["mqsad-insts"] = true;
462 Features["cvt-pknorm-vop2-insts"] = true;
463 break;
464 case GK_GFX950:
465 Features["bitop3-insts"] = true;
466 Features["fp6bf6-cvt-scale-insts"] = true;
467 Features["fp4-cvt-scale-insts"] = true;
468 Features["bf8-cvt-scale-insts"] = true;
469 Features["fp8-cvt-scale-insts"] = true;
470 Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true;
471 Features["f32-to-f16bf16-cvt-sr-insts"] = true;
472 Features["prng-inst"] = true;
473 Features["permlane16-swap"] = true;
474 Features["permlane32-swap"] = true;
475 Features["ashr-pk-insts"] = true;
476 Features["dot12-insts"] = true;
477 Features["dot13-insts"] = true;
478 Features["atomic-buffer-pk-add-bf16-inst"] = true;
479 Features["gfx950-insts"] = true;
480 [[fallthrough]];
481 case GK_GFX942:
482 Features["fp8-insts"] = true;
483 Features["fp8-conversion-insts"] = true;
484 if (Kind != GK_GFX950)
485 Features["xf32-insts"] = true;
486 [[fallthrough]];
487 case GK_GFX9_4_GENERIC:
488 Features["gfx940-insts"] = true;
489 Features["atomic-ds-pk-add-16-insts"] = true;
490 Features["atomic-flat-pk-add-16-insts"] = true;
491 Features["atomic-global-pk-add-bf16-inst"] = true;
492 Features["gfx90a-insts"] = true;
493 Features["atomic-buffer-global-pk-add-f16-insts"] = true;
494 Features["atomic-fadd-rtn-insts"] = true;
495 Features["dot3-insts"] = true;
496 Features["dot4-insts"] = true;
497 Features["dot5-insts"] = true;
498 Features["dot6-insts"] = true;
499 Features["mai-insts"] = true;
500 Features["dl-insts"] = true;
501 Features["dot1-insts"] = true;
502 Features["dot2-insts"] = true;
503 Features["dot7-insts"] = true;
504 Features["dot10-insts"] = true;
505 Features["gfx9-insts"] = true;
506 Features["flat-global-insts"] = true;
507 Features["gfx8-insts"] = true;
508 Features["16-bit-insts"] = true;
509 Features["dpp"] = true;
510 Features["s-memrealtime"] = true;
511 Features["ci-insts"] = true;
512 Features["s-memtime-inst"] = true;
513 Features["gws"] = true;
514 Features["vmem-to-lds-load-insts"] = true;
515 Features["atomic-fmin-fmax-global-f64"] = true;
516 Features["wavefrontsize64"] = true;
517 Features["cube-insts"] = true;
518 Features["lerp-inst"] = true;
519 Features["sad-insts"] = true;
520 Features["qsad-insts"] = true;
521 Features["msad-insts"] = true;
522 Features["mqsad-pk-insts"] = true;
523 Features["mqsad-insts"] = true;
524 Features["cvt-pknorm-vop2-insts"] = true;
525 break;
526 case GK_GFX90A:
527 Features["gfx90a-insts"] = true;
528 Features["atomic-buffer-global-pk-add-f16-insts"] = true;
529 Features["atomic-fadd-rtn-insts"] = true;
530 Features["atomic-fmin-fmax-global-f64"] = true;
531 [[fallthrough]];
532 case GK_GFX908:
533 Features["dot3-insts"] = true;
534 Features["dot4-insts"] = true;
535 Features["dot5-insts"] = true;
536 Features["dot6-insts"] = true;
537 Features["mai-insts"] = true;
538 [[fallthrough]];
539 case GK_GFX906:
540 Features["dl-insts"] = true;
541 Features["dot1-insts"] = true;
542 Features["dot2-insts"] = true;
543 Features["dot7-insts"] = true;
544 Features["dot10-insts"] = true;
545 [[fallthrough]];
546 case GK_GFX90C:
547 case GK_GFX909:
548 case GK_GFX904:
549 case GK_GFX902:
550 case GK_GFX900:
551 case GK_GFX9_GENERIC:
552 Features["gfx9-insts"] = true;
553 Features["flat-global-insts"] = true;
554 Features["vmem-to-lds-load-insts"] = true;
555 [[fallthrough]];
556 case GK_GFX810:
557 case GK_GFX805:
558 case GK_GFX803:
559 case GK_GFX802:
560 case GK_GFX801:
561 Features["gfx8-insts"] = true;
562 Features["16-bit-insts"] = true;
563 Features["dpp"] = true;
564 Features["s-memrealtime"] = true;
565 Features["ci-insts"] = true;
566 Features["image-insts"] = true;
567 Features["s-memtime-inst"] = true;
568 Features["gws"] = true;
569 Features["wavefrontsize64"] = true;
570 Features["cube-insts"] = true;
571 Features["lerp-inst"] = true;
572 Features["sad-insts"] = true;
573 Features["qsad-insts"] = true;
574 Features["msad-insts"] = true;
575 Features["mqsad-pk-insts"] = true;
576 Features["mqsad-insts"] = true;
577 Features["cvt-pknorm-vop2-insts"] = true;
578 break;
579 case GK_GFX705:
580 case GK_GFX704:
581 case GK_GFX703:
582 case GK_GFX702:
583 case GK_GFX701:
584 case GK_GFX700:
585 Features["ci-insts"] = true;
586 Features["cube-insts"] = true;
587 Features["lerp-inst"] = true;
588 Features["sad-insts"] = true;
589 Features["qsad-insts"] = true;
590 Features["msad-insts"] = true;
591 Features["mqsad-pk-insts"] = true;
592 Features["mqsad-insts"] = true;
593 Features["cvt-pknorm-vop2-insts"] = true;
594 Features["image-insts"] = true;
595 Features["s-memtime-inst"] = true;
596 Features["gws"] = true;
597 Features["atomic-fmin-fmax-global-f32"] = true;
598 Features["atomic-fmin-fmax-global-f64"] = true;
599 Features["wavefrontsize64"] = true;
600 break;
601 case GK_GFX602:
602 case GK_GFX601:
603 case GK_GFX600:
604 Features["image-insts"] = true;
605 Features["s-memtime-inst"] = true;
606 Features["gws"] = true;
607 Features["atomic-fmin-fmax-global-f32"] = true;
608 Features["atomic-fmin-fmax-global-f64"] = true;
609 Features["wavefrontsize64"] = true;
610 Features["cube-insts"] = true;
611 Features["lerp-inst"] = true;
612 Features["sad-insts"] = true;
613 Features["msad-insts"] = true;
614 Features["mqsad-pk-insts"] = true;
615 Features["cvt-pknorm-vop2-insts"] = true;
616 break;
617 case GK_NONE:
618 break;
619 default:
620 llvm_unreachable("Unhandled GPU!");
621 }
622}
623
624/// Fills Features map with default values for given target GPU.
625/// \p Features contains overriding target features and this function returns
626/// default target features with entries overridden by \p Features.
627std::pair<FeatureError, StringRef>
628AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
629 StringMap<bool> &Features) {
630 // XXX - What does the member GPU mean if device name string passed here?
631 if (T.isSPIRV() && T.getOS() == Triple::OSType::AMDHSA) {
632 // AMDGCN SPIRV must support the union of all AMDGCN features.
633 SmallVector<StringRef> GPUs;
634 fillValidArchListAMDGCN(Values&: GPUs);
635
636 static const Triple AMDGCN("amdgcn-amd-amdhsa");
637 StringMap<bool> Tmp;
638 for (auto &&GPU : GPUs) {
639 fillAMDGCNFeatureMap(GPU, T: AMDGCN, Features&: Tmp);
640 for (auto &&[F, B] : Tmp)
641 Features[F] = B;
642 }
643 Features["wavefrontsize32"] = true;
644 Features["wavefrontsize64"] = true;
645 } else if (T.isAMDGCN()) {
646 StringMap<bool> DefaultFeatures;
647 fillAMDGCNFeatureMap(GPU, T, Features&: DefaultFeatures);
648 return insertWaveSizeFeature(GPU, T, DefaultFeatures, Features);
649 } else {
650 if (GPU.empty())
651 GPU = "r600";
652
653 switch (llvm::AMDGPU::parseArchR600(CPU: GPU)) {
654 case GK_CAYMAN:
655 case GK_CYPRESS:
656 case GK_RV770:
657 case GK_RV670:
658 // TODO: Add fp64 when implemented.
659 break;
660 case GK_TURKS:
661 case GK_CAICOS:
662 case GK_BARTS:
663 case GK_SUMO:
664 case GK_REDWOOD:
665 case GK_JUNIPER:
666 case GK_CEDAR:
667 case GK_RV730:
668 case GK_RV710:
669 case GK_RS880:
670 case GK_R630:
671 case GK_R600:
672 break;
673 default:
674 llvm_unreachable("Unhandled GPU!");
675 }
676 }
677 return {NO_ERROR, StringRef()};
678}
679
680TargetID::TargetID(GPUKind Arch, const Triple &TT, TargetIDSetting XnackSetting,
681 TargetIDSetting SramEccSetting)
682 : Arch(Arch),
683 TargetTripleString(TT.normalize(Form: Triple::CanonicalForm::FOUR_IDENT)),
684 XnackSetting(XnackSetting), SramEccSetting(SramEccSetting),
685 IsAMDHSA(TT.getOS() == Triple::AMDHSA) {}
686
687static TargetIDSetting
688getTargetIDSettingFromFeatureString(StringRef FeatureString) {
689 if (FeatureString.ends_with(Suffix: "-"))
690 return TargetIDSetting::Off;
691 if (FeatureString.ends_with(Suffix: "+"))
692 return TargetIDSetting::On;
693
694 llvm_unreachable("Malformed feature string");
695}
696
697void TargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
698 SmallVector<StringRef, 3> TargetIDSplit;
699 TargetID.split(A&: TargetIDSplit, Separator: ':');
700
701 for (const auto &FeatureString : TargetIDSplit) {
702 if (FeatureString.starts_with(Prefix: "xnack"))
703 XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
704 if (FeatureString.starts_with(Prefix: "sramecc"))
705 SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
706 }
707}
708
709std::optional<TargetID>
710TargetID::parseTargetIDString(StringRef TargetIDDirective) {
711 // Split on '-' to get arch-vendor-os-environment-processor:features
712 // There is a single dash separator after the 4-component triple
713 SmallVector<StringRef, 5> Parts;
714 TargetIDDirective.split(A&: Parts, Separator: '-', /*MaxSplit=*/4);
715 if (Parts.size() < 4)
716 return std::nullopt;
717
718 Triple TT(Parts[0], Parts[1], Parts[2], Parts[3]);
719 if (!TT.isAMDGCN())
720 return std::nullopt;
721
722 SmallVector<StringRef, 3> FeatureSplit;
723 Parts[4].split(A&: FeatureSplit, Separator: ':');
724 if (FeatureSplit.empty())
725 return std::nullopt;
726
727 StringRef CPUName = FeatureSplit[0];
728
729 // Determine xnack/sramecc support based on the architecture attributes
730 GPUKind Arch = parseArchAMDGCN(CPU: CPUName);
731 unsigned ArchAttr = getArchAttrAMDGCN(AK: Arch);
732
733 TargetIDSetting XnackSetting = (ArchAttr & FEATURE_XNACK)
734 ? TargetIDSetting::Any
735 : TargetIDSetting::Unsupported;
736 TargetIDSetting SramEccSetting = (ArchAttr & FEATURE_SRAMECC)
737 ? TargetIDSetting::Any
738 : TargetIDSetting::Unsupported;
739
740 for (StringRef FeatureString :
741 ArrayRef<StringRef>(FeatureSplit).drop_front(N: 1)) {
742 if (FeatureString.starts_with(Prefix: "xnack"))
743 XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
744 else if (FeatureString.starts_with(Prefix: "sramecc"))
745 SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
746 }
747
748 return TargetID(Arch, TT, XnackSetting, SramEccSetting);
749}
750
751void TargetID::print(raw_ostream &StreamRep) const {
752 StreamRep << TargetTripleString << '-' << getArchNameAMDGCN(AK: Arch);
753
754 if (IsAMDHSA) {
755 // sramecc.
756 if (getSramEccSetting() == TargetIDSetting::Off)
757 StreamRep << ":sramecc-";
758 else if (getSramEccSetting() == TargetIDSetting::On)
759 StreamRep << ":sramecc+";
760
761 // xnack.
762 if (getXnackSetting() == TargetIDSetting::Off)
763 StreamRep << ":xnack-";
764 else if (getXnackSetting() == TargetIDSetting::On)
765 StreamRep << ":xnack+";
766 }
767}
768
769std::string TargetID::toString() const {
770 std::string Str;
771 raw_string_ostream OS(Str);
772 OS << *this;
773 return Str;
774}
775
776bool TargetID::operator==(const TargetID &Other) const {
777 return Arch == Other.Arch && XnackSetting == Other.XnackSetting &&
778 SramEccSetting == Other.SramEccSetting && IsAMDHSA == Other.IsAMDHSA &&
779 TargetTripleString == Other.TargetTripleString;
780}
781