1//===-- TargetParser - Parser for target features ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a target parser to recognise hardware features such as
10// FPU/CPU/ARCH names as well as specific support such as HDIV, etc.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/TargetParser/TargetParser.h"
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/StringSwitch.h"
17#include "llvm/TargetParser/Triple.h"
18
19using namespace llvm;
20using namespace AMDGPU;
21
22/// Find KV in array using binary search.
23static const BasicSubtargetSubTypeKV *
24find(StringRef S, ArrayRef<BasicSubtargetSubTypeKV> A) {
25 // Binary search the array
26 auto F = llvm::lower_bound(Range&: A, Value&: S);
27 // If not found then return NULL
28 if (F == A.end() || StringRef(F->Key) != S)
29 return nullptr;
30 // Return the found array item
31 return F;
32}
33
34/// For each feature that is (transitively) implied by this feature, set it.
35static void setImpliedBits(FeatureBitset &Bits, const FeatureBitset &Implies,
36 ArrayRef<BasicSubtargetFeatureKV> FeatureTable) {
37 // OR the Implies bits in outside the loop. This allows the Implies for CPUs
38 // which might imply features not in FeatureTable to use this.
39 Bits |= Implies;
40 for (const auto &FE : FeatureTable)
41 if (Implies.test(I: FE.Value))
42 setImpliedBits(Bits, Implies: FE.Implies.getAsBitset(), FeatureTable);
43}
44
45std::optional<llvm::StringMap<bool>> llvm::getCPUDefaultTargetFeatures(
46 StringRef CPU, ArrayRef<BasicSubtargetSubTypeKV> ProcDesc,
47 ArrayRef<BasicSubtargetFeatureKV> ProcFeatures) {
48 if (CPU.empty())
49 return std::nullopt;
50
51 const BasicSubtargetSubTypeKV *CPUEntry = ::find(S: CPU, A: ProcDesc);
52 if (!CPUEntry)
53 return std::nullopt;
54
55 // Set the features implied by this CPU feature if there is a match.
56 FeatureBitset Bits;
57 llvm::StringMap<bool> DefaultFeatures;
58 setImpliedBits(Bits, Implies: CPUEntry->Implies.getAsBitset(), FeatureTable: ProcFeatures);
59
60 [[maybe_unused]] unsigned BitSize = Bits.size();
61 for (const BasicSubtargetFeatureKV &FE : ProcFeatures) {
62 assert(FE.Value < BitSize && "Target Feature is out of range");
63 if (Bits[FE.Value])
64 DefaultFeatures[FE.Key] = true;
65 }
66 return DefaultFeatures;
67}
68
69StringRef llvm::AMDGPU::getArchFamilyNameAMDGCN(GPUKind AK) {
70 StringRef ArchName = getArchNameAMDGCN(AK);
71 assert((AK >= GK_AMDGCN_GENERIC_FIRST && AK <= GK_AMDGCN_GENERIC_LAST) ==
72 ArchName.ends_with("-generic") &&
73 "Generic AMDGCN arch not classified correctly!");
74 if (AK >= GK_AMDGCN_GENERIC_FIRST && AK <= GK_AMDGCN_GENERIC_LAST) {
75 // Return the part before the first '-', e.g. "gfx9-4-generic" -> "gfx9".
76 return ArchName.take_front(N: ArchName.find(C: '-'));
77 }
78 return ArchName.empty() ? "" : ArchName.drop_back(N: 2);
79}
80
81StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) {
82 switch (AK) {
83#define AMDGCN_GPU(NAME, ENUM, ISAVERSION, FEATURES) \
84 case ENUM: \
85 return NAME;
86#include "llvm/TargetParser/AMDGPUTargetParser.def"
87 default:
88 return "";
89 }
90}
91
92StringRef llvm::AMDGPU::getArchNameR600(GPUKind AK) {
93 switch (AK) {
94#define R600_GPU(NAME, ENUM, FEATURES) \
95 case ENUM: \
96 return NAME;
97#include "llvm/TargetParser/AMDGPUTargetParser.def"
98 default:
99 return "";
100 }
101}
102
103AMDGPU::GPUKind llvm::AMDGPU::parseArchAMDGCN(StringRef CPU) {
104 return StringSwitch<AMDGPU::GPUKind>(CPU)
105#define AMDGCN_GPU(NAME, ENUM, ISAVERSION, FEATURES) .Case(NAME, ENUM)
106#define AMDGCN_GPU_ALIAS(NAME, ENUM) .Case(NAME, ENUM)
107#include "llvm/TargetParser/AMDGPUTargetParser.def"
108 .Default(Value: AMDGPU::GPUKind::GK_NONE);
109}
110
111AMDGPU::GPUKind llvm::AMDGPU::parseArchR600(StringRef CPU) {
112 return StringSwitch<AMDGPU::GPUKind>(CPU)
113#define R600_GPU(NAME, ENUM, FEATURES) .Case(NAME, ENUM)
114#define R600_GPU_ALIAS(NAME, ENUM) .Case(NAME, ENUM)
115#include "llvm/TargetParser/AMDGPUTargetParser.def"
116 .Default(Value: AMDGPU::GPUKind::GK_NONE);
117}
118
119unsigned AMDGPU::getArchAttrAMDGCN(GPUKind AK) {
120 switch (AK) {
121#define AMDGCN_GPU(NAME, ENUM, ISAVERSION, FEATURES) \
122 case ENUM: \
123 return FEATURES;
124#include "llvm/TargetParser/AMDGPUTargetParser.def"
125 default:
126 return FEATURE_NONE;
127 }
128}
129
130unsigned AMDGPU::getArchAttrR600(GPUKind AK) {
131 switch (AK) {
132#define R600_GPU(NAME, ENUM, FEATURES) \
133 case ENUM: \
134 return FEATURES;
135#include "llvm/TargetParser/AMDGPUTargetParser.def"
136 default:
137 return FEATURE_NONE;
138 }
139}
140
141void AMDGPU::fillValidArchListAMDGCN(SmallVectorImpl<StringRef> &Values) {
142 // XXX: Should this only report unique canonical names?
143 Values.append(IL: {
144#define AMDGCN_GPU(NAME, ENUM, ISAVERSION, FEATURES) NAME,
145#define AMDGCN_GPU_ALIAS(NAME, ENUM) NAME,
146#include "llvm/TargetParser/AMDGPUTargetParser.def"
147 });
148}
149
150void AMDGPU::fillValidArchListR600(SmallVectorImpl<StringRef> &Values) {
151 Values.append(IL: {
152#define R600_GPU(NAME, ENUM, FEATURES) NAME,
153#define R600_GPU_ALIAS(NAME, ENUM) NAME,
154#include "llvm/TargetParser/AMDGPUTargetParser.def"
155 });
156}
157
158AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
159 AMDGPU::GPUKind AK = parseArchAMDGCN(CPU: GPU);
160 if (AK == AMDGPU::GPUKind::GK_NONE) {
161 if (GPU == "generic-hsa")
162 return {.Major: 7, .Minor: 0, .Stepping: 0};
163 if (GPU == "generic")
164 return {.Major: 6, .Minor: 0, .Stepping: 0};
165 return {.Major: 0, .Minor: 0, .Stepping: 0};
166 }
167
168 switch (AK) {
169#define MAKE_ISAVERSION(A, B, C) {A, B, C}
170#define AMDGCN_GPU(NAME, ENUM, ISAVERSION, FEATURES) \
171 case ENUM: \
172 return MAKE_ISAVERSION ISAVERSION;
173#include "llvm/TargetParser/AMDGPUTargetParser.def"
174#undef MAKE_ISAVERSION
175 default:
176 return {.Major: 0, .Minor: 0, .Stepping: 0};
177 }
178}
179
180StringRef AMDGPU::getCanonicalArchName(const Triple &T, StringRef Arch) {
181 assert(T.isAMDGPU());
182 auto ProcKind = T.isAMDGCN() ? parseArchAMDGCN(CPU: Arch) : parseArchR600(CPU: Arch);
183 if (ProcKind == GK_NONE)
184 return StringRef();
185
186 return T.isAMDGCN() ? getArchNameAMDGCN(AK: ProcKind) : getArchNameR600(AK: ProcKind);
187}
188
189static std::pair<FeatureError, StringRef>
190insertWaveSizeFeature(StringRef GPU, const Triple &T,
191 const StringMap<bool> &DefaultFeatures,
192 StringMap<bool> &Features) {
193 const bool IsNullGPU = GPU.empty();
194 const bool TargetHasWave32 = DefaultFeatures.count(Key: "wavefrontsize32");
195 const bool TargetHasWave64 = DefaultFeatures.count(Key: "wavefrontsize64");
196
197 auto Wave32Itr = Features.find(Key: "wavefrontsize32");
198 auto Wave64Itr = Features.find(Key: "wavefrontsize64");
199 const bool EnableWave32 =
200 Wave32Itr != Features.end() && Wave32Itr->getValue();
201 const bool EnableWave64 =
202 Wave64Itr != Features.end() && Wave64Itr->getValue();
203 const bool DisableWave32 =
204 Wave32Itr != Features.end() && !Wave32Itr->getValue();
205 const bool DisableWave64 =
206 Wave64Itr != Features.end() && !Wave64Itr->getValue();
207
208 if (EnableWave32 && EnableWave64)
209 return {AMDGPU::INVALID_FEATURE_COMBINATION,
210 "'+wavefrontsize32' and '+wavefrontsize64' are mutually exclusive"};
211 if (DisableWave32 && DisableWave64)
212 return {AMDGPU::INVALID_FEATURE_COMBINATION,
213 "'-wavefrontsize32' and '-wavefrontsize64' are mutually exclusive"};
214
215 if (!IsNullGPU) {
216 if (TargetHasWave64) {
217 if (EnableWave32)
218 return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "+wavefrontsize32"};
219 if (DisableWave64)
220 return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "-wavefrontsize64"};
221 }
222
223 if (TargetHasWave32) {
224 if (EnableWave64)
225 return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "+wavefrontsize64"};
226 if (DisableWave32)
227 return {AMDGPU::UNSUPPORTED_TARGET_FEATURE, "-wavefrontsize32"};
228 }
229 }
230
231 // Don't assume any wavesize with an unknown subtarget.
232 // Default to wave32 if target supports both.
233 if (!IsNullGPU && !EnableWave32 && !EnableWave64 && !TargetHasWave32 &&
234 !TargetHasWave64)
235 Features.insert(KV: std::make_pair(x: "wavefrontsize32", y: true));
236
237 for (const auto &Entry : DefaultFeatures) {
238 if (!Features.count(Key: Entry.getKey()))
239 Features[Entry.getKey()] = Entry.getValue();
240 }
241
242 return {NO_ERROR, StringRef()};
243}
244
245/// Fills Features map with default values for given target GPU.
246/// \p Features contains overriding target features and this function returns
247/// default target features with entries overridden by \p Features.
248static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
249 StringMap<bool> &Features) {
250 AMDGPU::GPUKind Kind = parseArchAMDGCN(CPU: GPU);
251 switch (Kind) {
252 case GK_GFX1310:
253 case GK_GFX1251:
254 case GK_GFX1250:
255 case GK_GFX12_5_GENERIC:
256 Features["ci-insts"] = true;
257 Features["dot7-insts"] = true;
258 Features["dot8-insts"] = true;
259 Features["dl-insts"] = true;
260 Features["16-bit-insts"] = true;
261 Features["dpp"] = true;
262 Features["gfx8-insts"] = true;
263 Features["gfx9-insts"] = true;
264 Features["gfx10-insts"] = true;
265 Features["gfx10-3-insts"] = true;
266 Features["gfx11-insts"] = true;
267 Features["gfx12-insts"] = true;
268 Features["gfx1250-insts"] = true;
269 Features["bitop3-insts"] = true;
270 Features["prng-inst"] = true;
271 Features["tanh-insts"] = true;
272 Features["tensor-cvt-lut-insts"] = true;
273 Features["transpose-load-f4f6-insts"] = true;
274 Features["bf16-trans-insts"] = true;
275 Features["bf16-cvt-insts"] = true;
276 Features["bf16-pk-insts"] = true;
277 Features["fp8-conversion-insts"] = true;
278 Features["fp8e5m3-insts"] = true;
279 Features["permlane16-swap"] = true;
280 Features["ashr-pk-insts"] = true;
281 Features["add-min-max-insts"] = true;
282 Features["pk-add-min-max-insts"] = true;
283 Features["atomic-buffer-pk-add-bf16-inst"] = true;
284 Features["vmem-pref-insts"] = true;
285 Features["atomic-fadd-rtn-insts"] = true;
286 Features["atomic-buffer-global-pk-add-f16-insts"] = true;
287 Features["atomic-flat-pk-add-16-insts"] = true;
288 Features["atomic-global-pk-add-bf16-inst"] = true;
289 Features["atomic-ds-pk-add-16-insts"] = true;
290 Features["setprio-inc-wg-inst"] = true;
291 Features["s-wakeup-barrier-inst"] = true;
292 Features["atomic-fmin-fmax-global-f32"] = true;
293 Features["atomic-fmin-fmax-global-f64"] = true;
294 Features["wavefrontsize32"] = true;
295 Features["clusters"] = true;
296 Features["mcast-load-insts"] = true;
297 Features["cube-insts"] = true;
298 Features["lerp-inst"] = true;
299 Features["sad-insts"] = true;
300 Features["qsad-insts"] = true;
301 Features["cvt-pknorm-vop2-insts"] = true;
302 break;
303 case GK_GFX1201:
304 case GK_GFX1200:
305 case GK_GFX12_GENERIC:
306 Features["ci-insts"] = true;
307 Features["dot7-insts"] = true;
308 Features["dot8-insts"] = true;
309 Features["dot9-insts"] = true;
310 Features["dot10-insts"] = true;
311 Features["dot11-insts"] = true;
312 Features["dot12-insts"] = true;
313 Features["dl-insts"] = true;
314 Features["atomic-ds-pk-add-16-insts"] = true;
315 Features["atomic-flat-pk-add-16-insts"] = true;
316 Features["atomic-buffer-global-pk-add-f16-insts"] = true;
317 Features["atomic-buffer-pk-add-bf16-inst"] = true;
318 Features["atomic-global-pk-add-bf16-inst"] = true;
319 Features["16-bit-insts"] = true;
320 Features["dpp"] = true;
321 Features["gfx8-insts"] = true;
322 Features["gfx9-insts"] = true;
323 Features["gfx10-insts"] = true;
324 Features["gfx10-3-insts"] = true;
325 Features["gfx11-insts"] = true;
326 Features["gfx12-insts"] = true;
327 Features["atomic-fadd-rtn-insts"] = true;
328 Features["image-insts"] = true;
329 Features["cube-insts"] = true;
330 Features["lerp-inst"] = true;
331 Features["sad-insts"] = true;
332 Features["qsad-insts"] = true;
333 Features["cvt-pknorm-vop2-insts"] = true;
334 Features["fp8-conversion-insts"] = true;
335 Features["wmma-128b-insts"] = true;
336 Features["atomic-fmin-fmax-global-f32"] = true;
337 break;
338 case GK_GFX1170:
339 Features["ci-insts"] = true;
340 Features["dot7-insts"] = true;
341 Features["dot8-insts"] = true;
342 Features["dot9-insts"] = true;
343 Features["dot10-insts"] = true;
344 Features["dot12-insts"] = true;
345 Features["dl-insts"] = true;
346 Features["16-bit-insts"] = true;
347 Features["dpp"] = true;
348 Features["gfx8-insts"] = true;
349 Features["gfx9-insts"] = true;
350 Features["gfx10-insts"] = true;
351 Features["gfx10-3-insts"] = true;
352 Features["gfx11-insts"] = true;
353 Features["atomic-fadd-rtn-insts"] = true;
354 Features["image-insts"] = true;
355 Features["cube-insts"] = true;
356 Features["lerp-inst"] = true;
357 Features["sad-insts"] = true;
358 Features["qsad-insts"] = true;
359 Features["cvt-pknorm-vop2-insts"] = true;
360 Features["gws"] = true;
361 Features["dot11-insts"] = true;
362 Features["fp8-conversion-insts"] = true;
363 Features["wmma-128b-insts"] = true;
364 Features["atomic-fmin-fmax-global-f32"] = true;
365 break;
366 case GK_GFX1153:
367 case GK_GFX1152:
368 case GK_GFX1151:
369 case GK_GFX1150:
370 case GK_GFX1103:
371 case GK_GFX1102:
372 case GK_GFX1101:
373 case GK_GFX1100:
374 case GK_GFX11_GENERIC:
375 Features["ci-insts"] = true;
376 Features["dot5-insts"] = true;
377 Features["dot7-insts"] = true;
378 Features["dot8-insts"] = true;
379 Features["dot9-insts"] = true;
380 Features["dot10-insts"] = true;
381 Features["dot12-insts"] = true;
382 Features["dl-insts"] = true;
383 Features["16-bit-insts"] = true;
384 Features["dpp"] = true;
385 Features["gfx8-insts"] = true;
386 Features["gfx9-insts"] = true;
387 Features["gfx10-insts"] = true;
388 Features["gfx10-3-insts"] = true;
389 Features["gfx11-insts"] = true;
390 Features["atomic-fadd-rtn-insts"] = true;
391 Features["image-insts"] = true;
392 Features["cube-insts"] = true;
393 Features["lerp-inst"] = true;
394 Features["sad-insts"] = true;
395 Features["qsad-insts"] = true;
396 Features["cvt-pknorm-vop2-insts"] = true;
397 Features["gws"] = true;
398 Features["wmma-256b-insts"] = true;
399 Features["atomic-fmin-fmax-global-f32"] = true;
400 break;
401 case GK_GFX1036:
402 case GK_GFX1035:
403 case GK_GFX1034:
404 case GK_GFX1033:
405 case GK_GFX1032:
406 case GK_GFX1031:
407 case GK_GFX1030:
408 case GK_GFX10_3_GENERIC:
409 Features["ci-insts"] = true;
410 Features["dot1-insts"] = true;
411 Features["dot2-insts"] = true;
412 Features["dot5-insts"] = true;
413 Features["dot6-insts"] = true;
414 Features["dot7-insts"] = true;
415 Features["dot10-insts"] = true;
416 Features["dl-insts"] = true;
417 Features["16-bit-insts"] = true;
418 Features["dpp"] = true;
419 Features["gfx8-insts"] = true;
420 Features["gfx9-insts"] = true;
421 Features["gfx10-insts"] = true;
422 Features["gfx10-3-insts"] = true;
423 Features["image-insts"] = true;
424 Features["s-memrealtime"] = true;
425 Features["s-memtime-inst"] = true;
426 Features["gws"] = true;
427 Features["vmem-to-lds-load-insts"] = true;
428 Features["atomic-fmin-fmax-global-f32"] = true;
429 Features["atomic-fmin-fmax-global-f64"] = true;
430 Features["cube-insts"] = true;
431 Features["lerp-inst"] = true;
432 Features["sad-insts"] = true;
433 Features["qsad-insts"] = true;
434 Features["cvt-pknorm-vop2-insts"] = true;
435 break;
436 case GK_GFX1012:
437 case GK_GFX1011:
438 Features["dot1-insts"] = true;
439 Features["dot2-insts"] = true;
440 Features["dot5-insts"] = true;
441 Features["dot6-insts"] = true;
442 Features["dot7-insts"] = true;
443 Features["dot10-insts"] = true;
444 [[fallthrough]];
445 case GK_GFX1013:
446 case GK_GFX1010:
447 case GK_GFX10_1_GENERIC:
448 Features["dl-insts"] = true;
449 Features["ci-insts"] = true;
450 Features["16-bit-insts"] = true;
451 Features["dpp"] = true;
452 Features["gfx8-insts"] = true;
453 Features["gfx9-insts"] = true;
454 Features["gfx10-insts"] = true;
455 Features["image-insts"] = true;
456 Features["s-memrealtime"] = true;
457 Features["s-memtime-inst"] = true;
458 Features["gws"] = true;
459 Features["vmem-to-lds-load-insts"] = true;
460 Features["atomic-fmin-fmax-global-f32"] = true;
461 Features["atomic-fmin-fmax-global-f64"] = true;
462 Features["cube-insts"] = true;
463 Features["lerp-inst"] = true;
464 Features["sad-insts"] = true;
465 Features["qsad-insts"] = true;
466 Features["cvt-pknorm-vop2-insts"] = true;
467 break;
468 case GK_GFX950:
469 Features["bitop3-insts"] = true;
470 Features["fp6bf6-cvt-scale-insts"] = true;
471 Features["fp4-cvt-scale-insts"] = true;
472 Features["bf8-cvt-scale-insts"] = true;
473 Features["fp8-cvt-scale-insts"] = true;
474 Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true;
475 Features["f32-to-f16bf16-cvt-sr-insts"] = true;
476 Features["prng-inst"] = true;
477 Features["permlane16-swap"] = true;
478 Features["permlane32-swap"] = true;
479 Features["ashr-pk-insts"] = true;
480 Features["dot12-insts"] = true;
481 Features["dot13-insts"] = true;
482 Features["atomic-buffer-pk-add-bf16-inst"] = true;
483 Features["gfx950-insts"] = true;
484 [[fallthrough]];
485 case GK_GFX942:
486 Features["fp8-insts"] = true;
487 Features["fp8-conversion-insts"] = true;
488 if (Kind != GK_GFX950)
489 Features["xf32-insts"] = true;
490 [[fallthrough]];
491 case GK_GFX9_4_GENERIC:
492 Features["gfx940-insts"] = true;
493 Features["atomic-ds-pk-add-16-insts"] = true;
494 Features["atomic-flat-pk-add-16-insts"] = true;
495 Features["atomic-global-pk-add-bf16-inst"] = true;
496 Features["gfx90a-insts"] = true;
497 Features["atomic-buffer-global-pk-add-f16-insts"] = true;
498 Features["atomic-fadd-rtn-insts"] = true;
499 Features["dot3-insts"] = true;
500 Features["dot4-insts"] = true;
501 Features["dot5-insts"] = true;
502 Features["dot6-insts"] = true;
503 Features["mai-insts"] = true;
504 Features["dl-insts"] = true;
505 Features["dot1-insts"] = true;
506 Features["dot2-insts"] = true;
507 Features["dot7-insts"] = true;
508 Features["dot10-insts"] = true;
509 Features["gfx9-insts"] = true;
510 Features["gfx8-insts"] = true;
511 Features["16-bit-insts"] = true;
512 Features["dpp"] = true;
513 Features["s-memrealtime"] = true;
514 Features["ci-insts"] = true;
515 Features["s-memtime-inst"] = true;
516 Features["gws"] = true;
517 Features["vmem-to-lds-load-insts"] = true;
518 Features["atomic-fmin-fmax-global-f64"] = true;
519 Features["wavefrontsize64"] = true;
520 Features["cube-insts"] = true;
521 Features["lerp-inst"] = true;
522 Features["sad-insts"] = true;
523 Features["qsad-insts"] = true;
524 Features["cvt-pknorm-vop2-insts"] = true;
525 break;
526 case GK_GFX90A:
527 Features["gfx90a-insts"] = true;
528 Features["atomic-buffer-global-pk-add-f16-insts"] = true;
529 Features["atomic-fadd-rtn-insts"] = true;
530 Features["atomic-fmin-fmax-global-f64"] = true;
531 [[fallthrough]];
532 case GK_GFX908:
533 Features["dot3-insts"] = true;
534 Features["dot4-insts"] = true;
535 Features["dot5-insts"] = true;
536 Features["dot6-insts"] = true;
537 Features["mai-insts"] = true;
538 [[fallthrough]];
539 case GK_GFX906:
540 Features["dl-insts"] = true;
541 Features["dot1-insts"] = true;
542 Features["dot2-insts"] = true;
543 Features["dot7-insts"] = true;
544 Features["dot10-insts"] = true;
545 [[fallthrough]];
546 case GK_GFX90C:
547 case GK_GFX909:
548 case GK_GFX904:
549 case GK_GFX902:
550 case GK_GFX900:
551 case GK_GFX9_GENERIC:
552 Features["gfx9-insts"] = true;
553 Features["vmem-to-lds-load-insts"] = true;
554 [[fallthrough]];
555 case GK_GFX810:
556 case GK_GFX805:
557 case GK_GFX803:
558 case GK_GFX802:
559 case GK_GFX801:
560 Features["gfx8-insts"] = true;
561 Features["16-bit-insts"] = true;
562 Features["dpp"] = true;
563 Features["s-memrealtime"] = true;
564 Features["ci-insts"] = true;
565 Features["image-insts"] = true;
566 Features["s-memtime-inst"] = true;
567 Features["gws"] = true;
568 Features["wavefrontsize64"] = true;
569 Features["cube-insts"] = true;
570 Features["lerp-inst"] = true;
571 Features["sad-insts"] = true;
572 Features["qsad-insts"] = true;
573 Features["cvt-pknorm-vop2-insts"] = true;
574 break;
575 case GK_GFX705:
576 case GK_GFX704:
577 case GK_GFX703:
578 case GK_GFX702:
579 case GK_GFX701:
580 case GK_GFX700:
581 Features["ci-insts"] = true;
582 Features["cube-insts"] = true;
583 Features["lerp-inst"] = true;
584 Features["sad-insts"] = true;
585 Features["qsad-insts"] = true;
586 Features["cvt-pknorm-vop2-insts"] = true;
587 Features["image-insts"] = true;
588 Features["s-memtime-inst"] = true;
589 Features["gws"] = true;
590 Features["atomic-fmin-fmax-global-f32"] = true;
591 Features["atomic-fmin-fmax-global-f64"] = true;
592 Features["wavefrontsize64"] = true;
593 break;
594 case GK_GFX602:
595 case GK_GFX601:
596 case GK_GFX600:
597 Features["image-insts"] = true;
598 Features["s-memtime-inst"] = true;
599 Features["gws"] = true;
600 Features["atomic-fmin-fmax-global-f32"] = true;
601 Features["atomic-fmin-fmax-global-f64"] = true;
602 Features["wavefrontsize64"] = true;
603 Features["cube-insts"] = true;
604 Features["lerp-inst"] = true;
605 Features["sad-insts"] = true;
606 Features["cvt-pknorm-vop2-insts"] = true;
607 break;
608 case GK_NONE:
609 break;
610 default:
611 llvm_unreachable("Unhandled GPU!");
612 }
613}
614
615/// Fills Features map with default values for given target GPU.
616/// \p Features contains overriding target features and this function returns
617/// default target features with entries overridden by \p Features.
618std::pair<FeatureError, StringRef>
619AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
620 StringMap<bool> &Features) {
621 // XXX - What does the member GPU mean if device name string passed here?
622 if (T.isSPIRV() && T.getOS() == Triple::OSType::AMDHSA) {
623 // AMDGCN SPIRV must support the union of all AMDGCN features. This list
624 // should be kept in sorted order and updated whenever new features are
625 // added.
626 Features["16-bit-insts"] = true;
627 Features["ashr-pk-insts"] = true;
628 Features["atomic-buffer-pk-add-bf16-inst"] = true;
629 Features["atomic-buffer-global-pk-add-f16-insts"] = true;
630 Features["atomic-ds-pk-add-16-insts"] = true;
631 Features["atomic-fadd-rtn-insts"] = true;
632 Features["atomic-flat-pk-add-16-insts"] = true;
633 Features["atomic-global-pk-add-bf16-inst"] = true;
634 Features["bf16-trans-insts"] = true;
635 Features["bf16-cvt-insts"] = true;
636 Features["bf8-cvt-scale-insts"] = true;
637 Features["bitop3-insts"] = true;
638 Features["ci-insts"] = true;
639 Features["dl-insts"] = true;
640 Features["dot1-insts"] = true;
641 Features["dot2-insts"] = true;
642 Features["dot3-insts"] = true;
643 Features["dot4-insts"] = true;
644 Features["dot5-insts"] = true;
645 Features["dot6-insts"] = true;
646 Features["dot7-insts"] = true;
647 Features["dot8-insts"] = true;
648 Features["dot9-insts"] = true;
649 Features["dot10-insts"] = true;
650 Features["dot11-insts"] = true;
651 Features["dot12-insts"] = true;
652 Features["dot13-insts"] = true;
653 Features["dpp"] = true;
654 Features["f16bf16-to-fp6bf6-cvt-scale-insts"] = true;
655 Features["f32-to-f16bf16-cvt-sr-insts"] = true;
656 Features["fp4-cvt-scale-insts"] = true;
657 Features["fp6bf6-cvt-scale-insts"] = true;
658 Features["fp8e5m3-insts"] = true;
659 Features["fp8-conversion-insts"] = true;
660 Features["fp8-cvt-scale-insts"] = true;
661 Features["fp8-insts"] = true;
662 Features["gfx8-insts"] = true;
663 Features["gfx9-insts"] = true;
664 Features["gfx90a-insts"] = true;
665 Features["gfx940-insts"] = true;
666 Features["gfx950-insts"] = true;
667 Features["gfx10-insts"] = true;
668 Features["gfx10-3-insts"] = true;
669 Features["gfx11-insts"] = true;
670 Features["gfx12-insts"] = true;
671 Features["gfx1250-insts"] = true;
672 Features["gws"] = true;
673 Features["image-insts"] = true;
674 Features["mai-insts"] = true;
675 Features["permlane16-swap"] = true;
676 Features["permlane32-swap"] = true;
677 Features["prng-inst"] = true;
678 Features["setprio-inc-wg-inst"] = true;
679 Features["s-memrealtime"] = true;
680 Features["s-memtime-inst"] = true;
681 Features["tanh-insts"] = true;
682 Features["tensor-cvt-lut-insts"] = true;
683 Features["transpose-load-f4f6-insts"] = true;
684 Features["vmem-pref-insts"] = true;
685 Features["vmem-to-lds-load-insts"] = true;
686 Features["wavefrontsize32"] = true;
687 Features["wavefrontsize64"] = true;
688 } else if (T.isAMDGCN()) {
689 StringMap<bool> DefaultFeatures;
690 fillAMDGCNFeatureMap(GPU, T, Features&: DefaultFeatures);
691 return insertWaveSizeFeature(GPU, T, DefaultFeatures, Features);
692 } else {
693 if (GPU.empty())
694 GPU = "r600";
695
696 switch (llvm::AMDGPU::parseArchR600(CPU: GPU)) {
697 case GK_CAYMAN:
698 case GK_CYPRESS:
699 case GK_RV770:
700 case GK_RV670:
701 // TODO: Add fp64 when implemented.
702 break;
703 case GK_TURKS:
704 case GK_CAICOS:
705 case GK_BARTS:
706 case GK_SUMO:
707 case GK_REDWOOD:
708 case GK_JUNIPER:
709 case GK_CEDAR:
710 case GK_RV730:
711 case GK_RV710:
712 case GK_RS880:
713 case GK_R630:
714 case GK_R600:
715 break;
716 default:
717 llvm_unreachable("Unhandled GPU!");
718 }
719 }
720 return {NO_ERROR, StringRef()};
721}
722