//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides AMDGPU specific target streamer methods.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPUTargetStreamer.h"
14#include "AMDGPUMCExpr.h"
15#include "AMDGPUMCKernelDescriptor.h"
16#include "AMDGPUPTNote.h"
17#include "Utils/AMDGPUBaseInfo.h"
18#include "Utils/AMDKernelCodeTUtils.h"
19#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
20#include "llvm/BinaryFormat/ELF.h"
21#include "llvm/MC/MCAsmInfo.h"
22#include "llvm/MC/MCAssembler.h"
23#include "llvm/MC/MCContext.h"
24#include "llvm/MC/MCELFObjectWriter.h"
25#include "llvm/MC/MCELFStreamer.h"
26#include "llvm/MC/MCSubtargetInfo.h"
27#include "llvm/Support/AMDGPUMetadata.h"
28#include "llvm/Support/AMDHSAKernelDescriptor.h"
29#include "llvm/Support/CommandLine.h"
30#include "llvm/Support/FormattedStream.h"
31#include "llvm/TargetParser/TargetParser.h"
32
33using namespace llvm;
34using namespace llvm::AMDGPU;
35
36//===----------------------------------------------------------------------===//
37// AMDGPUTargetStreamer
38//===----------------------------------------------------------------------===//
39
// Testing-only knob: when nonzero, forces a specific generic_v<N> flag to be
// added (see cl::desc below). NOTE(review): the consuming code is not visible
// in this chunk — presumably used when forming the generic target version.
static cl::opt<unsigned>
    ForceGenericVersion("amdgpu-force-generic-version",
                        cl::desc("Force a specific generic_v<N> flag to be "
                                 "added. For testing purposes only."),
                        cl::ReallyHidden, cl::init(Val: 0));
45
46bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
47 msgpack::Document HSAMetadataDoc;
48 if (!HSAMetadataDoc.fromYAML(S: HSAMetadataString))
49 return false;
50 return EmitHSAMetadata(HSAMetadata&: HSAMetadataDoc, Strict: false);
51}
52
// Maps an ELF e_flags EF_AMDGPU_MACH_* value back to its processor name.
// Unrecognized values map to GK_NONE. The AMDGCN name table is consulted
// first; if it yields nothing, the R600 table is used as the fallback.
StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
  AMDGPU::GPUKind AK;

  // clang-format off
  switch (ElfMach) {
  case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
  case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
  case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
  case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
  case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
  case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
  case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
  case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
  case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
  case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
  case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
  case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
  case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
  case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
  case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
  case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX602: AK = GK_GFX602; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX705: AK = GK_GFX705; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX805: AK = GK_GFX805; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908: AK = GK_GFX908; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: AK = GK_GFX950; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013: AK = GK_GFX1013; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: AK = GK_GFX1030; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031: AK = GK_GFX1031; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032: AK = GK_GFX1032; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033: AK = GK_GFX1033; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034: AK = GK_GFX1034; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035: AK = GK_GFX1035; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036: AK = GK_GFX1036; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100: AK = GK_GFX1100; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101: AK = GK_GFX1101; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102: AK = GK_GFX1102; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: AK = GK_GFX1103; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150: AK = GK_GFX1150; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152: AK = GK_GFX1152; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1153: AK = GK_GFX1153; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1170: AK = GK_GFX1170; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1250: AK = GK_GFX1250; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1251: AK = GK_GFX1251; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1310: AK = GK_GFX1310; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: AK = GK_GFX9_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC: AK = GK_GFX9_4_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: AK = GK_GFX10_1_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: AK = GK_GFX10_3_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: AK = GK_GFX11_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC: AK = GK_GFX12_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
  default: AK = GK_NONE; break;
  }
  // clang-format on

  // Prefer the AMDGCN name; fall back to the R600 table when it is empty.
  StringRef GPUName = getArchNameAMDGCN(AK);
  if (GPUName != "")
    return GPUName;
  return getArchNameR600(AK);
}
139
// Maps a processor name to its ELF e_flags EF_AMDGPU_MACH_* value — the
// inverse of getArchNameFromElfMach. The name is parsed first as an AMDGCN
// processor, then as R600; unknown names become GK_NONE → EF_AMDGPU_MACH_NONE.
unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
  AMDGPU::GPUKind AK = parseArchAMDGCN(CPU: GPU);
  if (AK == AMDGPU::GPUKind::GK_NONE)
    AK = parseArchR600(CPU: GPU);

  // clang-format off
  switch (AK) {
  case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600;
  case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630;
  case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880;
  case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670;
  case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710;
  case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730;
  case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770;
  case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR;
  case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
  case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
  case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
  case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO;
  case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS;
  case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS;
  case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
  case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS;
  case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
  case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
  case GK_GFX602: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX602;
  case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
  case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
  case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
  case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
  case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
  case GK_GFX705: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX705;
  case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
  case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
  case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
  case GK_GFX805: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX805;
  case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
  case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
  case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
  case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
  case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
  case GK_GFX908: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX908;
  case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
  case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A;
  case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
  case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942;
  case GK_GFX950: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX950;
  case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
  case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
  case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
  case GK_GFX1013: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013;
  case GK_GFX1030: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030;
  case GK_GFX1031: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031;
  case GK_GFX1032: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032;
  case GK_GFX1033: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033;
  case GK_GFX1034: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034;
  case GK_GFX1035: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035;
  case GK_GFX1036: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036;
  case GK_GFX1100: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100;
  case GK_GFX1101: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101;
  case GK_GFX1102: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102;
  case GK_GFX1103: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103;
  case GK_GFX1150: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150;
  case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151;
  case GK_GFX1152: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152;
  case GK_GFX1153: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1153;
  case GK_GFX1170: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1170;
  case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200;
  case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201;
  case GK_GFX1250: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1250;
  case GK_GFX1251: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1251;
  case GK_GFX1310: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1310;
  case GK_GFX9_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC;
  case GK_GFX9_4_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC;
  case GK_GFX10_1_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC;
  case GK_GFX10_3_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC;
  case GK_GFX11_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC;
  case GK_GFX12_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC;
  case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
  }
  // clang-format on

  // The switch above is fully covered; reaching here means a GPUKind was
  // added without a corresponding EF_AMDGPU_MACH_* mapping.
  llvm_unreachable("unknown GPU");
}
224
225//===----------------------------------------------------------------------===//
226// AMDGPUTargetAsmStreamer
227//===----------------------------------------------------------------------===//
228
// Textual (assembly) target streamer; directives are printed to \p OS.
AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
                                                 formatted_raw_ostream &OS)
    : AMDGPUTargetStreamer(S), OS(OS) { }
232
233// A hook for emitting stuff at the end.
234// We use it for emitting the accumulated PAL metadata as directives.
235// The PAL metadata is reset after it is emitted.
236void AMDGPUTargetAsmStreamer::finish() {
237 std::string S;
238 getPALMetadata()->toString(S);
239 OS << S;
240
241 // Reset the pal metadata so its data will not affect a compilation that
242 // reuses this object.
243 getPALMetadata()->reset();
244}
245
246void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget() {
247 OS << "\t.amdgcn_target \"" << getTargetID()->toString() << "\"\n";
248}
249
250void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion(
251 unsigned COV) {
252 AMDGPUTargetStreamer::EmitDirectiveAMDHSACodeObjectVersion(COV);
253 OS << "\t.amdhsa_code_object_version " << COV << '\n';
254}
255
// Prints the textual ".amd_kernel_code_t ... .end_amd_kernel_code_t" block
// for \p Header. Field values are MCExprs; each is constant-folded before
// printing (via the FoldAndPrint callback) so the output stays readable.
void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
  // Printing callback handed to EmitKernelCodeT: fold first, then print.
  auto FoldAndPrint = [&](const MCExpr *Expr, raw_ostream &OS,
                          const MCAsmInfo *MAI) {
    printAMDGPUMCExpr(Expr: foldAMDGPUMCExpr(Expr, Ctx&: getContext()), OS, MAI);
  };

  OS << "\t.amd_kernel_code_t\n";
  Header.EmitKernelCodeT(OS, Ctx&: getContext(), Helper: FoldAndPrint);
  OS << "\t.end_amd_kernel_code_t\n";
}
266
267void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
268 unsigned Type) {
269 switch (Type) {
270 default: llvm_unreachable("Invalid AMDGPU symbol type");
271 case ELF::STT_AMDGPU_HSA_KERNEL:
272 OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
273 break;
274 }
275}
276
277void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
278 Align Alignment) {
279 OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", "
280 << Alignment.value() << '\n';
281}
282
283void AMDGPUTargetAsmStreamer::EmitMCResourceInfo(
284 const MCSymbol *NumVGPR, const MCSymbol *NumAGPR,
285 const MCSymbol *NumExplicitSGPR, const MCSymbol *NumNamedBarrier,
286 const MCSymbol *PrivateSegmentSize, const MCSymbol *UsesVCC,
287 const MCSymbol *UsesFlatScratch, const MCSymbol *HasDynamicallySizedStack,
288 const MCSymbol *HasRecursion, const MCSymbol *HasIndirectCall) {
289#define PRINT_RES_INFO(ARG) \
290 OS << "\t.set "; \
291 ARG->print(OS, getContext().getAsmInfo()); \
292 OS << ", "; \
293 getContext().getAsmInfo()->printExpr(OS, *ARG->getVariableValue()); \
294 Streamer.addBlankLine();
295
296 PRINT_RES_INFO(NumVGPR);
297 PRINT_RES_INFO(NumAGPR);
298 PRINT_RES_INFO(NumExplicitSGPR);
299 PRINT_RES_INFO(NumNamedBarrier);
300 PRINT_RES_INFO(PrivateSegmentSize);
301 PRINT_RES_INFO(UsesVCC);
302 PRINT_RES_INFO(UsesFlatScratch);
303 PRINT_RES_INFO(HasDynamicallySizedStack);
304 PRINT_RES_INFO(HasRecursion);
305 PRINT_RES_INFO(HasIndirectCall);
306#undef PRINT_RES_INFO
307}
308
309void AMDGPUTargetAsmStreamer::EmitMCResourceMaximums(
310 const MCSymbol *MaxVGPR, const MCSymbol *MaxAGPR, const MCSymbol *MaxSGPR,
311 const MCSymbol *MaxNamedBarrier) {
312#define PRINT_RES_INFO(ARG) \
313 OS << "\t.set "; \
314 ARG->print(OS, getContext().getAsmInfo()); \
315 OS << ", "; \
316 getContext().getAsmInfo()->printExpr(OS, *ARG->getVariableValue()); \
317 Streamer.addBlankLine();
318
319 PRINT_RES_INFO(MaxVGPR);
320 PRINT_RES_INFO(MaxAGPR);
321 PRINT_RES_INFO(MaxSGPR);
322 PRINT_RES_INFO(MaxNamedBarrier);
323#undef PRINT_RES_INFO
324}
325
326bool AMDGPUTargetAsmStreamer::EmitISAVersion() {
327 OS << "\t.amd_amdgpu_isa \"" << getTargetID()->toString() << "\"\n";
328 return true;
329}
330
331bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
332 msgpack::Document &HSAMetadataDoc, bool Strict) {
333 HSAMD::V3::MetadataVerifier Verifier(Strict);
334 if (!Verifier.verify(HSAMetadataRoot&: HSAMetadataDoc.getRoot()))
335 return false;
336
337 std::string HSAMetadataString;
338 raw_string_ostream StrOS(HSAMetadataString);
339 HSAMetadataDoc.toYAML(OS&: StrOS);
340
341 OS << '\t' << HSAMD::V3::AssemblerDirectiveBegin << '\n';
342 OS << StrOS.str() << '\n';
343 OS << '\t' << HSAMD::V3::AssemblerDirectiveEnd << '\n';
344 return true;
345}
346
347bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
348 const uint32_t Encoded_s_code_end = 0xbf9f0000;
349 const uint32_t Encoded_s_nop = 0xbf800000;
350 uint32_t Encoded_pad = Encoded_s_code_end;
351
352 // Instruction cache line size in bytes.
353 const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
354 const unsigned CacheLineSize = 1u << Log2CacheLineSize;
355
356 // Extra padding amount in bytes to support prefetch mode 3.
357 unsigned FillSize = 3 * CacheLineSize;
358
359 if (AMDGPU::isGFX90A(STI)) {
360 Encoded_pad = Encoded_s_nop;
361 FillSize = 16 * CacheLineSize;
362 }
363
364 OS << "\t.p2alignl " << Log2CacheLineSize << ", " << Encoded_pad << '\n';
365 OS << "\t.fill " << (FillSize / 4) << ", 4, " << Encoded_pad << '\n';
366 return true;
367}
368
// Prints the complete ".amdhsa_kernel <name> ... .end_amdhsa_kernel" block
// describing kernel descriptor \p KD. Every field is an MCExpr; each is
// shifted/masked out of its containing register word, constant-folded, and
// printed as a directive. Which directives appear depends on the ISA version
// and subtarget features, so the emission order below must not be changed.
void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const MCKernelDescriptor &KD, const MCExpr *NextVGPR,
    const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
    const MCExpr *ReserveFlatScr) {
  IsaVersion IVersion = getIsaVersion(GPU: STI.getCPU());
  const MCAsmInfo *MAI = getContext().getAsmInfo();

  OS << "\t.amdhsa_kernel " << KernelName << '\n';

  // Extracts (Src >> Shift) & Mask from a descriptor word, folds the result,
  // and prints it as "<Directive> <value>".
  auto PrintField = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask,
                        StringRef Directive) {
    OS << "\t\t" << Directive << ' ';
    const MCExpr *ShiftedAndMaskedExpr =
        MCKernelDescriptor::bits_get(Src: Expr, Shift, Mask, Ctx&: getContext());
    const MCExpr *New = foldAMDGPUMCExpr(Expr: ShiftedAndMaskedExpr, Ctx&: getContext());
    printAMDGPUMCExpr(Expr: New, OS, MAI);
    OS << '\n';
  };

  // Folds and prints a bare expression (no shifting/masking).
  auto EmitMCExpr = [&](const MCExpr *Value) {
    const MCExpr *NewExpr = foldAMDGPUMCExpr(Expr: Value, Ctx&: getContext());
    printAMDGPUMCExpr(Expr: NewExpr, OS, MAI);
  };

  // Segment sizes.
  OS << "\t\t.amdhsa_group_segment_fixed_size ";
  EmitMCExpr(KD.group_segment_fixed_size);
  OS << '\n';

  OS << "\t\t.amdhsa_private_segment_fixed_size ";
  EmitMCExpr(KD.private_segment_fixed_size);
  OS << '\n';

  OS << "\t\t.amdhsa_kernarg_size ";
  EmitMCExpr(KD.kernarg_size);
  OS << '\n';

  // User SGPR count lives at different bit positions on gfx125x.
  if (isGFX1250Plus(STI)) {
    PrintField(KD.compute_pgm_rsrc2,
               amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT,
               ".amdhsa_user_sgpr_count");
  } else {
    PrintField(KD.compute_pgm_rsrc2,
               amdhsa::COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT,
               ".amdhsa_user_sgpr_count");
  }

  // User SGPR enable bits; the scratch-related ones are omitted when flat
  // scratch is architected (handled by hardware, not user SGPRs).
  if (!hasArchitectedFlatScratch(STI))
    PrintField(
        KD.kernel_code_properties,
        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
        ".amdhsa_user_sgpr_private_segment_buffer");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,
             ".amdhsa_user_sgpr_dispatch_ptr");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,
             ".amdhsa_user_sgpr_queue_ptr");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
             ".amdhsa_user_sgpr_kernarg_segment_ptr");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,
             ".amdhsa_user_sgpr_dispatch_id");
  if (!hasArchitectedFlatScratch(STI))
    PrintField(KD.kernel_code_properties,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
               ".amdhsa_user_sgpr_flat_scratch_init");
  if (hasKernargPreload(STI)) {
    PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH_SHIFT,
               amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,
               ".amdhsa_user_sgpr_kernarg_preload_length");
    PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET_SHIFT,
               amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,
               ".amdhsa_user_sgpr_kernarg_preload_offset");
  }
  PrintField(
      KD.kernel_code_properties,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
      ".amdhsa_user_sgpr_private_segment_size");
  if (IVersion.Major >= 10)
    PrintField(KD.kernel_code_properties,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
               ".amdhsa_wavefront_size32");
  if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
    PrintField(KD.kernel_code_properties,
               amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,
               amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,
               ".amdhsa_uses_dynamic_stack");
  // System SGPR/VGPR setup bits from rsrc2.
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,
             (hasArchitectedFlatScratch(STI)
                  ? ".amdhsa_enable_private_segment"
                  : ".amdhsa_system_sgpr_private_segment_wavefront_offset"));
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,
             ".amdhsa_system_sgpr_workgroup_id_x");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
             ".amdhsa_system_sgpr_workgroup_id_y");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,
             ".amdhsa_system_sgpr_workgroup_id_z");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,
             ".amdhsa_system_sgpr_workgroup_info");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,
             ".amdhsa_system_vgpr_workitem_id");

  // These directives are required.
  OS << "\t\t.amdhsa_next_free_vgpr ";
  EmitMCExpr(NextVGPR);
  OS << '\n';

  OS << "\t\t.amdhsa_next_free_sgpr ";
  EmitMCExpr(NextSGPR);
  OS << '\n';

  if (AMDGPU::isGFX90A(STI)) {
    // MCExpr equivalent of taking the (accum_offset + 1) * 4.
    const MCExpr *accum_bits = MCKernelDescriptor::bits_get(
        Src: KD.compute_pgm_rsrc3,
        Shift: amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
        Mask: amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, Ctx&: getContext());
    accum_bits = MCBinaryExpr::createAdd(
        LHS: accum_bits, RHS: MCConstantExpr::create(Value: 1, Ctx&: getContext()), Ctx&: getContext());
    accum_bits = MCBinaryExpr::createMul(
        LHS: accum_bits, RHS: MCConstantExpr::create(Value: 4, Ctx&: getContext()), Ctx&: getContext());
    OS << "\t\t.amdhsa_accum_offset ";
    const MCExpr *New = foldAMDGPUMCExpr(Expr: accum_bits, Ctx&: getContext());
    printAMDGPUMCExpr(Expr: New, OS, MAI);
    OS << '\n';
  }

  if (isGFX1250Plus(STI))
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
               ".amdhsa_named_barrier_count");

  OS << "\t\t.amdhsa_reserve_vcc ";
  EmitMCExpr(ReserveVCC);
  OS << '\n';

  if (IVersion.Major >= 7 && !hasArchitectedFlatScratch(STI)) {
    OS << "\t\t.amdhsa_reserve_flat_scratch ";
    EmitMCExpr(ReserveFlatScr);
    OS << '\n';
  }

  // XNACK reservation only exists as a directive for COV4/COV5.
  switch (CodeObjectVersion) {
  default:
    break;
  case AMDGPU::AMDHSA_COV4:
  case AMDGPU::AMDHSA_COV5:
    if (getTargetID()->isXnackSupported())
      OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n';
    break;
  }

  // Float mode bits from rsrc1.
  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,
             ".amdhsa_float_round_mode_32");
  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64,
             ".amdhsa_float_round_mode_16_64");
  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32,
             ".amdhsa_float_denorm_mode_32");
  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
             ".amdhsa_float_denorm_mode_16_64");
  // dx10_clamp/ieee_mode bits were removed in gfx12.
  if (IVersion.Major < 12) {
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
               ".amdhsa_dx10_clamp");
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,
               ".amdhsa_ieee_mode");
  }
  if (IVersion.Major >= 9) {
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL,
               ".amdhsa_fp16_overflow");
  }
  if (AMDGPU::isGFX90A(STI))
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, ".amdhsa_tg_split");
  if (AMDGPU::supportsWGP(STI))
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
               ".amdhsa_workgroup_processor_mode");
  if (IVersion.Major >= 10) {
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED,
               ".amdhsa_memory_ordered");
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS,
               ".amdhsa_forward_progress");
  }
  // Shared VGPRs exist only on gfx10/gfx11.
  if (IVersion.Major >= 10 && IVersion.Major < 12) {
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
               ".amdhsa_shared_vgpr_count");
  }
  // inst_pref_size lives at different positions on gfx11 vs gfx12+.
  if (IVersion.Major == 11) {
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE,
               ".amdhsa_inst_pref_size");
  }
  if (IVersion.Major >= 12) {
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE,
               ".amdhsa_inst_pref_size");
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
               ".amdhsa_round_robin_scheduling");
  }
  // Exception-enable bits from rsrc2.
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
      ".amdhsa_exception_fp_ieee_invalid_op");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
      ".amdhsa_exception_fp_denorm_src");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
      ".amdhsa_exception_fp_ieee_div_zero");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
      ".amdhsa_exception_fp_ieee_overflow");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
      ".amdhsa_exception_fp_ieee_underflow");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
      ".amdhsa_exception_fp_ieee_inexact");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
      ".amdhsa_exception_int_div_zero");

  OS << "\t.end_amdhsa_kernel\n";
}
659
660//===----------------------------------------------------------------------===//
661// AMDGPUTargetELFStreamer
662//===----------------------------------------------------------------------===//
663
// Object-file target streamer; keeps the subtarget so eflags/ABI version can
// be finalized in finish().
AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S,
                                                 const MCSubtargetInfo &STI)
    : AMDGPUTargetStreamer(S), STI(STI), Streamer(S) {}
667
// For this subclass the wrapped MCStreamer is always an MCELFStreamer, so
// the static_cast is safe.
MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
  return static_cast<MCELFStreamer &>(Streamer);
}
671
672// A hook for emitting stuff at the end.
673// We use it for emitting the accumulated PAL metadata as a .note record.
674// The PAL metadata is reset after it is emitted.
675void AMDGPUTargetELFStreamer::finish() {
676 ELFObjectWriter &W = getStreamer().getWriter();
677 W.setELFHeaderEFlags(getEFlags());
678 W.setOverrideABIVersion(
679 getELFABIVersion(OS: STI.getTargetTriple(), CodeObjectVersion));
680
681 std::string Blob;
682 const char *Vendor = getPALMetadata()->getVendor();
683 unsigned Type = getPALMetadata()->getType();
684 getPALMetadata()->toBlob(Type, S&: Blob);
685 if (Blob.empty())
686 return;
687 EmitNote(Name: Vendor, DescSize: MCConstantExpr::create(Value: Blob.size(), Ctx&: getContext()), NoteType: Type,
688 EmitDesc: [&](MCELFStreamer &OS) { OS.emitBytes(Data: Blob); });
689
690 // Reset the pal metadata so its data will not affect a compilation that
691 // reuses this object.
692 getPALMetadata()->reset();
693}
694
// Emits one ELF note record into the AMDGPU note section, in the standard
// layout: namesz, descsz, type, name, align(4), desc, align(4).
// DescSZ is an MCExpr so the descriptor size may be an unresolved
// label-difference produced by EmitDesc.
void AMDGPUTargetELFStreamer::EmitNote(
    StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
    function_ref<void(MCELFStreamer &)> EmitDesc) {
  auto &S = getStreamer();
  auto &Context = S.getContext();

  // namesz counts a trailing NUL. NOTE(review): no explicit NUL byte is
  // emitted below — the zero-fill alignment padding is relied on to supply
  // it; confirm Name's length never lands exactly on a 4-byte boundary.
  auto NameSZ = Name.size() + 1;

  unsigned NoteFlags = 0;
  // TODO Apparently, this is currently needed for OpenCL as mentioned in
  // https://reviews.llvm.org/D74995
  if (isHsaAbi(STI))
    NoteFlags = ELF::SHF_ALLOC;

  S.pushSection();
  S.switchSection(
      Section: Context.getELFSection(Section: ElfNote::SectionName, Type: ELF::SHT_NOTE, Flags: NoteFlags));
  S.emitInt32(Value: NameSZ); // namesz
  S.emitValue(Value: DescSZ, Size: 4); // descz
  S.emitInt32(Value: NoteType); // type
  S.emitBytes(Data: Name); // name
  S.emitValueToAlignment(Alignment: Align(4), Fill: 0, FillLen: 1, MaxBytesToEmit: 0); // padding 0
  EmitDesc(S); // desc
  S.emitValueToAlignment(Alignment: Align(4), Fill: 0, FillLen: 1, MaxBytesToEmit: 0); // padding 0
  S.popSection();
}
721
722unsigned AMDGPUTargetELFStreamer::getEFlags() {
723 switch (STI.getTargetTriple().getArch()) {
724 default:
725 llvm_unreachable("Unsupported Arch");
726 case Triple::r600:
727 return getEFlagsR600();
728 case Triple::amdgcn:
729 return getEFlagsAMDGCN();
730 }
731}
732
733unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {
734 assert(STI.getTargetTriple().getArch() == Triple::r600);
735
736 return getElfMach(GPU: STI.getCPU());
737}
738
739unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
740 assert(STI.getTargetTriple().isAMDGCN());
741
742 switch (STI.getTargetTriple().getOS()) {
743 default:
744 // TODO: Why are some tests have "mingw" listed as OS?
745 // llvm_unreachable("Unsupported OS");
746 case Triple::UnknownOS:
747 return getEFlagsUnknownOS();
748 case Triple::AMDHSA:
749 return getEFlagsAMDHSA();
750 case Triple::AMDPAL:
751 return getEFlagsAMDPAL();
752 case Triple::Mesa3D:
753 return getEFlagsMesa3D();
754 }
755}
756
757unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
758 // TODO: Why are some tests have "mingw" listed as OS?
759 // assert(STI.getTargetTriple().getOS() == Triple::UnknownOS);
760
761 return getEFlagsV3();
762}
763
764unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
765 assert(isHsaAbi(STI));
766
767 if (CodeObjectVersion >= 6)
768 return getEFlagsV6();
769 return getEFlagsV4();
770}
771
772unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
773 assert(STI.getTargetTriple().getOS() == Triple::AMDPAL);
774
775 return getEFlagsV3();
776}
777
778unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
779 assert(STI.getTargetTriple().getOS() == Triple::Mesa3D);
780
781 return getEFlagsV3();
782}
783
784unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
785 unsigned EFlagsV3 = 0;
786
787 // mach.
788 EFlagsV3 |= getElfMach(GPU: STI.getCPU());
789
790 // xnack.
791 if (getTargetID()->isXnackOnOrAny())
792 EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_XNACK_V3;
793 // sramecc.
794 if (getTargetID()->isSramEccOnOrAny())
795 EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_V3;
796
797 return EFlagsV3;
798}
799
800unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
801 unsigned EFlagsV4 = 0;
802
803 // mach.
804 EFlagsV4 |= getElfMach(GPU: STI.getCPU());
805
806 // xnack.
807 switch (getTargetID()->getXnackSetting()) {
808 case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
809 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4;
810 break;
811 case AMDGPU::IsaInfo::TargetIDSetting::Any:
812 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4;
813 break;
814 case AMDGPU::IsaInfo::TargetIDSetting::Off:
815 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4;
816 break;
817 case AMDGPU::IsaInfo::TargetIDSetting::On:
818 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4;
819 break;
820 }
821 // sramecc.
822 switch (getTargetID()->getSramEccSetting()) {
823 case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
824 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4;
825 break;
826 case AMDGPU::IsaInfo::TargetIDSetting::Any:
827 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ANY_V4;
828 break;
829 case AMDGPU::IsaInfo::TargetIDSetting::Off:
830 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_OFF_V4;
831 break;
832 case AMDGPU::IsaInfo::TargetIDSetting::On:
833 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ON_V4;
834 break;
835 }
836
837 return EFlagsV4;
838}
839
840unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
841 unsigned Flags = getEFlagsV4();
842
843 unsigned Version = ForceGenericVersion;
844 if (!Version) {
845 switch (parseArchAMDGCN(CPU: STI.getCPU())) {
846 case AMDGPU::GK_GFX9_GENERIC:
847 Version = GenericVersion::GFX9;
848 break;
849 case AMDGPU::GK_GFX9_4_GENERIC:
850 Version = GenericVersion::GFX9_4;
851 break;
852 case AMDGPU::GK_GFX10_1_GENERIC:
853 Version = GenericVersion::GFX10_1;
854 break;
855 case AMDGPU::GK_GFX10_3_GENERIC:
856 Version = GenericVersion::GFX10_3;
857 break;
858 case AMDGPU::GK_GFX11_GENERIC:
859 Version = GenericVersion::GFX11;
860 break;
861 case AMDGPU::GK_GFX12_GENERIC:
862 Version = GenericVersion::GFX12;
863 break;
864 default:
865 break;
866 }
867 }
868
869 // Versions start at 1.
870 if (Version) {
871 if (Version > ELF::EF_AMDGPU_GENERIC_VERSION_MAX)
872 report_fatal_error(reason: "Cannot encode generic code object version " +
873 Twine(Version) +
874 " - no ELF flag can represent this version!");
875 Flags |= (Version << ELF::EF_AMDGPU_GENERIC_VERSION_OFFSET);
876 }
877
878 return Flags;
879}
880
881void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}
882
883void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
884 MCStreamer &OS = getStreamer();
885 OS.pushSection();
886 Header.EmitKernelCodeT(OS, Ctx&: getContext());
887 OS.popSection();
888}
889
890void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
891 unsigned Type) {
892 auto *Symbol = static_cast<MCSymbolELF *>(
893 getStreamer().getContext().getOrCreateSymbol(Name: SymbolName));
894 Symbol->setType(Type);
895}
896
897void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
898 Align Alignment) {
899 auto *SymbolELF = static_cast<MCSymbolELF *>(Symbol);
900 SymbolELF->setType(ELF::STT_OBJECT);
901
902 if (!SymbolELF->isBindingSet())
903 SymbolELF->setBinding(ELF::STB_GLOBAL);
904
905 if (SymbolELF->declareCommon(Size, Alignment)) {
906 report_fatal_error(reason: "Symbol: " + Symbol->getName() +
907 " redeclared as different type");
908 }
909
910 SymbolELF->setIndex(ELF::SHN_AMDGPU_LDS);
911 SymbolELF->setSize(MCConstantExpr::create(Value: Size, Ctx&: getContext()));
912}
913
914bool AMDGPUTargetELFStreamer::EmitISAVersion() {
915 // Create two labels to mark the beginning and end of the desc field
916 // and a MCExpr to calculate the size of the desc field.
917 auto &Context = getContext();
918 auto *DescBegin = Context.createTempSymbol();
919 auto *DescEnd = Context.createTempSymbol();
920 auto *DescSZ = MCBinaryExpr::createSub(
921 LHS: MCSymbolRefExpr::create(Symbol: DescEnd, Ctx&: Context),
922 RHS: MCSymbolRefExpr::create(Symbol: DescBegin, Ctx&: Context), Ctx&: Context);
923
924 EmitNote(Name: ElfNote::NoteNameV2, DescSZ, NoteType: ELF::NT_AMD_HSA_ISA_NAME,
925 EmitDesc: [&](MCELFStreamer &OS) {
926 OS.emitLabel(Symbol: DescBegin);
927 OS.emitBytes(Data: getTargetID()->toString());
928 OS.emitLabel(Symbol: DescEnd);
929 });
930 return true;
931}
932
933bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
934 bool Strict) {
935 HSAMD::V3::MetadataVerifier Verifier(Strict);
936 if (!Verifier.verify(HSAMetadataRoot&: HSAMetadataDoc.getRoot()))
937 return false;
938
939 std::string HSAMetadataString;
940 HSAMetadataDoc.writeToBlob(Blob&: HSAMetadataString);
941
942 // Create two labels to mark the beginning and end of the desc field
943 // and a MCExpr to calculate the size of the desc field.
944 auto &Context = getContext();
945 auto *DescBegin = Context.createTempSymbol();
946 auto *DescEnd = Context.createTempSymbol();
947 auto *DescSZ = MCBinaryExpr::createSub(
948 LHS: MCSymbolRefExpr::create(Symbol: DescEnd, Ctx&: Context),
949 RHS: MCSymbolRefExpr::create(Symbol: DescBegin, Ctx&: Context), Ctx&: Context);
950
951 EmitNote(Name: ElfNote::NoteNameV3, DescSZ, NoteType: ELF::NT_AMDGPU_METADATA,
952 EmitDesc: [&](MCELFStreamer &OS) {
953 OS.emitLabel(Symbol: DescBegin);
954 OS.emitBytes(Data: HSAMetadataString);
955 OS.emitLabel(Symbol: DescEnd);
956 });
957 return true;
958}
959
960bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
961 const uint32_t Encoded_s_code_end = 0xbf9f0000;
962 const uint32_t Encoded_s_nop = 0xbf800000;
963 uint32_t Encoded_pad = Encoded_s_code_end;
964
965 // Instruction cache line size in bytes.
966 const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
967 const unsigned CacheLineSize = 1u << Log2CacheLineSize;
968
969 // Extra padding amount in bytes to support prefetch mode 3.
970 unsigned FillSize = 3 * CacheLineSize;
971
972 if (AMDGPU::isGFX90A(STI)) {
973 Encoded_pad = Encoded_s_nop;
974 FillSize = 16 * CacheLineSize;
975 }
976
977 MCStreamer &OS = getStreamer();
978 OS.pushSection();
979 OS.emitValueToAlignment(Alignment: Align(CacheLineSize), Fill: Encoded_pad, FillLen: 4);
980 for (unsigned I = 0; I < FillSize; I += 4)
981 OS.emitInt32(Value: Encoded_pad);
982 OS.popSection();
983 return true;
984}
985
// Emits the kernel descriptor object for \p KernelName: defines the
// "<KernelName>.kd" symbol and streams the amdhsa::kernel_descriptor_t
// fields in their exact on-disk layout order, so the emitValue/emitInt8
// sequence below must not be reordered. NOTE(review): NextVGPR, NextSGPR,
// ReserveVCC and ReserveFlatScr are not consulted in this body — presumably
// the descriptor expressions were already resolved into KernelDescriptor;
// confirm against the textual streamer counterpart.
void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR,
    const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
    const MCExpr *ReserveFlatScr) {
  auto &Streamer = getStreamer();
  auto &Context = Streamer.getContext();

  auto *KernelCodeSymbol =
      static_cast<MCSymbolELF *>(Context.getOrCreateSymbol(Name: Twine(KernelName)));
  auto *KernelDescriptorSymbol = static_cast<MCSymbolELF *>(
      Context.getOrCreateSymbol(Name: Twine(KernelName) + Twine(".kd")));

  // Copy kernel descriptor symbol's binding, other and visibility from the
  // kernel code symbol.
  KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding());
  KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
  KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
  // Kernel descriptor symbol's type and size are fixed.
  KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
  KernelDescriptorSymbol->setSize(
      MCConstantExpr::create(Value: sizeof(amdhsa::kernel_descriptor_t), Ctx&: Context));

  // The visibility of the kernel code symbol must be protected or less to allow
  // static relocations from the kernel descriptor to be used.
  if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT)
    KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);

  // Begin the descriptor proper: fields are emitted in struct order with
  // sizes taken from the struct definition itself.
  Streamer.emitLabel(Symbol: KernelDescriptorSymbol);
  Streamer.emitValue(
      Value: KernelDescriptor.group_segment_fixed_size,
      Size: sizeof(amdhsa::kernel_descriptor_t::group_segment_fixed_size));
  Streamer.emitValue(
      Value: KernelDescriptor.private_segment_fixed_size,
      Size: sizeof(amdhsa::kernel_descriptor_t::private_segment_fixed_size));
  Streamer.emitValue(Value: KernelDescriptor.kernarg_size,
                     Size: sizeof(amdhsa::kernel_descriptor_t::kernarg_size));

  // reserved0 is zero-filled byte by byte.
  for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved0); ++i)
    Streamer.emitInt8(Value: 0u);

  // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
  // expression being created is:
  //   (start of kernel code) - (start of kernel descriptor)
  // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
  Streamer.emitValue(
      Value: MCBinaryExpr::createSub(
          LHS: MCSymbolRefExpr::create(Symbol: KernelCodeSymbol, specifier: AMDGPUMCExpr::S_REL64,
                                  Ctx&: Context),
          RHS: MCSymbolRefExpr::create(Symbol: KernelDescriptorSymbol, Ctx&: Context), Ctx&: Context),
      Size: sizeof(amdhsa::kernel_descriptor_t::kernel_code_entry_byte_offset));
  // reserved1 is zero-filled byte by byte.
  for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved1); ++i)
    Streamer.emitInt8(Value: 0u);
  Streamer.emitValue(Value: KernelDescriptor.compute_pgm_rsrc3,
                     Size: sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc3));
  Streamer.emitValue(Value: KernelDescriptor.compute_pgm_rsrc1,
                     Size: sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc1));
  Streamer.emitValue(Value: KernelDescriptor.compute_pgm_rsrc2,
                     Size: sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc2));
  Streamer.emitValue(
      Value: KernelDescriptor.kernel_code_properties,
      Size: sizeof(amdhsa::kernel_descriptor_t::kernel_code_properties));
  Streamer.emitValue(Value: KernelDescriptor.kernarg_preload,
                     Size: sizeof(amdhsa::kernel_descriptor_t::kernarg_preload));
  // reserved3 is zero-filled byte by byte, completing the descriptor.
  for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved3); ++i)
    Streamer.emitInt8(Value: 0u);
}
1053