//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides AMDGPU specific target streamer methods.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPUTargetStreamer.h"
14#include "AMDGPUMCExpr.h"
15#include "AMDGPUMCKernelDescriptor.h"
16#include "AMDGPUMCTargetDesc.h"
17#include "AMDGPUPTNote.h"
18#include "Utils/AMDGPUBaseInfo.h"
19#include "Utils/AMDKernelCodeTUtils.h"
20#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
21#include "llvm/BinaryFormat/ELF.h"
22#include "llvm/MC/MCAsmInfo.h"
23#include "llvm/MC/MCAssembler.h"
24#include "llvm/MC/MCContext.h"
25#include "llvm/MC/MCELFObjectWriter.h"
26#include "llvm/MC/MCELFStreamer.h"
27#include "llvm/MC/MCSubtargetInfo.h"
28#include "llvm/Support/AMDGPUMetadata.h"
29#include "llvm/Support/AMDHSAKernelDescriptor.h"
30#include "llvm/Support/CommandLine.h"
31#include "llvm/Support/FormattedStream.h"
32#include "llvm/TargetParser/TargetParser.h"
33
34using namespace llvm;
35using namespace llvm::AMDGPU;
36
37//===----------------------------------------------------------------------===//
38// AMDGPUTargetStreamer
39//===----------------------------------------------------------------------===//
40
// Testing hook: when non-zero, forces the generic_v<N> e_flags version that
// gets attached to the object, regardless of the target. Hidden from --help.
static cl::opt<unsigned>
    ForceGenericVersion("amdgpu-force-generic-version",
                        cl::desc("Force a specific generic_v<N> flag to be "
                                 "added. For testing purposes only."),
                        cl::ReallyHidden, cl::init(Val: 0));
46
47bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
48 msgpack::Document HSAMetadataDoc;
49 if (!HSAMetadataDoc.fromYAML(S: HSAMetadataString))
50 return false;
51 return EmitHSAMetadata(HSAMetadata&: HSAMetadataDoc, Strict: false);
52}
53
/// Translate an ELF e_flags EF_AMDGPU_MACH_* value into the corresponding
/// GPU architecture name (e.g. "gfx906").
///
/// Unrecognized mach values map to GK_NONE. The AMDGCN name table is
/// consulted first; when it yields no name (an R600-family kind), the R600
/// name table is used instead.
StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
  AMDGPU::GPUKind AK;

  // clang-format off
  switch (ElfMach) {
  case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
  case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
  case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
  case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
  case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
  case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
  case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
  case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
  case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
  case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
  case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
  case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
  case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
  case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
  case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
  case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX602: AK = GK_GFX602; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX705: AK = GK_GFX705; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX805: AK = GK_GFX805; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908: AK = GK_GFX908; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: AK = GK_GFX950; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013: AK = GK_GFX1013; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: AK = GK_GFX1030; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031: AK = GK_GFX1031; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032: AK = GK_GFX1032; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033: AK = GK_GFX1033; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034: AK = GK_GFX1034; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035: AK = GK_GFX1035; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036: AK = GK_GFX1036; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100: AK = GK_GFX1100; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101: AK = GK_GFX1101; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102: AK = GK_GFX1102; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: AK = GK_GFX1103; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150: AK = GK_GFX1150; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152: AK = GK_GFX1152; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1153: AK = GK_GFX1153; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1170: AK = GK_GFX1170; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1250: AK = GK_GFX1250; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1251: AK = GK_GFX1251; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1310: AK = GK_GFX1310; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: AK = GK_GFX9_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC: AK = GK_GFX9_4_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: AK = GK_GFX10_1_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: AK = GK_GFX10_3_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: AK = GK_GFX11_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC: AK = GK_GFX12_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_5_GENERIC: AK = GK_GFX12_5_GENERIC; break;
  case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
  default: AK = GK_NONE; break;
  }
  // clang-format on

  StringRef GPUName = getArchNameAMDGCN(AK);
  if (GPUName != "")
    return GPUName;
  // Not an AMDGCN kind; fall back to the R600 name table.
  return getArchNameR600(AK);
}
141
/// Translate a GPU name (e.g. "gfx906") into the corresponding ELF e_flags
/// EF_AMDGPU_MACH_* value. Inverse of getArchNameFromElfMach.
///
/// The name is first parsed as an AMDGCN arch; if that fails it is retried
/// as an R600 arch. Unknown names end up as GK_NONE -> EF_AMDGPU_MACH_NONE.
unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
  AMDGPU::GPUKind AK = parseArchAMDGCN(CPU: GPU);
  if (AK == AMDGPU::GPUKind::GK_NONE)
    AK = parseArchR600(CPU: GPU);

  // clang-format off
  switch (AK) {
  case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600;
  case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630;
  case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880;
  case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670;
  case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710;
  case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730;
  case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770;
  case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR;
  case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
  case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
  case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
  case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO;
  case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS;
  case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS;
  case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
  case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS;
  case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
  case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
  case GK_GFX602: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX602;
  case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
  case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
  case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
  case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
  case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
  case GK_GFX705: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX705;
  case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
  case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
  case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
  case GK_GFX805: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX805;
  case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
  case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
  case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
  case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
  case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
  case GK_GFX908: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX908;
  case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
  case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A;
  case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
  case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942;
  case GK_GFX950: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX950;
  case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
  case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
  case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
  case GK_GFX1013: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013;
  case GK_GFX1030: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030;
  case GK_GFX1031: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031;
  case GK_GFX1032: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032;
  case GK_GFX1033: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033;
  case GK_GFX1034: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034;
  case GK_GFX1035: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035;
  case GK_GFX1036: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036;
  case GK_GFX1100: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100;
  case GK_GFX1101: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101;
  case GK_GFX1102: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102;
  case GK_GFX1103: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103;
  case GK_GFX1150: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150;
  case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151;
  case GK_GFX1152: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152;
  case GK_GFX1153: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1153;
  case GK_GFX1170: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1170;
  case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200;
  case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201;
  case GK_GFX1250: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1250;
  case GK_GFX1251: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1251;
  case GK_GFX1310: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1310;
  case GK_GFX9_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC;
  case GK_GFX9_4_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC;
  case GK_GFX10_1_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC;
  case GK_GFX10_3_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC;
  case GK_GFX11_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC;
  case GK_GFX12_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC;
  case GK_GFX12_5_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_5_GENERIC;
  case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
  }
  // clang-format on

  // The switch above is exhaustive over GPUKind.
  llvm_unreachable("unknown GPU");
}
227
228//===----------------------------------------------------------------------===//
229// AMDGPUTargetAsmStreamer
230//===----------------------------------------------------------------------===//
231
// Construct a textual streamer; \p OS is the stream the assembler
// directives are written to.
AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
                                                 formatted_raw_ostream &OS)
    : AMDGPUTargetStreamer(S), OS(OS) { }
235
236// A hook for emitting stuff at the end.
237// We use it for emitting the accumulated PAL metadata as directives.
238// The PAL metadata is reset after it is emitted.
239void AMDGPUTargetAsmStreamer::finish() {
240 std::string S;
241 getPALMetadata()->toString(S);
242 OS << S;
243
244 // Reset the pal metadata so its data will not affect a compilation that
245 // reuses this object.
246 getPALMetadata()->reset();
247}
248
249void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget() {
250 OS << "\t.amdgcn_target \"" << *getTargetID() << "\"\n";
251}
252
253void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion(
254 unsigned COV) {
255 AMDGPUTargetStreamer::EmitDirectiveAMDHSACodeObjectVersion(COV);
256 OS << "\t.amdhsa_code_object_version " << COV << '\n';
257}
258
259void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
260 auto FoldAndPrint = [&](const MCExpr *Expr, raw_ostream &OS,
261 const MCAsmInfo *MAI) {
262 printAMDGPUMCExpr(Expr: foldAMDGPUMCExpr(Expr, Ctx&: getContext()), OS, MAI);
263 };
264
265 OS << "\t.amd_kernel_code_t\n";
266 Header.EmitKernelCodeT(OS, Ctx&: getContext(), Helper: FoldAndPrint);
267 OS << "\t.end_amd_kernel_code_t\n";
268}
269
270void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
271 unsigned Type) {
272 switch (Type) {
273 default: llvm_unreachable("Invalid AMDGPU symbol type");
274 case ELF::STT_AMDGPU_HSA_KERNEL:
275 OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
276 break;
277 }
278}
279
280void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
281 Align Alignment) {
282 OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", "
283 << Alignment.value() << '\n';
284}
285
286void AMDGPUTargetAsmStreamer::EmitMCResourceInfo(
287 const MCSymbol *NumVGPR, const MCSymbol *NumAGPR,
288 const MCSymbol *NumExplicitSGPR, const MCSymbol *NumNamedBarrier,
289 const MCSymbol *PrivateSegmentSize, const MCSymbol *UsesVCC,
290 const MCSymbol *UsesFlatScratch, const MCSymbol *HasDynamicallySizedStack,
291 const MCSymbol *HasRecursion, const MCSymbol *HasIndirectCall) {
292#define PRINT_RES_INFO(ARG) \
293 OS << "\t.set "; \
294 ARG->print(OS, getContext().getAsmInfo()); \
295 OS << ", "; \
296 getContext().getAsmInfo()->printExpr(OS, *ARG->getVariableValue()); \
297 Streamer.addBlankLine();
298
299 PRINT_RES_INFO(NumVGPR);
300 PRINT_RES_INFO(NumAGPR);
301 PRINT_RES_INFO(NumExplicitSGPR);
302 PRINT_RES_INFO(NumNamedBarrier);
303 PRINT_RES_INFO(PrivateSegmentSize);
304 PRINT_RES_INFO(UsesVCC);
305 PRINT_RES_INFO(UsesFlatScratch);
306 PRINT_RES_INFO(HasDynamicallySizedStack);
307 PRINT_RES_INFO(HasRecursion);
308 PRINT_RES_INFO(HasIndirectCall);
309#undef PRINT_RES_INFO
310}
311
312void AMDGPUTargetAsmStreamer::EmitMCResourceMaximums(
313 const MCSymbol *MaxVGPR, const MCSymbol *MaxAGPR, const MCSymbol *MaxSGPR,
314 const MCSymbol *MaxNamedBarrier) {
315#define PRINT_RES_INFO(ARG) \
316 OS << "\t.set "; \
317 ARG->print(OS, getContext().getAsmInfo()); \
318 OS << ", "; \
319 getContext().getAsmInfo()->printExpr(OS, *ARG->getVariableValue()); \
320 Streamer.addBlankLine();
321
322 PRINT_RES_INFO(MaxVGPR);
323 PRINT_RES_INFO(MaxAGPR);
324 PRINT_RES_INFO(MaxSGPR);
325 PRINT_RES_INFO(MaxNamedBarrier);
326#undef PRINT_RES_INFO
327}
328
329bool AMDGPUTargetAsmStreamer::EmitISAVersion() {
330 OS << "\t.amd_amdgpu_isa \"" << getTargetID() << "\"\n";
331 return true;
332}
333
334bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
335 msgpack::Document &HSAMetadataDoc, bool Strict) {
336 HSAMD::V3::MetadataVerifier Verifier(Strict);
337 if (!Verifier.verify(HSAMetadataRoot&: HSAMetadataDoc.getRoot()))
338 return false;
339
340 std::string HSAMetadataString;
341 raw_string_ostream StrOS(HSAMetadataString);
342 HSAMetadataDoc.toYAML(OS&: StrOS);
343
344 OS << '\t' << HSAMD::V3::AssemblerDirectiveBegin << '\n';
345 OS << StrOS.str() << '\n';
346 OS << '\t' << HSAMD::V3::AssemblerDirectiveEnd << '\n';
347 return true;
348}
349
350bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
351 const uint32_t Encoded_s_code_end = 0xbf9f0000;
352 const uint32_t Encoded_s_nop = 0xbf800000;
353 uint32_t Encoded_pad = Encoded_s_code_end;
354
355 // Instruction cache line size in bytes.
356 const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
357 const unsigned CacheLineSize = 1u << Log2CacheLineSize;
358
359 // Extra padding amount in bytes to support prefetch mode 3.
360 unsigned FillSize = 3 * CacheLineSize;
361
362 if (AMDGPU::isGFX90A(STI)) {
363 Encoded_pad = Encoded_s_nop;
364 FillSize = 16 * CacheLineSize;
365 }
366
367 OS << "\t.p2alignl " << Log2CacheLineSize << ", " << Encoded_pad << '\n';
368 OS << "\t.fill " << (FillSize / 4) << ", 4, " << Encoded_pad << '\n';
369 return true;
370}
371
/// Print the kernel descriptor \p KD for \p KernelName as a
/// .amdhsa_kernel / .end_amdhsa_kernel directive block.
///
/// Each field directive is the corresponding bit-range extracted (via
/// bits_get) from one of the packed descriptor words, folded, and printed as
/// an MCExpr. Which directives appear depends on the subtarget's ISA
/// version, feature flags, and the code object version.
void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const MCKernelDescriptor &KD, const MCExpr *NextVGPR,
    const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
    const MCExpr *ReserveFlatScr) {
  IsaVersion IVersion = getIsaVersion(GPU: STI.getCPU());
  const MCAsmInfo *MAI = getContext().getAsmInfo();

  OS << "\t.amdhsa_kernel " << KernelName << '\n';

  // Extract (Expr >> Shift) & (Mask >> Shift), fold, and print it after the
  // given directive name.
  auto PrintField = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask,
                        StringRef Directive) {
    OS << "\t\t" << Directive << ' ';
    const MCExpr *ShiftedAndMaskedExpr =
        MCKernelDescriptor::bits_get(Src: Expr, Shift, Mask, Ctx&: getContext());
    const MCExpr *New = foldAMDGPUMCExpr(Expr: ShiftedAndMaskedExpr, Ctx&: getContext());
    printAMDGPUMCExpr(Expr: New, OS, MAI);
    OS << '\n';
  };

  // Fold and print a whole expression (no bit extraction).
  auto EmitMCExpr = [&](const MCExpr *Value) {
    const MCExpr *NewExpr = foldAMDGPUMCExpr(Expr: Value, Ctx&: getContext());
    printAMDGPUMCExpr(Expr: NewExpr, OS, MAI);
  };

  OS << "\t\t.amdhsa_group_segment_fixed_size ";
  EmitMCExpr(KD.group_segment_fixed_size);
  OS << '\n';

  OS << "\t\t.amdhsa_private_segment_fixed_size ";
  EmitMCExpr(KD.private_segment_fixed_size);
  OS << '\n';

  OS << "\t\t.amdhsa_kernarg_size ";
  EmitMCExpr(KD.kernarg_size);
  OS << '\n';

  // The user SGPR count field moved within rsrc2 on gfx1250+.
  if (isGFX1250Plus(STI)) {
    PrintField(KD.compute_pgm_rsrc2,
               amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT,
               ".amdhsa_user_sgpr_count");
  } else {
    PrintField(KD.compute_pgm_rsrc2,
               amdhsa::COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT,
               ".amdhsa_user_sgpr_count");
  }

  // User SGPR setup directives.
  if (!hasArchitectedFlatScratch(STI))
    PrintField(
        KD.kernel_code_properties,
        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
        ".amdhsa_user_sgpr_private_segment_buffer");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,
             ".amdhsa_user_sgpr_dispatch_ptr");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,
             ".amdhsa_user_sgpr_queue_ptr");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
             ".amdhsa_user_sgpr_kernarg_segment_ptr");
  PrintField(KD.kernel_code_properties,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
             amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,
             ".amdhsa_user_sgpr_dispatch_id");
  if (!hasArchitectedFlatScratch(STI))
    PrintField(KD.kernel_code_properties,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
               ".amdhsa_user_sgpr_flat_scratch_init");
  if (hasKernargPreload(STI)) {
    PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH_SHIFT,
               amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,
               ".amdhsa_user_sgpr_kernarg_preload_length");
    PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET_SHIFT,
               amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,
               ".amdhsa_user_sgpr_kernarg_preload_offset");
  }
  PrintField(
      KD.kernel_code_properties,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
      amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
      ".amdhsa_user_sgpr_private_segment_size");
  if (IVersion.Major >= 10)
    PrintField(KD.kernel_code_properties,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
               ".amdhsa_wavefront_size32");
  if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
    PrintField(KD.kernel_code_properties,
               amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,
               amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,
               ".amdhsa_uses_dynamic_stack");
  // The same rsrc2 bit is spelled differently depending on whether flat
  // scratch is architected.
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,
             (hasArchitectedFlatScratch(STI)
                  ? ".amdhsa_enable_private_segment"
                  : ".amdhsa_system_sgpr_private_segment_wavefront_offset"));
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,
             ".amdhsa_system_sgpr_workgroup_id_x");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
             ".amdhsa_system_sgpr_workgroup_id_y");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,
             ".amdhsa_system_sgpr_workgroup_id_z");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,
             ".amdhsa_system_sgpr_workgroup_info");
  PrintField(KD.compute_pgm_rsrc2,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,
             ".amdhsa_system_vgpr_workitem_id");

  // These directives are required.
  OS << "\t\t.amdhsa_next_free_vgpr ";
  EmitMCExpr(NextVGPR);
  OS << '\n';

  OS << "\t\t.amdhsa_next_free_sgpr ";
  EmitMCExpr(NextSGPR);
  OS << '\n';

  if (AMDGPU::isGFX90A(STI)) {
    // MCExpr equivalent of taking the (accum_offset + 1) * 4.
    const MCExpr *accum_bits = MCKernelDescriptor::bits_get(
        Src: KD.compute_pgm_rsrc3,
        Shift: amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
        Mask: amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, Ctx&: getContext());
    accum_bits = MCBinaryExpr::createAdd(
        LHS: accum_bits, RHS: MCConstantExpr::create(Value: 1, Ctx&: getContext()), Ctx&: getContext());
    accum_bits = MCBinaryExpr::createMul(
        LHS: accum_bits, RHS: MCConstantExpr::create(Value: 4, Ctx&: getContext()), Ctx&: getContext());
    OS << "\t\t.amdhsa_accum_offset ";
    const MCExpr *New = foldAMDGPUMCExpr(Expr: accum_bits, Ctx&: getContext());
    printAMDGPUMCExpr(Expr: New, OS, MAI);
    OS << '\n';
  }

  if (isGFX1250Plus(STI))
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
               ".amdhsa_named_barrier_count");

  OS << "\t\t.amdhsa_reserve_vcc ";
  EmitMCExpr(ReserveVCC);
  OS << '\n';

  if (IVersion.Major >= 7 && !hasArchitectedFlatScratch(STI)) {
    OS << "\t\t.amdhsa_reserve_flat_scratch ";
    EmitMCExpr(ReserveFlatScr);
    OS << '\n';
  }

  switch (CodeObjectVersion) {
  default:
    break;
  case AMDGPU::AMDHSA_COV4:
  case AMDGPU::AMDHSA_COV5:
    if (getTargetID()->isXnackSupported())
      OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n';
    break;
  }

  // Floating-point mode fields from rsrc1.
  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,
             ".amdhsa_float_round_mode_32");
  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64,
             ".amdhsa_float_round_mode_16_64");
  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32,
             ".amdhsa_float_denorm_mode_32");
  PrintField(KD.compute_pgm_rsrc1,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
             amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
             ".amdhsa_float_denorm_mode_16_64");
  if (STI.hasFeature(Feature: AMDGPU::FeatureDX10ClampAndIEEEMode)) {
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
               ".amdhsa_dx10_clamp");
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,
               ".amdhsa_ieee_mode");
  }
  if (IVersion.Major >= 9) {
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL,
               ".amdhsa_fp16_overflow");
  }
  if (AMDGPU::isGFX90A(STI))
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, ".amdhsa_tg_split");
  if (AMDGPU::supportsWGP(STI))
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
               ".amdhsa_workgroup_processor_mode");
  if (IVersion.Major >= 10) {
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED,
               ".amdhsa_memory_ordered");
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS,
               ".amdhsa_forward_progress");
  }
  if (IVersion.Major >= 10 && IVersion.Major < 12) {
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
               ".amdhsa_shared_vgpr_count");
  }
  if (IVersion.Major == 11) {
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE,
               ".amdhsa_inst_pref_size");
  }
  if (IVersion.Major >= 12) {
    PrintField(KD.compute_pgm_rsrc3,
               amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE,
               ".amdhsa_inst_pref_size");
    PrintField(KD.compute_pgm_rsrc1,
               amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
               amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
               ".amdhsa_round_robin_scheduling");
  }
  // Exception-enable fields from rsrc2.
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
      ".amdhsa_exception_fp_ieee_invalid_op");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
      ".amdhsa_exception_fp_denorm_src");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
      ".amdhsa_exception_fp_ieee_div_zero");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
      ".amdhsa_exception_fp_ieee_overflow");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
      ".amdhsa_exception_fp_ieee_underflow");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
      ".amdhsa_exception_fp_ieee_inexact");
  PrintField(
      KD.compute_pgm_rsrc2,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT,
      amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
      ".amdhsa_exception_int_div_zero");

  OS << "\t.end_amdhsa_kernel\n";
}
662
663//===----------------------------------------------------------------------===//
664// AMDGPUTargetELFStreamer
665//===----------------------------------------------------------------------===//
666
// Construct an ELF object streamer; \p STI is kept so finish() can compute
// the e_flags and ABI version for the object being written.
AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S,
                                                 const MCSubtargetInfo &STI)
    : AMDGPUTargetStreamer(S), STI(STI), Streamer(S) {}
670
671MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
672 return static_cast<MCELFStreamer &>(Streamer);
673}
674
675// A hook for emitting stuff at the end.
676// We use it for emitting the accumulated PAL metadata as a .note record.
677// The PAL metadata is reset after it is emitted.
678void AMDGPUTargetELFStreamer::finish() {
679 ELFObjectWriter &W = getStreamer().getWriter();
680 W.setELFHeaderEFlags(getEFlags());
681 W.setOverrideABIVersion(
682 getELFABIVersion(OS: STI.getTargetTriple(), CodeObjectVersion));
683
684 std::string Blob;
685 const char *Vendor = getPALMetadata()->getVendor();
686 unsigned Type = getPALMetadata()->getType();
687 getPALMetadata()->toBlob(Type, S&: Blob);
688 if (Blob.empty())
689 return;
690 EmitNote(Name: Vendor, DescSize: MCConstantExpr::create(Value: Blob.size(), Ctx&: getContext()), NoteType: Type,
691 EmitDesc: [&](MCELFStreamer &OS) { OS.emitBytes(Data: Blob); });
692
693 // Reset the pal metadata so its data will not affect a compilation that
694 // reuses this object.
695 getPALMetadata()->reset();
696}
697
698void AMDGPUTargetELFStreamer::EmitNote(
699 StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
700 function_ref<void(MCELFStreamer &)> EmitDesc) {
701 auto &S = getStreamer();
702 auto &Context = S.getContext();
703
704 auto NameSZ = Name.size() + 1;
705
706 unsigned NoteFlags = 0;
707 // TODO Apparently, this is currently needed for OpenCL as mentioned in
708 // https://reviews.llvm.org/D74995
709 if (isHsaAbi(STI))
710 NoteFlags = ELF::SHF_ALLOC;
711
712 S.pushSection();
713 S.switchSection(
714 Section: Context.getELFSection(Section: ElfNote::SectionName, Type: ELF::SHT_NOTE, Flags: NoteFlags));
715 S.emitInt32(Value: NameSZ); // namesz
716 S.emitValue(Value: DescSZ, Size: 4); // descz
717 S.emitInt32(Value: NoteType); // type
718 S.emitBytes(Data: Name); // name
719 S.emitValueToAlignment(Alignment: Align(4), Fill: 0, FillLen: 1, MaxBytesToEmit: 0); // padding 0
720 EmitDesc(S); // desc
721 S.emitValueToAlignment(Alignment: Align(4), Fill: 0, FillLen: 1, MaxBytesToEmit: 0); // padding 0
722 S.popSection();
723}
724
725unsigned AMDGPUTargetELFStreamer::getEFlags() {
726 switch (STI.getTargetTriple().getArch()) {
727 default:
728 llvm_unreachable("Unsupported Arch");
729 case Triple::r600:
730 return getEFlagsR600();
731 case Triple::amdgcn:
732 return getEFlagsAMDGCN();
733 }
734}
735
736unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {
737 assert(STI.getTargetTriple().getArch() == Triple::r600);
738
739 return getElfMach(GPU: STI.getCPU());
740}
741
742unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
743 assert(STI.getTargetTriple().isAMDGCN());
744
745 switch (STI.getTargetTriple().getOS()) {
746 default:
747 // TODO: Why are some tests have "mingw" listed as OS?
748 // llvm_unreachable("Unsupported OS");
749 case Triple::UnknownOS:
750 return getEFlagsUnknownOS();
751 case Triple::AMDHSA:
752 return getEFlagsAMDHSA();
753 case Triple::AMDPAL:
754 return getEFlagsAMDPAL();
755 case Triple::Mesa3D:
756 return getEFlagsMesa3D();
757 }
758}
759
760unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
761 // TODO: Why are some tests have "mingw" listed as OS?
762 // assert(STI.getTargetTriple().getOS() == Triple::UnknownOS);
763
764 return getEFlagsV3();
765}
766
767unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
768 assert(isHsaAbi(STI));
769
770 if (CodeObjectVersion >= 6)
771 return getEFlagsV6();
772 return getEFlagsV4();
773}
774
775unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
776 assert(STI.getTargetTriple().getOS() == Triple::AMDPAL);
777
778 return getEFlagsV3();
779}
780
781unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
782 assert(STI.getTargetTriple().getOS() == Triple::Mesa3D);
783
784 return getEFlagsV3();
785}
786
787unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
788 unsigned EFlagsV3 = 0;
789
790 // mach.
791 EFlagsV3 |= getElfMach(GPU: STI.getCPU());
792
793 // xnack.
794 if (getTargetID()->isXnackOnOrAny())
795 EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_XNACK_V3;
796 // sramecc.
797 if (getTargetID()->isSramEccOnOrAny())
798 EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_V3;
799
800 return EFlagsV3;
801}
802
803unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
804 unsigned EFlagsV4 = 0;
805
806 // mach.
807 EFlagsV4 |= getElfMach(GPU: STI.getCPU());
808
809 // xnack.
810 switch (getTargetID()->getXnackSetting()) {
811 case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
812 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4;
813 break;
814 case AMDGPU::IsaInfo::TargetIDSetting::Any:
815 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4;
816 break;
817 case AMDGPU::IsaInfo::TargetIDSetting::Off:
818 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4;
819 break;
820 case AMDGPU::IsaInfo::TargetIDSetting::On:
821 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4;
822 break;
823 }
824 // sramecc.
825 switch (getTargetID()->getSramEccSetting()) {
826 case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
827 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4;
828 break;
829 case AMDGPU::IsaInfo::TargetIDSetting::Any:
830 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ANY_V4;
831 break;
832 case AMDGPU::IsaInfo::TargetIDSetting::Off:
833 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_OFF_V4;
834 break;
835 case AMDGPU::IsaInfo::TargetIDSetting::On:
836 EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ON_V4;
837 break;
838 }
839
840 return EFlagsV4;
841}
842
843unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
844 unsigned Flags = getEFlagsV4();
845
846 unsigned Version = ForceGenericVersion;
847 if (!Version) {
848 switch (parseArchAMDGCN(CPU: STI.getCPU())) {
849 case AMDGPU::GK_GFX9_GENERIC:
850 Version = GenericVersion::GFX9;
851 break;
852 case AMDGPU::GK_GFX9_4_GENERIC:
853 Version = GenericVersion::GFX9_4;
854 break;
855 case AMDGPU::GK_GFX10_1_GENERIC:
856 Version = GenericVersion::GFX10_1;
857 break;
858 case AMDGPU::GK_GFX10_3_GENERIC:
859 Version = GenericVersion::GFX10_3;
860 break;
861 case AMDGPU::GK_GFX11_GENERIC:
862 Version = GenericVersion::GFX11;
863 break;
864 case AMDGPU::GK_GFX12_GENERIC:
865 Version = GenericVersion::GFX12;
866 break;
867 case AMDGPU::GK_GFX12_5_GENERIC:
868 Version = GenericVersion::GFX12_5;
869 break;
870 default:
871 break;
872 }
873 }
874
875 // Versions start at 1.
876 if (Version) {
877 if (Version > ELF::EF_AMDGPU_GENERIC_VERSION_MAX)
878 report_fatal_error(reason: "Cannot encode generic code object version " +
879 Twine(Version) +
880 " - no ELF flag can represent this version!");
881 Flags |= (Version << ELF::EF_AMDGPU_GENERIC_VERSION_OFFSET);
882 }
883
884 return Flags;
885}
886
// Intentionally empty: the ELF streamer has nothing to write for the
// .amdgcn_target directive (the target is encoded via the ELF e_flags).
void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}
888
889void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
890 MCStreamer &OS = getStreamer();
891 OS.pushSection();
892 Header.EmitKernelCodeT(OS, Ctx&: getContext());
893 OS.popSection();
894}
895
896void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
897 unsigned Type) {
898 auto *Symbol = static_cast<MCSymbolELF *>(
899 getStreamer().getContext().getOrCreateSymbol(Name: SymbolName));
900 Symbol->setType(Type);
901}
902
903void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
904 Align Alignment) {
905 auto *SymbolELF = static_cast<MCSymbolELF *>(Symbol);
906 SymbolELF->setType(ELF::STT_OBJECT);
907
908 if (!SymbolELF->isBindingSet())
909 SymbolELF->setBinding(ELF::STB_GLOBAL);
910
911 if (SymbolELF->declareCommon(Size, Alignment)) {
912 report_fatal_error(reason: "Symbol: " + Symbol->getName() +
913 " redeclared as different type");
914 }
915
916 SymbolELF->setIndex(ELF::SHN_AMDGPU_LDS);
917 SymbolELF->setSize(MCConstantExpr::create(Value: Size, Ctx&: getContext()));
918}
919
920bool AMDGPUTargetELFStreamer::EmitISAVersion() {
921 // Create two labels to mark the beginning and end of the desc field
922 // and a MCExpr to calculate the size of the desc field.
923 auto &Context = getContext();
924 auto *DescBegin = Context.createTempSymbol();
925 auto *DescEnd = Context.createTempSymbol();
926 auto *DescSZ = MCBinaryExpr::createSub(
927 LHS: MCSymbolRefExpr::create(Symbol: DescEnd, Ctx&: Context),
928 RHS: MCSymbolRefExpr::create(Symbol: DescBegin, Ctx&: Context), Ctx&: Context);
929
930 EmitNote(Name: ElfNote::NoteNameV2, DescSZ, NoteType: ELF::NT_AMD_HSA_ISA_NAME,
931 EmitDesc: [&](MCELFStreamer &OS) {
932 OS.emitLabel(Symbol: DescBegin);
933
934 SmallString<32> Str;
935 raw_svector_ostream StrOS(Str);
936 StrOS << *getTargetID();
937
938 OS.emitBytes(Data: StrOS.str());
939 OS.emitLabel(Symbol: DescEnd);
940 });
941 return true;
942}
943
944bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
945 bool Strict) {
946 HSAMD::V3::MetadataVerifier Verifier(Strict);
947 if (!Verifier.verify(HSAMetadataRoot&: HSAMetadataDoc.getRoot()))
948 return false;
949
950 std::string HSAMetadataString;
951 HSAMetadataDoc.writeToBlob(Blob&: HSAMetadataString);
952
953 // Create two labels to mark the beginning and end of the desc field
954 // and a MCExpr to calculate the size of the desc field.
955 auto &Context = getContext();
956 auto *DescBegin = Context.createTempSymbol();
957 auto *DescEnd = Context.createTempSymbol();
958 auto *DescSZ = MCBinaryExpr::createSub(
959 LHS: MCSymbolRefExpr::create(Symbol: DescEnd, Ctx&: Context),
960 RHS: MCSymbolRefExpr::create(Symbol: DescBegin, Ctx&: Context), Ctx&: Context);
961
962 EmitNote(Name: ElfNote::NoteNameV3, DescSZ, NoteType: ELF::NT_AMDGPU_METADATA,
963 EmitDesc: [&](MCELFStreamer &OS) {
964 OS.emitLabel(Symbol: DescBegin);
965 OS.emitBytes(Data: HSAMetadataString);
966 OS.emitLabel(Symbol: DescEnd);
967 });
968 return true;
969}
970
971bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
972 const uint32_t Encoded_s_code_end = 0xbf9f0000;
973 const uint32_t Encoded_s_nop = 0xbf800000;
974 uint32_t Encoded_pad = Encoded_s_code_end;
975
976 // Instruction cache line size in bytes.
977 const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
978 const unsigned CacheLineSize = 1u << Log2CacheLineSize;
979
980 // Extra padding amount in bytes to support prefetch mode 3.
981 unsigned FillSize = 3 * CacheLineSize;
982
983 if (AMDGPU::isGFX90A(STI)) {
984 Encoded_pad = Encoded_s_nop;
985 FillSize = 16 * CacheLineSize;
986 }
987
988 MCStreamer &OS = getStreamer();
989 OS.pushSection();
990 OS.emitValueToAlignment(Alignment: Align(CacheLineSize), Fill: Encoded_pad, FillLen: 4);
991 for (unsigned I = 0; I < FillSize; I += 4)
992 OS.emitInt32(Value: Encoded_pad);
993 OS.popSection();
994 return true;
995}
996
// Emits the binary amdhsa kernel descriptor for KernelName, field by field in
// the exact layout of amdhsa::kernel_descriptor_t. The emission order below
// IS the binary layout — do not reorder these emitValue/emitInt8 calls.
// NOTE(review): NextVGPR/NextSGPR/ReserveVCC/ReserveFlatScr are accepted but
// not referenced in this body — presumably consumed by the assembly streamer
// overload; confirm against the base-class contract.
void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR,
    const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
    const MCExpr *ReserveFlatScr) {
  auto &Streamer = getStreamer();
  auto &Context = Streamer.getContext();

  // The kernel code symbol is <name>; its descriptor symbol is <name>.kd.
  auto *KernelCodeSymbol =
      static_cast<MCSymbolELF *>(Context.getOrCreateSymbol(Name: Twine(KernelName)));
  auto *KernelDescriptorSymbol = static_cast<MCSymbolELF *>(
      Context.getOrCreateSymbol(Name: Twine(KernelName) + Twine(".kd")));

  // Copy kernel descriptor symbol's binding, other and visibility from the
  // kernel code symbol.
  KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding());
  KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
  KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
  // Kernel descriptor symbol's type and size are fixed.
  KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
  KernelDescriptorSymbol->setSize(
      MCConstantExpr::create(Value: sizeof(amdhsa::kernel_descriptor_t), Ctx&: Context));

  // The visibility of the kernel code symbol must be protected or less to allow
  // static relocations from the kernel descriptor to be used.
  if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT)
    KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);

  Streamer.emitLabel(Symbol: KernelDescriptorSymbol);
  // Descriptor fields, emitted in declaration order with sizeof() widths so
  // the output stays in lockstep with amdhsa::kernel_descriptor_t.
  Streamer.emitValue(
      Value: KernelDescriptor.group_segment_fixed_size,
      Size: sizeof(amdhsa::kernel_descriptor_t::group_segment_fixed_size));
  Streamer.emitValue(
      Value: KernelDescriptor.private_segment_fixed_size,
      Size: sizeof(amdhsa::kernel_descriptor_t::private_segment_fixed_size));
  Streamer.emitValue(Value: KernelDescriptor.kernarg_size,
                     Size: sizeof(amdhsa::kernel_descriptor_t::kernarg_size));

  // reserved0: zero-filled padding.
  for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved0); ++i)
    Streamer.emitInt8(Value: 0u);

  // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
  // expression being created is:
  //   (start of kernel code) - (start of kernel descriptor)
  // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
  Streamer.emitValue(
      Value: MCBinaryExpr::createSub(
          LHS: MCSymbolRefExpr::create(Symbol: KernelCodeSymbol, specifier: AMDGPUMCExpr::S_REL64,
                                  Ctx&: Context),
          RHS: MCSymbolRefExpr::create(Symbol: KernelDescriptorSymbol, Ctx&: Context), Ctx&: Context),
      Size: sizeof(amdhsa::kernel_descriptor_t::kernel_code_entry_byte_offset));
  // reserved1: zero-filled padding.
  for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved1); ++i)
    Streamer.emitInt8(Value: 0u);
  // Program resource registers and kernel code properties.
  Streamer.emitValue(Value: KernelDescriptor.compute_pgm_rsrc3,
                     Size: sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc3));
  Streamer.emitValue(Value: KernelDescriptor.compute_pgm_rsrc1,
                     Size: sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc1));
  Streamer.emitValue(Value: KernelDescriptor.compute_pgm_rsrc2,
                     Size: sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc2));
  Streamer.emitValue(
      Value: KernelDescriptor.kernel_code_properties,
      Size: sizeof(amdhsa::kernel_descriptor_t::kernel_code_properties));
  Streamer.emitValue(Value: KernelDescriptor.kernarg_preload,
                     Size: sizeof(amdhsa::kernel_descriptor_t::kernarg_preload));
  // reserved3: zero-filled padding to the end of the descriptor.
  for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved3); ++i)
    Streamer.emitInt8(Value: 0u);
}
1064