//===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides AMDGPU specific target streamer methods.
//
//===----------------------------------------------------------------------===//
12
13#include "AMDGPUTargetStreamer.h"
14#include "AMDGPUMCExpr.h"
15#include "AMDGPUMCKernelDescriptor.h"
16#include "AMDGPUPTNote.h"
17#include "Utils/AMDGPUBaseInfo.h"
18#include "Utils/AMDKernelCodeTUtils.h"
19#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
20#include "llvm/BinaryFormat/ELF.h"
21#include "llvm/MC/MCAsmInfo.h"
22#include "llvm/MC/MCAssembler.h"
23#include "llvm/MC/MCContext.h"
24#include "llvm/MC/MCELFObjectWriter.h"
25#include "llvm/MC/MCELFStreamer.h"
26#include "llvm/MC/MCSubtargetInfo.h"
27#include "llvm/Support/AMDGPUMetadata.h"
28#include "llvm/Support/AMDHSAKernelDescriptor.h"
29#include "llvm/Support/CommandLine.h"
30#include "llvm/Support/FormattedStream.h"
31#include "llvm/TargetParser/TargetParser.h"
32
33using namespace llvm;
34using namespace llvm::AMDGPU;
35
36//===----------------------------------------------------------------------===//
37// AMDGPUTargetStreamer
38//===----------------------------------------------------------------------===//
39
40static cl::opt<unsigned>
41 ForceGenericVersion("amdgpu-force-generic-version",
42 cl::desc("Force a specific generic_v<N> flag to be "
43 "added. For testing purposes only."),
44 cl::ReallyHidden, cl::init(Val: 0));
45
46bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
47 msgpack::Document HSAMetadataDoc;
48 if (!HSAMetadataDoc.fromYAML(S: HSAMetadataString))
49 return false;
50 return EmitHSAMetadata(HSAMetadata&: HSAMetadataDoc, Strict: false);
51}
52
53StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
54 AMDGPU::GPUKind AK;
55
56 // clang-format off
57 switch (ElfMach) {
58 case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
59 case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
60 case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
61 case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
62 case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
63 case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
64 case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
65 case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
66 case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
67 case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
68 case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
69 case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
70 case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
71 case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
72 case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
73 case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
74 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
75 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
76 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX602: AK = GK_GFX602; break;
77 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
78 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
79 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
80 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
81 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
82 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX705: AK = GK_GFX705; break;
83 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
84 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
85 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
86 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX805: AK = GK_GFX805; break;
87 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
88 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
89 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
90 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
91 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
92 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX908: AK = GK_GFX908; break;
93 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
94 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break;
95 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break;
96 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break;
97 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: AK = GK_GFX950; break;
98 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
99 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
100 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
101 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013: AK = GK_GFX1013; break;
102 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030: AK = GK_GFX1030; break;
103 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031: AK = GK_GFX1031; break;
104 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032: AK = GK_GFX1032; break;
105 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033: AK = GK_GFX1033; break;
106 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034: AK = GK_GFX1034; break;
107 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035: AK = GK_GFX1035; break;
108 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036: AK = GK_GFX1036; break;
109 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100: AK = GK_GFX1100; break;
110 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101: AK = GK_GFX1101; break;
111 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102: AK = GK_GFX1102; break;
112 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: AK = GK_GFX1103; break;
113 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150: AK = GK_GFX1150; break;
114 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break;
115 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152: AK = GK_GFX1152; break;
116 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1153: AK = GK_GFX1153; break;
117 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break;
118 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break;
119 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1250: AK = GK_GFX1250; break;
120 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1251: AK = GK_GFX1251; break;
121 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1310: AK = GK_GFX1310; break;
122 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: AK = GK_GFX9_GENERIC; break;
123 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC: AK = GK_GFX9_4_GENERIC; break;
124 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: AK = GK_GFX10_1_GENERIC; break;
125 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: AK = GK_GFX10_3_GENERIC; break;
126 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: AK = GK_GFX11_GENERIC; break;
127 case ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC: AK = GK_GFX12_GENERIC; break;
128 case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
129 default: AK = GK_NONE; break;
130 }
131 // clang-format on
132
133 StringRef GPUName = getArchNameAMDGCN(AK);
134 if (GPUName != "")
135 return GPUName;
136 return getArchNameR600(AK);
137}
138
139unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
140 AMDGPU::GPUKind AK = parseArchAMDGCN(CPU: GPU);
141 if (AK == AMDGPU::GPUKind::GK_NONE)
142 AK = parseArchR600(CPU: GPU);
143
144 // clang-format off
145 switch (AK) {
146 case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600;
147 case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630;
148 case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880;
149 case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670;
150 case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710;
151 case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730;
152 case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770;
153 case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR;
154 case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
155 case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
156 case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
157 case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO;
158 case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS;
159 case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS;
160 case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
161 case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS;
162 case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
163 case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
164 case GK_GFX602: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX602;
165 case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
166 case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
167 case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
168 case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
169 case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
170 case GK_GFX705: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX705;
171 case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
172 case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
173 case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
174 case GK_GFX805: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX805;
175 case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
176 case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
177 case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
178 case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
179 case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
180 case GK_GFX908: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX908;
181 case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
182 case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A;
183 case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
184 case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942;
185 case GK_GFX950: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX950;
186 case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
187 case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
188 case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
189 case GK_GFX1013: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1013;
190 case GK_GFX1030: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1030;
191 case GK_GFX1031: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1031;
192 case GK_GFX1032: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1032;
193 case GK_GFX1033: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1033;
194 case GK_GFX1034: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1034;
195 case GK_GFX1035: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1035;
196 case GK_GFX1036: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1036;
197 case GK_GFX1100: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1100;
198 case GK_GFX1101: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101;
199 case GK_GFX1102: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102;
200 case GK_GFX1103: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103;
201 case GK_GFX1150: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150;
202 case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151;
203 case GK_GFX1152: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152;
204 case GK_GFX1153: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1153;
205 case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200;
206 case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201;
207 case GK_GFX1250: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1250;
208 case GK_GFX1251: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1251;
209 case GK_GFX1310: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1310;
210 case GK_GFX9_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC;
211 case GK_GFX9_4_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC;
212 case GK_GFX10_1_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC;
213 case GK_GFX10_3_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC;
214 case GK_GFX11_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC;
215 case GK_GFX12_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC;
216 case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
217 }
218 // clang-format on
219
220 llvm_unreachable("unknown GPU");
221}
222
223//===----------------------------------------------------------------------===//
224// AMDGPUTargetAsmStreamer
225//===----------------------------------------------------------------------===//
226
227AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
228 formatted_raw_ostream &OS)
229 : AMDGPUTargetStreamer(S), OS(OS) { }
230
231// A hook for emitting stuff at the end.
232// We use it for emitting the accumulated PAL metadata as directives.
233// The PAL metadata is reset after it is emitted.
234void AMDGPUTargetAsmStreamer::finish() {
235 std::string S;
236 getPALMetadata()->toString(S);
237 OS << S;
238
239 // Reset the pal metadata so its data will not affect a compilation that
240 // reuses this object.
241 getPALMetadata()->reset();
242}
243
244void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget() {
245 OS << "\t.amdgcn_target \"" << getTargetID()->toString() << "\"\n";
246}
247
248void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion(
249 unsigned COV) {
250 AMDGPUTargetStreamer::EmitDirectiveAMDHSACodeObjectVersion(COV);
251 OS << "\t.amdhsa_code_object_version " << COV << '\n';
252}
253
254void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
255 auto FoldAndPrint = [&](const MCExpr *Expr, raw_ostream &OS,
256 const MCAsmInfo *MAI) {
257 printAMDGPUMCExpr(Expr: foldAMDGPUMCExpr(Expr, Ctx&: getContext()), OS, MAI);
258 };
259
260 OS << "\t.amd_kernel_code_t\n";
261 Header.EmitKernelCodeT(OS, Ctx&: getContext(), Helper: FoldAndPrint);
262 OS << "\t.end_amd_kernel_code_t\n";
263}
264
265void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
266 unsigned Type) {
267 switch (Type) {
268 default: llvm_unreachable("Invalid AMDGPU symbol type");
269 case ELF::STT_AMDGPU_HSA_KERNEL:
270 OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
271 break;
272 }
273}
274
275void AMDGPUTargetAsmStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
276 Align Alignment) {
277 OS << "\t.amdgpu_lds " << Symbol->getName() << ", " << Size << ", "
278 << Alignment.value() << '\n';
279}
280
281void AMDGPUTargetAsmStreamer::EmitMCResourceInfo(
282 const MCSymbol *NumVGPR, const MCSymbol *NumAGPR,
283 const MCSymbol *NumExplicitSGPR, const MCSymbol *NumNamedBarrier,
284 const MCSymbol *PrivateSegmentSize, const MCSymbol *UsesVCC,
285 const MCSymbol *UsesFlatScratch, const MCSymbol *HasDynamicallySizedStack,
286 const MCSymbol *HasRecursion, const MCSymbol *HasIndirectCall) {
287#define PRINT_RES_INFO(ARG) \
288 OS << "\t.set "; \
289 ARG->print(OS, getContext().getAsmInfo()); \
290 OS << ", "; \
291 getContext().getAsmInfo()->printExpr(OS, *ARG->getVariableValue()); \
292 Streamer.addBlankLine();
293
294 PRINT_RES_INFO(NumVGPR);
295 PRINT_RES_INFO(NumAGPR);
296 PRINT_RES_INFO(NumExplicitSGPR);
297 PRINT_RES_INFO(NumNamedBarrier);
298 PRINT_RES_INFO(PrivateSegmentSize);
299 PRINT_RES_INFO(UsesVCC);
300 PRINT_RES_INFO(UsesFlatScratch);
301 PRINT_RES_INFO(HasDynamicallySizedStack);
302 PRINT_RES_INFO(HasRecursion);
303 PRINT_RES_INFO(HasIndirectCall);
304#undef PRINT_RES_INFO
305}
306
307void AMDGPUTargetAsmStreamer::EmitMCResourceMaximums(
308 const MCSymbol *MaxVGPR, const MCSymbol *MaxAGPR, const MCSymbol *MaxSGPR,
309 const MCSymbol *MaxNamedBarrier) {
310#define PRINT_RES_INFO(ARG) \
311 OS << "\t.set "; \
312 ARG->print(OS, getContext().getAsmInfo()); \
313 OS << ", "; \
314 getContext().getAsmInfo()->printExpr(OS, *ARG->getVariableValue()); \
315 Streamer.addBlankLine();
316
317 PRINT_RES_INFO(MaxVGPR);
318 PRINT_RES_INFO(MaxAGPR);
319 PRINT_RES_INFO(MaxSGPR);
320 PRINT_RES_INFO(MaxNamedBarrier);
321#undef PRINT_RES_INFO
322}
323
324bool AMDGPUTargetAsmStreamer::EmitISAVersion() {
325 OS << "\t.amd_amdgpu_isa \"" << getTargetID()->toString() << "\"\n";
326 return true;
327}
328
329bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
330 msgpack::Document &HSAMetadataDoc, bool Strict) {
331 HSAMD::V3::MetadataVerifier Verifier(Strict);
332 if (!Verifier.verify(HSAMetadataRoot&: HSAMetadataDoc.getRoot()))
333 return false;
334
335 std::string HSAMetadataString;
336 raw_string_ostream StrOS(HSAMetadataString);
337 HSAMetadataDoc.toYAML(OS&: StrOS);
338
339 OS << '\t' << HSAMD::V3::AssemblerDirectiveBegin << '\n';
340 OS << StrOS.str() << '\n';
341 OS << '\t' << HSAMD::V3::AssemblerDirectiveEnd << '\n';
342 return true;
343}
344
345bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
346 const uint32_t Encoded_s_code_end = 0xbf9f0000;
347 const uint32_t Encoded_s_nop = 0xbf800000;
348 uint32_t Encoded_pad = Encoded_s_code_end;
349
350 // Instruction cache line size in bytes.
351 const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
352 const unsigned CacheLineSize = 1u << Log2CacheLineSize;
353
354 // Extra padding amount in bytes to support prefetch mode 3.
355 unsigned FillSize = 3 * CacheLineSize;
356
357 if (AMDGPU::isGFX90A(STI)) {
358 Encoded_pad = Encoded_s_nop;
359 FillSize = 16 * CacheLineSize;
360 }
361
362 OS << "\t.p2alignl " << Log2CacheLineSize << ", " << Encoded_pad << '\n';
363 OS << "\t.fill " << (FillSize / 4) << ", 4, " << Encoded_pad << '\n';
364 return true;
365}
366
367void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
368 const MCSubtargetInfo &STI, StringRef KernelName,
369 const MCKernelDescriptor &KD, const MCExpr *NextVGPR,
370 const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
371 const MCExpr *ReserveFlatScr) {
372 IsaVersion IVersion = getIsaVersion(GPU: STI.getCPU());
373 const MCAsmInfo *MAI = getContext().getAsmInfo();
374
375 OS << "\t.amdhsa_kernel " << KernelName << '\n';
376
377 auto PrintField = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask,
378 StringRef Directive) {
379 OS << "\t\t" << Directive << ' ';
380 const MCExpr *ShiftedAndMaskedExpr =
381 MCKernelDescriptor::bits_get(Src: Expr, Shift, Mask, Ctx&: getContext());
382 const MCExpr *New = foldAMDGPUMCExpr(Expr: ShiftedAndMaskedExpr, Ctx&: getContext());
383 printAMDGPUMCExpr(Expr: New, OS, MAI);
384 OS << '\n';
385 };
386
387 auto EmitMCExpr = [&](const MCExpr *Value) {
388 const MCExpr *NewExpr = foldAMDGPUMCExpr(Expr: Value, Ctx&: getContext());
389 printAMDGPUMCExpr(Expr: NewExpr, OS, MAI);
390 };
391
392 OS << "\t\t.amdhsa_group_segment_fixed_size ";
393 EmitMCExpr(KD.group_segment_fixed_size);
394 OS << '\n';
395
396 OS << "\t\t.amdhsa_private_segment_fixed_size ";
397 EmitMCExpr(KD.private_segment_fixed_size);
398 OS << '\n';
399
400 OS << "\t\t.amdhsa_kernarg_size ";
401 EmitMCExpr(KD.kernarg_size);
402 OS << '\n';
403
404 if (isGFX1250Plus(STI)) {
405 PrintField(KD.compute_pgm_rsrc2,
406 amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
407 amdhsa::COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT,
408 ".amdhsa_user_sgpr_count");
409 } else {
410 PrintField(KD.compute_pgm_rsrc2,
411 amdhsa::COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
412 amdhsa::COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT,
413 ".amdhsa_user_sgpr_count");
414 }
415
416 if (!hasArchitectedFlatScratch(STI))
417 PrintField(
418 KD.kernel_code_properties,
419 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
420 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
421 ".amdhsa_user_sgpr_private_segment_buffer");
422 PrintField(KD.kernel_code_properties,
423 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
424 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR,
425 ".amdhsa_user_sgpr_dispatch_ptr");
426 PrintField(KD.kernel_code_properties,
427 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
428 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR,
429 ".amdhsa_user_sgpr_queue_ptr");
430 PrintField(KD.kernel_code_properties,
431 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
432 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
433 ".amdhsa_user_sgpr_kernarg_segment_ptr");
434 PrintField(KD.kernel_code_properties,
435 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
436 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID,
437 ".amdhsa_user_sgpr_dispatch_id");
438 if (!hasArchitectedFlatScratch(STI))
439 PrintField(KD.kernel_code_properties,
440 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
441 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
442 ".amdhsa_user_sgpr_flat_scratch_init");
443 if (hasKernargPreload(STI)) {
444 PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_LENGTH_SHIFT,
445 amdhsa::KERNARG_PRELOAD_SPEC_LENGTH,
446 ".amdhsa_user_sgpr_kernarg_preload_length");
447 PrintField(KD.kernarg_preload, amdhsa::KERNARG_PRELOAD_SPEC_OFFSET_SHIFT,
448 amdhsa::KERNARG_PRELOAD_SPEC_OFFSET,
449 ".amdhsa_user_sgpr_kernarg_preload_offset");
450 }
451 PrintField(
452 KD.kernel_code_properties,
453 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
454 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
455 ".amdhsa_user_sgpr_private_segment_size");
456 if (IVersion.Major >= 10)
457 PrintField(KD.kernel_code_properties,
458 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
459 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
460 ".amdhsa_wavefront_size32");
461 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
462 PrintField(KD.kernel_code_properties,
463 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT,
464 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK,
465 ".amdhsa_uses_dynamic_stack");
466 PrintField(KD.compute_pgm_rsrc2,
467 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT_SHIFT,
468 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT,
469 (hasArchitectedFlatScratch(STI)
470 ? ".amdhsa_enable_private_segment"
471 : ".amdhsa_system_sgpr_private_segment_wavefront_offset"));
472 PrintField(KD.compute_pgm_rsrc2,
473 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT,
474 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X,
475 ".amdhsa_system_sgpr_workgroup_id_x");
476 PrintField(KD.compute_pgm_rsrc2,
477 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y_SHIFT,
478 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y,
479 ".amdhsa_system_sgpr_workgroup_id_y");
480 PrintField(KD.compute_pgm_rsrc2,
481 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z_SHIFT,
482 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z,
483 ".amdhsa_system_sgpr_workgroup_id_z");
484 PrintField(KD.compute_pgm_rsrc2,
485 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO_SHIFT,
486 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO,
487 ".amdhsa_system_sgpr_workgroup_info");
488 PrintField(KD.compute_pgm_rsrc2,
489 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID_SHIFT,
490 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID,
491 ".amdhsa_system_vgpr_workitem_id");
492
493 // These directives are required.
494 OS << "\t\t.amdhsa_next_free_vgpr ";
495 EmitMCExpr(NextVGPR);
496 OS << '\n';
497
498 OS << "\t\t.amdhsa_next_free_sgpr ";
499 EmitMCExpr(NextSGPR);
500 OS << '\n';
501
502 if (AMDGPU::isGFX90A(STI)) {
503 // MCExpr equivalent of taking the (accum_offset + 1) * 4.
504 const MCExpr *accum_bits = MCKernelDescriptor::bits_get(
505 Src: KD.compute_pgm_rsrc3,
506 Shift: amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
507 Mask: amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, Ctx&: getContext());
508 accum_bits = MCBinaryExpr::createAdd(
509 LHS: accum_bits, RHS: MCConstantExpr::create(Value: 1, Ctx&: getContext()), Ctx&: getContext());
510 accum_bits = MCBinaryExpr::createMul(
511 LHS: accum_bits, RHS: MCConstantExpr::create(Value: 4, Ctx&: getContext()), Ctx&: getContext());
512 OS << "\t\t.amdhsa_accum_offset ";
513 const MCExpr *New = foldAMDGPUMCExpr(Expr: accum_bits, Ctx&: getContext());
514 printAMDGPUMCExpr(Expr: New, OS, MAI);
515 OS << '\n';
516 }
517
518 if (isGFX1250Plus(STI))
519 PrintField(KD.compute_pgm_rsrc3,
520 amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
521 amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
522 ".amdhsa_named_barrier_count");
523
524 OS << "\t\t.amdhsa_reserve_vcc ";
525 EmitMCExpr(ReserveVCC);
526 OS << '\n';
527
528 if (IVersion.Major >= 7 && !hasArchitectedFlatScratch(STI)) {
529 OS << "\t\t.amdhsa_reserve_flat_scratch ";
530 EmitMCExpr(ReserveFlatScr);
531 OS << '\n';
532 }
533
534 switch (CodeObjectVersion) {
535 default:
536 break;
537 case AMDGPU::AMDHSA_COV4:
538 case AMDGPU::AMDHSA_COV5:
539 if (getTargetID()->isXnackSupported())
540 OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n';
541 break;
542 }
543
544 PrintField(KD.compute_pgm_rsrc1,
545 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32_SHIFT,
546 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32,
547 ".amdhsa_float_round_mode_32");
548 PrintField(KD.compute_pgm_rsrc1,
549 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64_SHIFT,
550 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64,
551 ".amdhsa_float_round_mode_16_64");
552 PrintField(KD.compute_pgm_rsrc1,
553 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32_SHIFT,
554 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32,
555 ".amdhsa_float_denorm_mode_32");
556 PrintField(KD.compute_pgm_rsrc1,
557 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT,
558 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
559 ".amdhsa_float_denorm_mode_16_64");
560 if (IVersion.Major < 12) {
561 PrintField(KD.compute_pgm_rsrc1,
562 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT,
563 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP,
564 ".amdhsa_dx10_clamp");
565 PrintField(KD.compute_pgm_rsrc1,
566 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT,
567 amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE,
568 ".amdhsa_ieee_mode");
569 }
570 if (IVersion.Major >= 9) {
571 PrintField(KD.compute_pgm_rsrc1,
572 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL_SHIFT,
573 amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL,
574 ".amdhsa_fp16_overflow");
575 }
576 if (AMDGPU::isGFX90A(STI))
577 PrintField(KD.compute_pgm_rsrc3,
578 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
579 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, ".amdhsa_tg_split");
580 if (AMDGPU::supportsWGP(STI))
581 PrintField(KD.compute_pgm_rsrc1,
582 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT,
583 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
584 ".amdhsa_workgroup_processor_mode");
585 if (IVersion.Major >= 10) {
586 PrintField(KD.compute_pgm_rsrc1,
587 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT,
588 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED,
589 ".amdhsa_memory_ordered");
590 PrintField(KD.compute_pgm_rsrc1,
591 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT,
592 amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS,
593 ".amdhsa_forward_progress");
594 }
595 if (IVersion.Major >= 10 && IVersion.Major < 12) {
596 PrintField(KD.compute_pgm_rsrc3,
597 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT_SHIFT,
598 amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
599 ".amdhsa_shared_vgpr_count");
600 }
601 if (IVersion.Major == 11) {
602 PrintField(KD.compute_pgm_rsrc3,
603 amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT,
604 amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE,
605 ".amdhsa_inst_pref_size");
606 }
607 if (IVersion.Major >= 12) {
608 PrintField(KD.compute_pgm_rsrc3,
609 amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT,
610 amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE,
611 ".amdhsa_inst_pref_size");
612 PrintField(KD.compute_pgm_rsrc1,
613 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
614 amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
615 ".amdhsa_round_robin_scheduling");
616 }
617 PrintField(
618 KD.compute_pgm_rsrc2,
619 amdhsa::
620 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION_SHIFT,
621 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
622 ".amdhsa_exception_fp_ieee_invalid_op");
623 PrintField(
624 KD.compute_pgm_rsrc2,
625 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE_SHIFT,
626 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
627 ".amdhsa_exception_fp_denorm_src");
628 PrintField(
629 KD.compute_pgm_rsrc2,
630 amdhsa::
631 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO_SHIFT,
632 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
633 ".amdhsa_exception_fp_ieee_div_zero");
634 PrintField(
635 KD.compute_pgm_rsrc2,
636 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW_SHIFT,
637 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
638 ".amdhsa_exception_fp_ieee_overflow");
639 PrintField(
640 KD.compute_pgm_rsrc2,
641 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW_SHIFT,
642 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
643 ".amdhsa_exception_fp_ieee_underflow");
644 PrintField(
645 KD.compute_pgm_rsrc2,
646 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT_SHIFT,
647 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
648 ".amdhsa_exception_fp_ieee_inexact");
649 PrintField(
650 KD.compute_pgm_rsrc2,
651 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO_SHIFT,
652 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
653 ".amdhsa_exception_int_div_zero");
654
655 OS << "\t.end_amdhsa_kernel\n";
656}
657
658//===----------------------------------------------------------------------===//
659// AMDGPUTargetELFStreamer
660//===----------------------------------------------------------------------===//
661
662AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S,
663 const MCSubtargetInfo &STI)
664 : AMDGPUTargetStreamer(S), STI(STI), Streamer(S) {}
665
666MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
667 return static_cast<MCELFStreamer &>(Streamer);
668}
669
670// A hook for emitting stuff at the end.
671// We use it for emitting the accumulated PAL metadata as a .note record.
672// The PAL metadata is reset after it is emitted.
673void AMDGPUTargetELFStreamer::finish() {
674 ELFObjectWriter &W = getStreamer().getWriter();
675 W.setELFHeaderEFlags(getEFlags());
676 W.setOverrideABIVersion(
677 getELFABIVersion(OS: STI.getTargetTriple(), CodeObjectVersion));
678
679 std::string Blob;
680 const char *Vendor = getPALMetadata()->getVendor();
681 unsigned Type = getPALMetadata()->getType();
682 getPALMetadata()->toBlob(Type, S&: Blob);
683 if (Blob.empty())
684 return;
685 EmitNote(Name: Vendor, DescSize: MCConstantExpr::create(Value: Blob.size(), Ctx&: getContext()), NoteType: Type,
686 EmitDesc: [&](MCELFStreamer &OS) { OS.emitBytes(Data: Blob); });
687
688 // Reset the pal metadata so its data will not affect a compilation that
689 // reuses this object.
690 getPALMetadata()->reset();
691}
692
693void AMDGPUTargetELFStreamer::EmitNote(
694 StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
695 function_ref<void(MCELFStreamer &)> EmitDesc) {
696 auto &S = getStreamer();
697 auto &Context = S.getContext();
698
699 auto NameSZ = Name.size() + 1;
700
701 unsigned NoteFlags = 0;
702 // TODO Apparently, this is currently needed for OpenCL as mentioned in
703 // https://reviews.llvm.org/D74995
704 if (isHsaAbi(STI))
705 NoteFlags = ELF::SHF_ALLOC;
706
707 S.pushSection();
708 S.switchSection(
709 Section: Context.getELFSection(Section: ElfNote::SectionName, Type: ELF::SHT_NOTE, Flags: NoteFlags));
710 S.emitInt32(Value: NameSZ); // namesz
711 S.emitValue(Value: DescSZ, Size: 4); // descz
712 S.emitInt32(Value: NoteType); // type
713 S.emitBytes(Data: Name); // name
714 S.emitValueToAlignment(Alignment: Align(4), Fill: 0, FillLen: 1, MaxBytesToEmit: 0); // padding 0
715 EmitDesc(S); // desc
716 S.emitValueToAlignment(Alignment: Align(4), Fill: 0, FillLen: 1, MaxBytesToEmit: 0); // padding 0
717 S.popSection();
718}
719
720unsigned AMDGPUTargetELFStreamer::getEFlags() {
721 switch (STI.getTargetTriple().getArch()) {
722 default:
723 llvm_unreachable("Unsupported Arch");
724 case Triple::r600:
725 return getEFlagsR600();
726 case Triple::amdgcn:
727 return getEFlagsAMDGCN();
728 }
729}
730
731unsigned AMDGPUTargetELFStreamer::getEFlagsR600() {
732 assert(STI.getTargetTriple().getArch() == Triple::r600);
733
734 return getElfMach(GPU: STI.getCPU());
735}
736
// Compute AMDGCN e_flags; the flag layout depends on the OS ABI named in
// the triple. The default case deliberately falls through to UnknownOS so
// odd triples (see TODO) still get a usable encoding.
unsigned AMDGPUTargetELFStreamer::getEFlagsAMDGCN() {
  assert(STI.getTargetTriple().isAMDGCN());

  switch (STI.getTargetTriple().getOS()) {
  default:
    // TODO: Why do some tests have "mingw" listed as OS?
    // llvm_unreachable("Unsupported OS");
  case Triple::UnknownOS:
    return getEFlagsUnknownOS();
  case Triple::AMDHSA:
    return getEFlagsAMDHSA();
  case Triple::AMDPAL:
    return getEFlagsAMDPAL();
  case Triple::Mesa3D:
    return getEFlagsMesa3D();
  }
}
754
// Triples with no recognized OS use the V3 e_flags layout.
unsigned AMDGPUTargetELFStreamer::getEFlagsUnknownOS() {
  // TODO: Why do some tests have "mingw" listed as OS?
  // assert(STI.getTargetTriple().getOS() == Triple::UnknownOS);

  return getEFlagsV3();
}
761
762unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
763 assert(isHsaAbi(STI));
764
765 if (CodeObjectVersion >= 6)
766 return getEFlagsV6();
767 return getEFlagsV4();
768}
769
770unsigned AMDGPUTargetELFStreamer::getEFlagsAMDPAL() {
771 assert(STI.getTargetTriple().getOS() == Triple::AMDPAL);
772
773 return getEFlagsV3();
774}
775
776unsigned AMDGPUTargetELFStreamer::getEFlagsMesa3D() {
777 assert(STI.getTargetTriple().getOS() == Triple::Mesa3D);
778
779 return getEFlagsV3();
780}
781
782unsigned AMDGPUTargetELFStreamer::getEFlagsV3() {
783 unsigned EFlagsV3 = 0;
784
785 // mach.
786 EFlagsV3 |= getElfMach(GPU: STI.getCPU());
787
788 // xnack.
789 if (getTargetID()->isXnackOnOrAny())
790 EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_XNACK_V3;
791 // sramecc.
792 if (getTargetID()->isSramEccOnOrAny())
793 EFlagsV3 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_V3;
794
795 return EFlagsV3;
796}
797
// Build the code-object-V4/V5 e_flags: the machine identifier plus the
// four-state (unsupported/any/off/on) encodings of the XNACK and SRAMECC
// target-ID settings.
unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
  unsigned EFlagsV4 = 0;

  // mach.
  EFlagsV4 |= getElfMach(GPU: STI.getCPU());

  // xnack.
  switch (getTargetID()->getXnackSetting()) {
  case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::Any:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::Off:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::On:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4;
    break;
  }
  // sramecc.
  switch (getTargetID()->getSramEccSetting()) {
  case AMDGPU::IsaInfo::TargetIDSetting::Unsupported:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::Any:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ANY_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::Off:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_OFF_V4;
    break;
  case AMDGPU::IsaInfo::TargetIDSetting::On:
    EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ON_V4;
    break;
  }

  return EFlagsV4;
}
837
// Build the code-object-V6 e_flags: the V4 layout plus, for generic
// (gfxN-generic) targets, a generic_v<N> version number encoded at
// EF_AMDGPU_GENERIC_VERSION_OFFSET.
unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
  unsigned Flags = getEFlagsV4();

  // -amdgpu-force-generic-version (testing only) overrides the version
  // derived from the CPU name; 0 means "not forced".
  unsigned Version = ForceGenericVersion;
  if (!Version) {
    switch (parseArchAMDGCN(CPU: STI.getCPU())) {
    case AMDGPU::GK_GFX9_GENERIC:
      Version = GenericVersion::GFX9;
      break;
    case AMDGPU::GK_GFX9_4_GENERIC:
      Version = GenericVersion::GFX9_4;
      break;
    case AMDGPU::GK_GFX10_1_GENERIC:
      Version = GenericVersion::GFX10_1;
      break;
    case AMDGPU::GK_GFX10_3_GENERIC:
      Version = GenericVersion::GFX10_3;
      break;
    case AMDGPU::GK_GFX11_GENERIC:
      Version = GenericVersion::GFX11;
      break;
    case AMDGPU::GK_GFX12_GENERIC:
      Version = GenericVersion::GFX12;
      break;
    default:
      // Non-generic targets carry no generic version field.
      break;
    }
  }

  // Versions start at 1.
  if (Version) {
    if (Version > ELF::EF_AMDGPU_GENERIC_VERSION_MAX)
      report_fatal_error(reason: "Cannot encode generic code object version " +
                         Twine(Version) +
                         " - no ELF flag can represent this version!");
    Flags |= (Version << ELF::EF_AMDGPU_GENERIC_VERSION_OFFSET);
  }

  return Flags;
}
878
// Intentionally empty: the .amdgcn_target directive is presumably
// assembly-only with no object-file payload — TODO confirm against the
// textual streamer counterpart.
void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}
880
881void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
882 MCStreamer &OS = getStreamer();
883 OS.pushSection();
884 Header.EmitKernelCodeT(OS, Ctx&: getContext());
885 OS.popSection();
886}
887
888void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
889 unsigned Type) {
890 auto *Symbol = static_cast<MCSymbolELF *>(
891 getStreamer().getContext().getOrCreateSymbol(Name: SymbolName));
892 Symbol->setType(Type);
893}
894
// Declare Symbol as an LDS (group-segment) object of the given size and
// alignment, placed in the special SHN_AMDGPU_LDS pseudo-section.
void AMDGPUTargetELFStreamer::emitAMDGPULDS(MCSymbol *Symbol, unsigned Size,
                                            Align Alignment) {
  auto *SymbolELF = static_cast<MCSymbolELF *>(Symbol);
  SymbolELF->setType(ELF::STT_OBJECT);

  // Default to global binding unless the source already set one.
  if (!SymbolELF->isBindingSet())
    SymbolELF->setBinding(ELF::STB_GLOBAL);

  // declareCommon reports a conflict with a prior, different declaration
  // of the same symbol.
  if (SymbolELF->declareCommon(Size, Alignment)) {
    report_fatal_error(reason: "Symbol: " + Symbol->getName() +
                       " redeclared as different type");
  }

  SymbolELF->setIndex(ELF::SHN_AMDGPU_LDS);
  SymbolELF->setSize(MCConstantExpr::create(Value: Size, Ctx&: getContext()));
}
911
// Emit the NT_AMD_HSA_ISA_NAME note carrying the target-ID string. The
// descriptor size is a (DescEnd - DescBegin) label-difference expression
// so it resolves to the string length after emission.
bool AMDGPUTargetELFStreamer::EmitISAVersion() {
  // Create two labels to mark the beginning and end of the desc field
  // and a MCExpr to calculate the size of the desc field.
  auto &Context = getContext();
  auto *DescBegin = Context.createTempSymbol();
  auto *DescEnd = Context.createTempSymbol();
  auto *DescSZ = MCBinaryExpr::createSub(
      LHS: MCSymbolRefExpr::create(Symbol: DescEnd, Ctx&: Context),
      RHS: MCSymbolRefExpr::create(Symbol: DescBegin, Ctx&: Context), Ctx&: Context);

  EmitNote(Name: ElfNote::NoteNameV2, DescSZ, NoteType: ELF::NT_AMD_HSA_ISA_NAME,
           EmitDesc: [&](MCELFStreamer &OS) {
             OS.emitLabel(Symbol: DescBegin);
             OS.emitBytes(Data: getTargetID()->toString());
             OS.emitLabel(Symbol: DescEnd);
           });
  // Always succeeds for the ELF streamer.
  return true;
}
930
// Verify the V3 HSA metadata document and, if valid, emit it as a
// msgpack blob in an NT_AMDGPU_METADATA note. Returns false (emitting
// nothing) when verification fails.
bool AMDGPUTargetELFStreamer::EmitHSAMetadata(msgpack::Document &HSAMetadataDoc,
                                              bool Strict) {
  HSAMD::V3::MetadataVerifier Verifier(Strict);
  if (!Verifier.verify(HSAMetadataRoot&: HSAMetadataDoc.getRoot()))
    return false;

  // Serialize the document to its binary msgpack form.
  std::string HSAMetadataString;
  HSAMetadataDoc.writeToBlob(Blob&: HSAMetadataString);

  // Create two labels to mark the beginning and end of the desc field
  // and a MCExpr to calculate the size of the desc field.
  auto &Context = getContext();
  auto *DescBegin = Context.createTempSymbol();
  auto *DescEnd = Context.createTempSymbol();
  auto *DescSZ = MCBinaryExpr::createSub(
      LHS: MCSymbolRefExpr::create(Symbol: DescEnd, Ctx&: Context),
      RHS: MCSymbolRefExpr::create(Symbol: DescBegin, Ctx&: Context), Ctx&: Context);

  EmitNote(Name: ElfNote::NoteNameV3, DescSZ, NoteType: ELF::NT_AMDGPU_METADATA,
           EmitDesc: [&](MCELFStreamer &OS) {
             OS.emitLabel(Symbol: DescBegin);
             OS.emitBytes(Data: HSAMetadataString);
             OS.emitLabel(Symbol: DescEnd);
           });
  return true;
}
957
// Pad past the end of the kernel code so the hardware instruction
// prefetcher never runs off into unmapped or garbage bytes: align to the
// instruction cache line, then emit FillSize bytes of a harmless padding
// instruction.
bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
  // Encodings of s_code_end and s_nop 0, used as 32-bit fill patterns.
  const uint32_t Encoded_s_code_end = 0xbf9f0000;
  const uint32_t Encoded_s_nop = 0xbf800000;
  uint32_t Encoded_pad = Encoded_s_code_end;

  // Instruction cache line size in bytes.
  const unsigned Log2CacheLineSize = AMDGPU::isGFX11Plus(STI) ? 7 : 6;
  const unsigned CacheLineSize = 1u << Log2CacheLineSize;

  // Extra padding amount in bytes to support prefetch mode 3.
  unsigned FillSize = 3 * CacheLineSize;

  // NOTE(review): gfx90a pads with s_nop and a larger 16-line fill —
  // presumably a hardware-specific requirement; confirm against the ISA
  // errata before changing.
  if (AMDGPU::isGFX90A(STI)) {
    Encoded_pad = Encoded_s_nop;
    FillSize = 16 * CacheLineSize;
  }

  MCStreamer &OS = getStreamer();
  OS.pushSection();
  // Align up to a cache-line boundary using the pad encoding, then emit
  // FillSize further bytes of it.
  OS.emitValueToAlignment(Alignment: Align(CacheLineSize), Fill: Encoded_pad, FillLen: 4);
  for (unsigned I = 0; I < FillSize; I += 4)
    OS.emitInt32(Value: Encoded_pad);
  OS.popSection();
  return true;
}
983
// Emit the <kernel>.kd symbol and the amdhsa::kernel_descriptor_t bytes
// behind it, field by field in the struct's layout order, with reserved
// regions zero-filled. The descriptor's entry-point field is a
// (kernel code - descriptor) label difference resolved at link time.
void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
    const MCSubtargetInfo &STI, StringRef KernelName,
    const MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR,
    const MCExpr *NextSGPR, const MCExpr *ReserveVCC,
    const MCExpr *ReserveFlatScr) {
  auto &Streamer = getStreamer();
  auto &Context = Streamer.getContext();

  auto *KernelCodeSymbol =
      static_cast<MCSymbolELF *>(Context.getOrCreateSymbol(Name: Twine(KernelName)));
  auto *KernelDescriptorSymbol = static_cast<MCSymbolELF *>(
      Context.getOrCreateSymbol(Name: Twine(KernelName) + Twine(".kd")));

  // Copy kernel descriptor symbol's binding, other and visibility from the
  // kernel code symbol.
  KernelDescriptorSymbol->setBinding(KernelCodeSymbol->getBinding());
  KernelDescriptorSymbol->setOther(KernelCodeSymbol->getOther());
  KernelDescriptorSymbol->setVisibility(KernelCodeSymbol->getVisibility());
  // Kernel descriptor symbol's type and size are fixed.
  KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
  KernelDescriptorSymbol->setSize(
      MCConstantExpr::create(Value: sizeof(amdhsa::kernel_descriptor_t), Ctx&: Context));

  // The visibility of the kernel code symbol must be protected or less to allow
  // static relocations from the kernel descriptor to be used.
  if (KernelCodeSymbol->getVisibility() == ELF::STV_DEFAULT)
    KernelCodeSymbol->setVisibility(ELF::STV_PROTECTED);

  // Emit each descriptor field with exactly the width it has in
  // kernel_descriptor_t, in declaration order.
  Streamer.emitLabel(Symbol: KernelDescriptorSymbol);
  Streamer.emitValue(
      Value: KernelDescriptor.group_segment_fixed_size,
      Size: sizeof(amdhsa::kernel_descriptor_t::group_segment_fixed_size));
  Streamer.emitValue(
      Value: KernelDescriptor.private_segment_fixed_size,
      Size: sizeof(amdhsa::kernel_descriptor_t::private_segment_fixed_size));
  Streamer.emitValue(Value: KernelDescriptor.kernarg_size,
                     Size: sizeof(amdhsa::kernel_descriptor_t::kernarg_size));

  // reserved0 is zero-filled.
  for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved0); ++i)
    Streamer.emitInt8(Value: 0u);

  // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
  // expression being created is:
  //   (start of kernel code) - (start of kernel descriptor)
  // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
  Streamer.emitValue(
      Value: MCBinaryExpr::createSub(
          LHS: MCSymbolRefExpr::create(Symbol: KernelCodeSymbol, specifier: AMDGPUMCExpr::S_REL64,
                                  Ctx&: Context),
          RHS: MCSymbolRefExpr::create(Symbol: KernelDescriptorSymbol, Ctx&: Context), Ctx&: Context),
      Size: sizeof(amdhsa::kernel_descriptor_t::kernel_code_entry_byte_offset));
  // reserved1 is zero-filled.
  for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved1); ++i)
    Streamer.emitInt8(Value: 0u);
  Streamer.emitValue(Value: KernelDescriptor.compute_pgm_rsrc3,
                     Size: sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc3));
  Streamer.emitValue(Value: KernelDescriptor.compute_pgm_rsrc1,
                     Size: sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc1));
  Streamer.emitValue(Value: KernelDescriptor.compute_pgm_rsrc2,
                     Size: sizeof(amdhsa::kernel_descriptor_t::compute_pgm_rsrc2));
  Streamer.emitValue(
      Value: KernelDescriptor.kernel_code_properties,
      Size: sizeof(amdhsa::kernel_descriptor_t::kernel_code_properties));
  Streamer.emitValue(Value: KernelDescriptor.kernarg_preload,
                     Size: sizeof(amdhsa::kernel_descriptor_t::kernarg_preload));
  // reserved3 is zero-filled.
  for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved3); ++i)
    Streamer.emitInt8(Value: 0u);
}
1051