1//===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// AMDGPU HSA Metadata Streamer.
11///
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPUHSAMetadataStreamer.h"
16#include "AMDGPU.h"
17#include "GCNSubtarget.h"
18#include "MCTargetDesc/AMDGPUTargetStreamer.h"
19#include "SIMachineFunctionInfo.h"
20#include "SIProgramInfo.h"
21#include "llvm/IR/Module.h"
22#include "llvm/MC/MCContext.h"
23#include "llvm/MC/MCExpr.h"
24using namespace llvm;
25
26static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
27 const DataLayout &DL) {
28 Type *Ty = Arg.getType();
29 MaybeAlign ArgAlign;
30 if (Arg.hasByRefAttr()) {
31 Ty = Arg.getParamByRefType();
32 ArgAlign = Arg.getParamAlign();
33 }
34
35 if (!ArgAlign)
36 ArgAlign = DL.getABITypeAlign(Ty);
37
38 return std::pair(Ty, *ArgAlign);
39}
40
41namespace llvm {
42
43static cl::opt<bool> DumpHSAMetadata(
44 "amdgpu-dump-hsa-metadata",
45 cl::desc("Dump AMDGPU HSA Metadata"));
46static cl::opt<bool> VerifyHSAMetadata(
47 "amdgpu-verify-hsa-metadata",
48 cl::desc("Verify AMDGPU HSA Metadata"));
49
50namespace AMDGPU::HSAMD {
51
52//===----------------------------------------------------------------------===//
53// HSAMetadataStreamerV4
54//===----------------------------------------------------------------------===//
55
56void MetadataStreamerMsgPackV4::dump(StringRef HSAMetadataString) const {
57 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
58}
59
60void MetadataStreamerMsgPackV4::verify(StringRef HSAMetadataString) const {
61 errs() << "AMDGPU HSA Metadata Parser Test: ";
62
63 msgpack::Document FromHSAMetadataString;
64
65 if (!FromHSAMetadataString.fromYAML(S: HSAMetadataString)) {
66 errs() << "FAIL\n";
67 return;
68 }
69
70 std::string ToHSAMetadataString;
71 raw_string_ostream StrOS(ToHSAMetadataString);
72 FromHSAMetadataString.toYAML(OS&: StrOS);
73
74 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
75 if (HSAMetadataString != ToHSAMetadataString) {
76 errs() << "Original input: " << HSAMetadataString << '\n'
77 << "Produced output: " << StrOS.str() << '\n';
78 }
79}
80
81std::optional<StringRef>
82MetadataStreamerMsgPackV4::getAccessQualifier(StringRef AccQual) const {
83 return StringSwitch<std::optional<StringRef>>(AccQual)
84 .Case(S: "read_only", Value: StringRef("read_only"))
85 .Case(S: "write_only", Value: StringRef("write_only"))
86 .Case(S: "read_write", Value: StringRef("read_write"))
87 .Default(Value: std::nullopt);
88}
89
90std::optional<StringRef> MetadataStreamerMsgPackV4::getAddressSpaceQualifier(
91 unsigned AddressSpace) const {
92 switch (AddressSpace) {
93 case AMDGPUAS::PRIVATE_ADDRESS:
94 return StringRef("private");
95 case AMDGPUAS::GLOBAL_ADDRESS:
96 return StringRef("global");
97 case AMDGPUAS::CONSTANT_ADDRESS:
98 return StringRef("constant");
99 case AMDGPUAS::LOCAL_ADDRESS:
100 return StringRef("local");
101 case AMDGPUAS::FLAT_ADDRESS:
102 return StringRef("generic");
103 case AMDGPUAS::REGION_ADDRESS:
104 return StringRef("region");
105 default:
106 return std::nullopt;
107 }
108}
109
110StringRef
111MetadataStreamerMsgPackV4::getValueKind(Type *Ty, StringRef TypeQual,
112 StringRef BaseTypeName) const {
113 if (TypeQual.contains(Other: "pipe"))
114 return "pipe";
115
116 return StringSwitch<StringRef>(BaseTypeName)
117 .Case(S: "image1d_t", Value: "image")
118 .Case(S: "image1d_array_t", Value: "image")
119 .Case(S: "image1d_buffer_t", Value: "image")
120 .Case(S: "image2d_t", Value: "image")
121 .Case(S: "image2d_array_t", Value: "image")
122 .Case(S: "image2d_array_depth_t", Value: "image")
123 .Case(S: "image2d_array_msaa_t", Value: "image")
124 .Case(S: "image2d_array_msaa_depth_t", Value: "image")
125 .Case(S: "image2d_depth_t", Value: "image")
126 .Case(S: "image2d_msaa_t", Value: "image")
127 .Case(S: "image2d_msaa_depth_t", Value: "image")
128 .Case(S: "image3d_t", Value: "image")
129 .Case(S: "sampler_t", Value: "sampler")
130 .Case(S: "queue_t", Value: "queue")
131 .Default(Value: isa<PointerType>(Val: Ty)
132 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
133 ? "dynamic_shared_pointer"
134 : "global_buffer")
135 : "by_value");
136}
137
138std::string MetadataStreamerMsgPackV4::getTypeName(Type *Ty,
139 bool Signed) const {
140 switch (Ty->getTypeID()) {
141 case Type::IntegerTyID: {
142 if (!Signed)
143 return (Twine('u') + getTypeName(Ty, Signed: true)).str();
144
145 auto BitWidth = Ty->getIntegerBitWidth();
146 switch (BitWidth) {
147 case 8:
148 return "char";
149 case 16:
150 return "short";
151 case 32:
152 return "int";
153 case 64:
154 return "long";
155 default:
156 return (Twine('i') + Twine(BitWidth)).str();
157 }
158 }
159 case Type::HalfTyID:
160 return "half";
161 case Type::FloatTyID:
162 return "float";
163 case Type::DoubleTyID:
164 return "double";
165 case Type::FixedVectorTyID: {
166 auto VecTy = cast<FixedVectorType>(Val: Ty);
167 auto ElTy = VecTy->getElementType();
168 auto NumElements = VecTy->getNumElements();
169 return (Twine(getTypeName(Ty: ElTy, Signed)) + Twine(NumElements)).str();
170 }
171 default:
172 return "unknown";
173 }
174}
175
176msgpack::ArrayDocNode
177MetadataStreamerMsgPackV4::getWorkGroupDimensions(MDNode *Node) const {
178 auto Dims = HSAMetadataDoc->getArrayNode();
179 if (Node->getNumOperands() != 3)
180 return Dims;
181
182 for (auto &Op : Node->operands())
183 Dims.push_back(N: Dims.getDocument()->getNode(
184 V: uint64_t(mdconst::extract<ConstantInt>(MD: Op)->getZExtValue())));
185 return Dims;
186}
187
188void MetadataStreamerMsgPackV4::emitVersion() {
189 auto Version = HSAMetadataDoc->getArrayNode();
190 Version.push_back(N: Version.getDocument()->getNode(V: VersionMajorV4));
191 Version.push_back(N: Version.getDocument()->getNode(V: VersionMinorV4));
192 getRootMetadata(Key: "amdhsa.version") = Version;
193}
194
195void MetadataStreamerMsgPackV4::emitTargetID(
196 const IsaInfo::AMDGPUTargetID &TargetID) {
197 getRootMetadata(Key: "amdhsa.target") =
198 HSAMetadataDoc->getNode(V: TargetID.toString(), /*Copy=*/true);
199}
200
201void MetadataStreamerMsgPackV4::emitPrintf(const Module &Mod) {
202 auto Node = Mod.getNamedMetadata(Name: "llvm.printf.fmts");
203 if (!Node)
204 return;
205
206 auto Printf = HSAMetadataDoc->getArrayNode();
207 for (auto *Op : Node->operands())
208 if (Op->getNumOperands())
209 Printf.push_back(N: Printf.getDocument()->getNode(
210 V: cast<MDString>(Val: Op->getOperand(I: 0))->getString(), /*Copy=*/true));
211 getRootMetadata(Key: "amdhsa.printf") = Printf;
212}
213
214void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func,
215 msgpack::MapDocNode Kern) {
216 // TODO: What about other languages?
217 auto Node = Func.getParent()->getNamedMetadata(Name: "opencl.ocl.version");
218 if (!Node || !Node->getNumOperands())
219 return;
220 auto Op0 = Node->getOperand(i: 0);
221 if (Op0->getNumOperands() <= 1)
222 return;
223
224 Kern[".language"] = Kern.getDocument()->getNode(V: "OpenCL C");
225 auto LanguageVersion = Kern.getDocument()->getArrayNode();
226 LanguageVersion.push_back(N: Kern.getDocument()->getNode(
227 V: mdconst::extract<ConstantInt>(MD: Op0->getOperand(I: 0))->getZExtValue()));
228 LanguageVersion.push_back(N: Kern.getDocument()->getNode(
229 V: mdconst::extract<ConstantInt>(MD: Op0->getOperand(I: 1))->getZExtValue()));
230 Kern[".language_version"] = LanguageVersion;
231}
232
233void MetadataStreamerMsgPackV4::emitKernelAttrs(const Function &Func,
234 msgpack::MapDocNode Kern) {
235
236 if (auto Node = Func.getMetadata(Kind: "reqd_work_group_size"))
237 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
238 if (auto Node = Func.getMetadata(Kind: "work_group_size_hint"))
239 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
240 if (auto Node = Func.getMetadata(Kind: "vec_type_hint")) {
241 Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
242 V: getTypeName(
243 Ty: cast<ValueAsMetadata>(Val: Node->getOperand(I: 0))->getType(),
244 Signed: mdconst::extract<ConstantInt>(MD: Node->getOperand(I: 1))->getZExtValue()),
245 /*Copy=*/true);
246 }
247 if (Func.hasFnAttribute(Kind: "runtime-handle")) {
248 Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
249 V: Func.getFnAttribute(Kind: "runtime-handle").getValueAsString().str(),
250 /*Copy=*/true);
251 }
252 if (Func.hasFnAttribute(Kind: "device-init"))
253 Kern[".kind"] = Kern.getDocument()->getNode(V: "init");
254 else if (Func.hasFnAttribute(Kind: "device-fini"))
255 Kern[".kind"] = Kern.getDocument()->getNode(V: "fini");
256}
257
258void MetadataStreamerMsgPackV4::emitKernelArgs(const MachineFunction &MF,
259 msgpack::MapDocNode Kern) {
260 auto &Func = MF.getFunction();
261 unsigned Offset = 0;
262 auto Args = HSAMetadataDoc->getArrayNode();
263 for (auto &Arg : Func.args())
264 emitKernelArg(Arg, Offset, Args);
265
266 emitHiddenKernelArgs(MF, Offset, Args);
267
268 Kern[".args"] = Args;
269}
270
271void MetadataStreamerMsgPackV4::emitKernelArg(const Argument &Arg,
272 unsigned &Offset,
273 msgpack::ArrayDocNode Args) {
274 auto Func = Arg.getParent();
275 auto ArgNo = Arg.getArgNo();
276 const MDNode *Node;
277
278 StringRef Name;
279 Node = Func->getMetadata(Kind: "kernel_arg_name");
280 if (Node && ArgNo < Node->getNumOperands())
281 Name = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString();
282 else if (Arg.hasName())
283 Name = Arg.getName();
284
285 StringRef TypeName;
286 Node = Func->getMetadata(Kind: "kernel_arg_type");
287 if (Node && ArgNo < Node->getNumOperands())
288 TypeName = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString();
289
290 StringRef BaseTypeName;
291 Node = Func->getMetadata(Kind: "kernel_arg_base_type");
292 if (Node && ArgNo < Node->getNumOperands())
293 BaseTypeName = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString();
294
295 StringRef ActAccQual;
296 // Do we really need NoAlias check here?
297 if (Arg.getType()->isPointerTy() && Arg.hasNoAliasAttr()) {
298 if (Arg.onlyReadsMemory())
299 ActAccQual = "read_only";
300 else if (Arg.hasAttribute(Kind: Attribute::WriteOnly))
301 ActAccQual = "write_only";
302 }
303
304 StringRef AccQual;
305 Node = Func->getMetadata(Kind: "kernel_arg_access_qual");
306 if (Node && ArgNo < Node->getNumOperands())
307 AccQual = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString();
308
309 StringRef TypeQual;
310 Node = Func->getMetadata(Kind: "kernel_arg_type_qual");
311 if (Node && ArgNo < Node->getNumOperands())
312 TypeQual = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString();
313
314 const DataLayout &DL = Func->getDataLayout();
315
316 MaybeAlign PointeeAlign;
317 Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
318
319 // FIXME: Need to distinguish in memory alignment from pointer alignment.
320 if (auto PtrTy = dyn_cast<PointerType>(Val: Ty)) {
321 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
322 PointeeAlign = Arg.getParamAlign().valueOrOne();
323 }
324
325 // There's no distinction between byval aggregates and raw aggregates.
326 Type *ArgTy;
327 Align ArgAlign;
328 std::tie(args&: ArgTy, args&: ArgAlign) = getArgumentTypeAlign(Arg, DL);
329
330 emitKernelArg(DL, Ty: ArgTy, Alignment: ArgAlign,
331 ValueKind: getValueKind(Ty: ArgTy, TypeQual, BaseTypeName), Offset, Args,
332 PointeeAlign, Name, TypeName, BaseTypeName, ActAccQual,
333 AccQual, TypeQual);
334}
335
336void MetadataStreamerMsgPackV4::emitKernelArg(
337 const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
338 unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
339 StringRef Name, StringRef TypeName, StringRef BaseTypeName,
340 StringRef ActAccQual, StringRef AccQual, StringRef TypeQual) {
341 auto Arg = Args.getDocument()->getMapNode();
342
343 if (!Name.empty())
344 Arg[".name"] = Arg.getDocument()->getNode(V: Name, /*Copy=*/true);
345 if (!TypeName.empty())
346 Arg[".type_name"] = Arg.getDocument()->getNode(V: TypeName, /*Copy=*/true);
347 auto Size = DL.getTypeAllocSize(Ty);
348 Arg[".size"] = Arg.getDocument()->getNode(V: Size);
349 Offset = alignTo(Size: Offset, A: Alignment);
350 Arg[".offset"] = Arg.getDocument()->getNode(V: Offset);
351 Offset += Size;
352 Arg[".value_kind"] = Arg.getDocument()->getNode(V: ValueKind, /*Copy=*/true);
353 if (PointeeAlign)
354 Arg[".pointee_align"] = Arg.getDocument()->getNode(V: PointeeAlign->value());
355
356 if (auto PtrTy = dyn_cast<PointerType>(Val: Ty))
357 if (auto Qualifier = getAddressSpaceQualifier(AddressSpace: PtrTy->getAddressSpace()))
358 // Limiting address space to emit only for a certain ValueKind.
359 if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer")
360 Arg[".address_space"] = Arg.getDocument()->getNode(V: *Qualifier,
361 /*Copy=*/true);
362
363 if (auto AQ = getAccessQualifier(AccQual))
364 Arg[".access"] = Arg.getDocument()->getNode(V: *AQ, /*Copy=*/true);
365
366 if (auto AAQ = getAccessQualifier(AccQual: ActAccQual))
367 Arg[".actual_access"] = Arg.getDocument()->getNode(V: *AAQ, /*Copy=*/true);
368
369 SmallVector<StringRef, 1> SplitTypeQuals;
370 TypeQual.split(A&: SplitTypeQuals, Separator: " ", MaxSplit: -1, KeepEmpty: false);
371 for (StringRef Key : SplitTypeQuals) {
372 if (Key == "const")
373 Arg[".is_const"] = Arg.getDocument()->getNode(V: true);
374 else if (Key == "restrict")
375 Arg[".is_restrict"] = Arg.getDocument()->getNode(V: true);
376 else if (Key == "volatile")
377 Arg[".is_volatile"] = Arg.getDocument()->getNode(V: true);
378 else if (Key == "pipe")
379 Arg[".is_pipe"] = Arg.getDocument()->getNode(V: true);
380 }
381
382 Args.push_back(N: Arg);
383}
384
385void MetadataStreamerMsgPackV4::emitHiddenKernelArgs(
386 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
387 auto &Func = MF.getFunction();
388 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
389
390 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(F: Func);
391 if (!HiddenArgNumBytes)
392 return;
393
394 const Module *M = Func.getParent();
395 auto &DL = M->getDataLayout();
396 auto Int64Ty = Type::getInt64Ty(C&: Func.getContext());
397
398 Offset = alignTo(Size: Offset, A: ST.getAlignmentForImplicitArgPtr());
399
400 if (HiddenArgNumBytes >= 8)
401 emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_x", Offset,
402 Args);
403 if (HiddenArgNumBytes >= 16)
404 emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_y", Offset,
405 Args);
406 if (HiddenArgNumBytes >= 24)
407 emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_z", Offset,
408 Args);
409
410 auto Int8PtrTy =
411 PointerType::get(C&: Func.getContext(), AddressSpace: AMDGPUAS::GLOBAL_ADDRESS);
412
413 if (HiddenArgNumBytes >= 32) {
414 // We forbid the use of features requiring hostcall when compiling OpenCL
415 // before code object V5, which makes the mutual exclusion between the
416 // "printf buffer" and "hostcall buffer" here sound.
417 if (M->getNamedMetadata(Name: "llvm.printf.fmts"))
418 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_printf_buffer", Offset,
419 Args);
420 else if (!Func.hasFnAttribute(Kind: "amdgpu-no-hostcall-ptr"))
421 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_hostcall_buffer", Offset,
422 Args);
423 else
424 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none", Offset, Args);
425 }
426
427 // Emit "default queue" and "completion action" arguments if enqueue kernel is
428 // used, otherwise emit dummy "none" arguments.
429 if (HiddenArgNumBytes >= 40) {
430 if (!Func.hasFnAttribute(Kind: "amdgpu-no-default-queue")) {
431 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_default_queue", Offset,
432 Args);
433 } else {
434 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none", Offset, Args);
435 }
436 }
437
438 if (HiddenArgNumBytes >= 48) {
439 if (!Func.hasFnAttribute(Kind: "amdgpu-no-completion-action")) {
440 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_completion_action", Offset,
441 Args);
442 } else {
443 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none", Offset, Args);
444 }
445 }
446
447 // Emit the pointer argument for multi-grid object.
448 if (HiddenArgNumBytes >= 56) {
449 if (!Func.hasFnAttribute(Kind: "amdgpu-no-multigrid-sync-arg")) {
450 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_multigrid_sync_arg", Offset,
451 Args);
452 } else {
453 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none", Offset, Args);
454 }
455 }
456}
457
458msgpack::MapDocNode
459MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
460 const SIProgramInfo &ProgramInfo,
461 unsigned CodeObjectVersion) const {
462 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
463 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
464 const Function &F = MF.getFunction();
465
466 auto Kern = HSAMetadataDoc->getMapNode();
467
468 Align MaxKernArgAlign;
469 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
470 V: STM.getKernArgSegmentSize(F, MaxAlign&: MaxKernArgAlign));
471 Kern[".group_segment_fixed_size"] =
472 Kern.getDocument()->getNode(V: ProgramInfo.LDSSize);
473 DelayedExprs->assignDocNode(DN&: Kern[".private_segment_fixed_size"],
474 Type: msgpack::Type::UInt, ExprValue: ProgramInfo.ScratchSize);
475 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) {
476 DelayedExprs->assignDocNode(DN&: Kern[".uses_dynamic_stack"],
477 Type: msgpack::Type::Boolean,
478 ExprValue: ProgramInfo.DynamicCallStack);
479 }
480
481 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP())
482 Kern[".workgroup_processor_mode"] =
483 Kern.getDocument()->getNode(V: ProgramInfo.WgpMode);
484
485 // FIXME: The metadata treats the minimum as 16?
486 Kern[".kernarg_segment_align"] =
487 Kern.getDocument()->getNode(V: std::max(a: Align(4), b: MaxKernArgAlign).value());
488 Kern[".wavefront_size"] =
489 Kern.getDocument()->getNode(V: STM.getWavefrontSize());
490 DelayedExprs->assignDocNode(DN&: Kern[".sgpr_count"], Type: msgpack::Type::UInt,
491 ExprValue: ProgramInfo.NumSGPR);
492 DelayedExprs->assignDocNode(DN&: Kern[".vgpr_count"], Type: msgpack::Type::UInt,
493 ExprValue: ProgramInfo.NumVGPR);
494
495 // Only add AGPR count to metadata for supported devices
496 if (STM.hasMAIInsts()) {
497 DelayedExprs->assignDocNode(DN&: Kern[".agpr_count"], Type: msgpack::Type::UInt,
498 ExprValue: ProgramInfo.NumAccVGPR);
499 }
500
501 Kern[".max_flat_workgroup_size"] =
502 Kern.getDocument()->getNode(V: MFI.getMaxFlatWorkGroupSize());
503 unsigned NumWGX = MFI.getMaxNumWorkGroupsX();
504 unsigned NumWGY = MFI.getMaxNumWorkGroupsY();
505 unsigned NumWGZ = MFI.getMaxNumWorkGroupsZ();
506 if (NumWGX != 0 && NumWGY != 0 && NumWGZ != 0) {
507 Kern[".max_num_workgroups_x"] = Kern.getDocument()->getNode(V: NumWGX);
508 Kern[".max_num_workgroups_y"] = Kern.getDocument()->getNode(V: NumWGY);
509 Kern[".max_num_workgroups_z"] = Kern.getDocument()->getNode(V: NumWGZ);
510 }
511 Kern[".sgpr_spill_count"] =
512 Kern.getDocument()->getNode(V: MFI.getNumSpilledSGPRs());
513 Kern[".vgpr_spill_count"] =
514 Kern.getDocument()->getNode(V: MFI.getNumSpilledVGPRs());
515
516 return Kern;
517}
518
519bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
520 DelayedExprs->resolveDelayedExpressions();
521 return TargetStreamer.EmitHSAMetadata(HSAMetadata&: *HSAMetadataDoc, Strict: true);
522}
523
524void MetadataStreamerMsgPackV4::begin(const Module &Mod,
525 const IsaInfo::AMDGPUTargetID &TargetID) {
526 emitVersion();
527 emitTargetID(TargetID);
528 emitPrintf(Mod);
529 getRootMetadata(Key: "amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
530 DelayedExprs->clear();
531}
532
533void MetadataStreamerMsgPackV4::end() {
534 DelayedExprs->resolveDelayedExpressions();
535 std::string HSAMetadataString;
536 raw_string_ostream StrOS(HSAMetadataString);
537 HSAMetadataDoc->toYAML(OS&: StrOS);
538
539 if (DumpHSAMetadata)
540 dump(HSAMetadataString: StrOS.str());
541 if (VerifyHSAMetadata)
542 verify(HSAMetadataString: StrOS.str());
543}
544
545void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF,
546 const SIProgramInfo &ProgramInfo) {
547 auto &Func = MF.getFunction();
548 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL &&
549 Func.getCallingConv() != CallingConv::SPIR_KERNEL)
550 return;
551
552 auto CodeObjectVersion =
553 AMDGPU::getAMDHSACodeObjectVersion(M: *Func.getParent());
554 auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion);
555
556 auto Kernels =
557 getRootMetadata(Key: "amdhsa.kernels").getArray(/*Convert=*/true);
558
559 {
560 Kern[".name"] = Kern.getDocument()->getNode(V: Func.getName());
561 Kern[".symbol"] = Kern.getDocument()->getNode(
562 V: (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
563 emitKernelLanguage(Func, Kern);
564 emitKernelAttrs(Func, Kern);
565 emitKernelArgs(MF, Kern);
566 }
567
568 Kernels.push_back(N: Kern);
569}
570
571//===----------------------------------------------------------------------===//
572// HSAMetadataStreamerV5
573//===----------------------------------------------------------------------===//
574
575void MetadataStreamerMsgPackV5::emitVersion() {
576 auto Version = HSAMetadataDoc->getArrayNode();
577 Version.push_back(N: Version.getDocument()->getNode(V: VersionMajorV5));
578 Version.push_back(N: Version.getDocument()->getNode(V: VersionMinorV5));
579 getRootMetadata(Key: "amdhsa.version") = Version;
580}
581
582void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
583 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
584 auto &Func = MF.getFunction();
585 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
586
587 // No implicit kernel argument is used.
588 if (ST.getImplicitArgNumBytes(F: Func) == 0)
589 return;
590
591 const Module *M = Func.getParent();
592 auto &DL = M->getDataLayout();
593 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
594
595 auto Int64Ty = Type::getInt64Ty(C&: Func.getContext());
596 auto Int32Ty = Type::getInt32Ty(C&: Func.getContext());
597 auto Int16Ty = Type::getInt16Ty(C&: Func.getContext());
598
599 Offset = alignTo(Size: Offset, A: ST.getAlignmentForImplicitArgPtr());
600 emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_block_count_x", Offset, Args);
601 emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_block_count_y", Offset, Args);
602 emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_block_count_z", Offset, Args);
603
604 emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_group_size_x", Offset, Args);
605 emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_group_size_y", Offset, Args);
606 emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_group_size_z", Offset, Args);
607
608 emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_remainder_x", Offset, Args);
609 emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_remainder_y", Offset, Args);
610 emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_remainder_z", Offset, Args);
611
612 // Reserved for hidden_tool_correlation_id.
613 Offset += 8;
614
615 Offset += 8; // Reserved.
616
617 emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_x", Offset, Args);
618 emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_y", Offset, Args);
619 emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_z", Offset, Args);
620
621 emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_grid_dims", Offset, Args);
622
623 Offset += 6; // Reserved.
624 auto Int8PtrTy =
625 PointerType::get(C&: Func.getContext(), AddressSpace: AMDGPUAS::GLOBAL_ADDRESS);
626
627 if (M->getNamedMetadata(Name: "llvm.printf.fmts")) {
628 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_printf_buffer", Offset,
629 Args);
630 } else {
631 Offset += 8; // Skipped.
632 }
633
634 if (!Func.hasFnAttribute(Kind: "amdgpu-no-hostcall-ptr")) {
635 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_hostcall_buffer", Offset,
636 Args);
637 } else {
638 Offset += 8; // Skipped.
639 }
640
641 if (!Func.hasFnAttribute(Kind: "amdgpu-no-multigrid-sync-arg")) {
642 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_multigrid_sync_arg", Offset,
643 Args);
644 } else {
645 Offset += 8; // Skipped.
646 }
647
648 if (!Func.hasFnAttribute(Kind: "amdgpu-no-heap-ptr"))
649 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_heap_v1", Offset, Args);
650 else
651 Offset += 8; // Skipped.
652
653 if (!Func.hasFnAttribute(Kind: "amdgpu-no-default-queue")) {
654 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_default_queue", Offset,
655 Args);
656 } else {
657 Offset += 8; // Skipped.
658 }
659
660 if (!Func.hasFnAttribute(Kind: "amdgpu-no-completion-action")) {
661 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_completion_action", Offset,
662 Args);
663 } else {
664 Offset += 8; // Skipped.
665 }
666
667 // Emit argument for hidden dynamic lds size
668 if (MFI.isDynamicLDSUsed()) {
669 emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_dynamic_lds_size", Offset,
670 Args);
671 } else {
672 Offset += 4; // skipped
673 }
674
675 Offset += 68; // Reserved.
676
677 // hidden_private_base and hidden_shared_base are only when the subtarget has
678 // ApertureRegs.
679 if (!ST.hasApertureRegs()) {
680 emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_private_base", Offset, Args);
681 emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_shared_base", Offset, Args);
682 } else {
683 Offset += 8; // Skipped.
684 }
685
686 if (MFI.getUserSGPRInfo().hasQueuePtr())
687 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_queue_ptr", Offset, Args);
688}
689
690void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func,
691 msgpack::MapDocNode Kern) {
692 MetadataStreamerMsgPackV4::emitKernelAttrs(Func, Kern);
693
694 if (Func.getFnAttribute(Kind: "uniform-work-group-size").getValueAsBool())
695 Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(V: 1);
696}
697
698//===----------------------------------------------------------------------===//
699// HSAMetadataStreamerV6
700//===----------------------------------------------------------------------===//
701
702void MetadataStreamerMsgPackV6::emitVersion() {
703 auto Version = HSAMetadataDoc->getArrayNode();
704 Version.push_back(N: Version.getDocument()->getNode(V: VersionMajorV6));
705 Version.push_back(N: Version.getDocument()->getNode(V: VersionMinorV6));
706 getRootMetadata(Key: "amdhsa.version") = Version;
707}
708
709} // end namespace AMDGPU::HSAMD
710} // end namespace llvm
711