1//===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// AMDGPU HSA Metadata Streamer.
11///
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDGPUHSAMetadataStreamer.h"
16#include "AMDGPU.h"
17#include "GCNSubtarget.h"
18#include "MCTargetDesc/AMDGPUTargetStreamer.h"
19#include "SIMachineFunctionInfo.h"
20#include "SIProgramInfo.h"
21#include "llvm/IR/Module.h"
22#include "llvm/MC/MCContext.h"
23#include "llvm/MC/MCExpr.h"
24#include "llvm/Target/TargetLoweringObjectFile.h"
25
26using namespace llvm;
27
28static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
29 const DataLayout &DL) {
30 Type *Ty = Arg.getType();
31 MaybeAlign ArgAlign;
32 if (Arg.hasByRefAttr()) {
33 Ty = Arg.getParamByRefType();
34 ArgAlign = Arg.getParamAlign();
35 }
36
37 if (!ArgAlign)
38 ArgAlign = DL.getABITypeAlign(Ty);
39
40 return std::pair(Ty, *ArgAlign);
41}
42
43/// Find the mangled symbol name for the runtime handle for \p EnqueuedBlock
44static std::string getEnqueuedBlockSymbolName(const AMDGPUTargetMachine &TM,
45 const Function &EnqueuedBlock) {
46 const MDNode *Associated =
47 EnqueuedBlock.getMetadata(KindID: LLVMContext::MD_associated);
48 if (!Associated)
49 return "";
50
51 auto *VM = cast<ValueAsMetadata>(Val: Associated->getOperand(I: 0));
52 auto *RuntimeHandle =
53 dyn_cast<GlobalVariable>(Val: VM->getValue()->stripPointerCasts());
54 if (!RuntimeHandle ||
55 RuntimeHandle->getSection() != ".amdgpu.kernel.runtime.handle")
56 return "";
57
58 SmallString<128> Name;
59 TM.getNameWithPrefix(Name, GV: RuntimeHandle,
60 Mang&: TM.getObjFileLowering()->getMangler());
61 return Name.str().str();
62}
63
64namespace llvm {
65
66static cl::opt<bool> DumpHSAMetadata(
67 "amdgpu-dump-hsa-metadata",
68 cl::desc("Dump AMDGPU HSA Metadata"));
69static cl::opt<bool> VerifyHSAMetadata(
70 "amdgpu-verify-hsa-metadata",
71 cl::desc("Verify AMDGPU HSA Metadata"));
72
73namespace AMDGPU::HSAMD {
74
75//===----------------------------------------------------------------------===//
76// HSAMetadataStreamerV4
77//===----------------------------------------------------------------------===//
78
79void MetadataStreamerMsgPackV4::dump(StringRef HSAMetadataString) const {
80 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
81}
82
83void MetadataStreamerMsgPackV4::verify(StringRef HSAMetadataString) const {
84 errs() << "AMDGPU HSA Metadata Parser Test: ";
85
86 msgpack::Document FromHSAMetadataString;
87
88 if (!FromHSAMetadataString.fromYAML(S: HSAMetadataString)) {
89 errs() << "FAIL\n";
90 return;
91 }
92
93 std::string ToHSAMetadataString;
94 raw_string_ostream StrOS(ToHSAMetadataString);
95 FromHSAMetadataString.toYAML(OS&: StrOS);
96
97 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
98 if (HSAMetadataString != ToHSAMetadataString) {
99 errs() << "Original input: " << HSAMetadataString << '\n'
100 << "Produced output: " << StrOS.str() << '\n';
101 }
102}
103
104std::optional<StringRef>
105MetadataStreamerMsgPackV4::getAccessQualifier(StringRef AccQual) const {
106 return StringSwitch<std::optional<StringRef>>(AccQual)
107 .Case(S: "read_only", Value: StringRef("read_only"))
108 .Case(S: "write_only", Value: StringRef("write_only"))
109 .Case(S: "read_write", Value: StringRef("read_write"))
110 .Default(Value: std::nullopt);
111}
112
113std::optional<StringRef> MetadataStreamerMsgPackV4::getAddressSpaceQualifier(
114 unsigned AddressSpace) const {
115 switch (AddressSpace) {
116 case AMDGPUAS::PRIVATE_ADDRESS:
117 return StringRef("private");
118 case AMDGPUAS::GLOBAL_ADDRESS:
119 return StringRef("global");
120 case AMDGPUAS::CONSTANT_ADDRESS:
121 return StringRef("constant");
122 case AMDGPUAS::LOCAL_ADDRESS:
123 return StringRef("local");
124 case AMDGPUAS::FLAT_ADDRESS:
125 return StringRef("generic");
126 case AMDGPUAS::REGION_ADDRESS:
127 return StringRef("region");
128 default:
129 return std::nullopt;
130 }
131}
132
133StringRef
134MetadataStreamerMsgPackV4::getValueKind(Type *Ty, StringRef TypeQual,
135 StringRef BaseTypeName) const {
136 if (TypeQual.contains(Other: "pipe"))
137 return "pipe";
138
139 return StringSwitch<StringRef>(BaseTypeName)
140 .Case(S: "image1d_t", Value: "image")
141 .Case(S: "image1d_array_t", Value: "image")
142 .Case(S: "image1d_buffer_t", Value: "image")
143 .Case(S: "image2d_t", Value: "image")
144 .Case(S: "image2d_array_t", Value: "image")
145 .Case(S: "image2d_array_depth_t", Value: "image")
146 .Case(S: "image2d_array_msaa_t", Value: "image")
147 .Case(S: "image2d_array_msaa_depth_t", Value: "image")
148 .Case(S: "image2d_depth_t", Value: "image")
149 .Case(S: "image2d_msaa_t", Value: "image")
150 .Case(S: "image2d_msaa_depth_t", Value: "image")
151 .Case(S: "image3d_t", Value: "image")
152 .Case(S: "sampler_t", Value: "sampler")
153 .Case(S: "queue_t", Value: "queue")
154 .Default(Value: isa<PointerType>(Val: Ty)
155 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
156 ? "dynamic_shared_pointer"
157 : "global_buffer")
158 : "by_value");
159}
160
161std::string MetadataStreamerMsgPackV4::getTypeName(Type *Ty,
162 bool Signed) const {
163 switch (Ty->getTypeID()) {
164 case Type::IntegerTyID: {
165 if (!Signed)
166 return (Twine('u') + getTypeName(Ty, Signed: true)).str();
167
168 auto BitWidth = Ty->getIntegerBitWidth();
169 switch (BitWidth) {
170 case 8:
171 return "char";
172 case 16:
173 return "short";
174 case 32:
175 return "int";
176 case 64:
177 return "long";
178 default:
179 return (Twine('i') + Twine(BitWidth)).str();
180 }
181 }
182 case Type::HalfTyID:
183 return "half";
184 case Type::FloatTyID:
185 return "float";
186 case Type::DoubleTyID:
187 return "double";
188 case Type::FixedVectorTyID: {
189 auto *VecTy = cast<FixedVectorType>(Val: Ty);
190 auto *ElTy = VecTy->getElementType();
191 auto NumElements = VecTy->getNumElements();
192 return (Twine(getTypeName(Ty: ElTy, Signed)) + Twine(NumElements)).str();
193 }
194 default:
195 return "unknown";
196 }
197}
198
199msgpack::ArrayDocNode
200MetadataStreamerMsgPackV4::getWorkGroupDimensions(MDNode *Node) const {
201 auto Dims = HSAMetadataDoc->getArrayNode();
202 if (Node->getNumOperands() != 3)
203 return Dims;
204
205 for (auto &Op : Node->operands())
206 Dims.push_back(N: Dims.getDocument()->getNode(
207 V: mdconst::extract<ConstantInt>(MD: Op)->getZExtValue()));
208 return Dims;
209}
210
211void MetadataStreamerMsgPackV4::emitVersion() {
212 auto Version = HSAMetadataDoc->getArrayNode();
213 Version.push_back(N: Version.getDocument()->getNode(V: VersionMajorV4));
214 Version.push_back(N: Version.getDocument()->getNode(V: VersionMinorV4));
215 getRootMetadata(Key: "amdhsa.version") = Version;
216}
217
218void MetadataStreamerMsgPackV4::emitTargetID(const TargetID &TargetID) {
219 getRootMetadata(Key: "amdhsa.target") =
220 HSAMetadataDoc->getNode(V: TargetID.toString(), /*Copy=*/true);
221}
222
223void MetadataStreamerMsgPackV4::emitPrintf(const Module &Mod) {
224 auto *Node = Mod.getNamedMetadata(Name: "llvm.printf.fmts");
225 if (!Node)
226 return;
227
228 auto Printf = HSAMetadataDoc->getArrayNode();
229 for (auto *Op : Node->operands())
230 if (Op->getNumOperands())
231 Printf.push_back(N: Printf.getDocument()->getNode(
232 V: cast<MDString>(Val: Op->getOperand(I: 0))->getString(), /*Copy=*/true));
233 getRootMetadata(Key: "amdhsa.printf") = Printf;
234}
235
236void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func,
237 msgpack::MapDocNode Kern) {
238 // TODO: What about other languages?
239 auto *Node = Func.getParent()->getNamedMetadata(Name: "opencl.ocl.version");
240 if (!Node || !Node->getNumOperands())
241 return;
242 auto *Op0 = Node->getOperand(i: 0);
243 if (Op0->getNumOperands() <= 1)
244 return;
245
246 Kern[".language"] = Kern.getDocument()->getNode(V: "OpenCL C");
247 auto LanguageVersion = Kern.getDocument()->getArrayNode();
248 LanguageVersion.push_back(N: Kern.getDocument()->getNode(
249 V: mdconst::extract<ConstantInt>(MD: Op0->getOperand(I: 0))->getZExtValue()));
250 LanguageVersion.push_back(N: Kern.getDocument()->getNode(
251 V: mdconst::extract<ConstantInt>(MD: Op0->getOperand(I: 1))->getZExtValue()));
252 Kern[".language_version"] = LanguageVersion;
253}
254
255void MetadataStreamerMsgPackV4::emitKernelAttrs(const AMDGPUTargetMachine &TM,
256 const MachineFunction &MF,
257 msgpack::MapDocNode Kern) {
258 const Function &Func = MF.getFunction();
259 if (auto *Node = Func.getMetadata(Kind: "reqd_work_group_size"))
260 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
261 if (auto *Node = Func.getMetadata(Kind: "work_group_size_hint"))
262 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
263 if (auto *Node = Func.getMetadata(Kind: "vec_type_hint")) {
264 Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
265 V: getTypeName(
266 Ty: cast<ValueAsMetadata>(Val: Node->getOperand(I: 0))->getType(),
267 Signed: mdconst::extract<ConstantInt>(MD: Node->getOperand(I: 1))->getZExtValue()),
268 /*Copy=*/true);
269 }
270
271 std::string HandleName = getEnqueuedBlockSymbolName(TM, EnqueuedBlock: Func);
272 if (!HandleName.empty()) {
273 Kern[".device_enqueue_symbol"] =
274 Kern.getDocument()->getNode(V: std::move(HandleName), /*Copy=*/true);
275 }
276
277 if (Func.hasFnAttribute(Kind: "device-init"))
278 Kern[".kind"] = Kern.getDocument()->getNode(V: "init");
279 else if (Func.hasFnAttribute(Kind: "device-fini"))
280 Kern[".kind"] = Kern.getDocument()->getNode(V: "fini");
281}
282
283void MetadataStreamerMsgPackV4::emitKernelArgs(const MachineFunction &MF,
284 msgpack::MapDocNode Kern) {
285 auto &Func = MF.getFunction();
286 unsigned Offset = 0;
287 auto Args = HSAMetadataDoc->getArrayNode();
288 for (auto &Arg : Func.args()) {
289 if (Arg.hasAttribute(Kind: "amdgpu-hidden-argument"))
290 continue;
291
292 emitKernelArg(Arg, Offset, Args);
293 }
294
295 emitHiddenKernelArgs(MF, Offset, Args);
296
297 Kern[".args"] = Args;
298}
299
300void MetadataStreamerMsgPackV4::emitKernelArg(const Argument &Arg,
301 unsigned &Offset,
302 msgpack::ArrayDocNode Args) {
303 const auto *Func = Arg.getParent();
304 auto ArgNo = Arg.getArgNo();
305 const MDNode *Node;
306
307 StringRef Name;
308 Node = Func->getMetadata(Kind: "kernel_arg_name");
309 if (Node && ArgNo < Node->getNumOperands())
310 Name = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString();
311 else if (Arg.hasName())
312 Name = Arg.getName();
313
314 StringRef TypeName;
315 Node = Func->getMetadata(Kind: "kernel_arg_type");
316 if (Node && ArgNo < Node->getNumOperands())
317 TypeName = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString();
318
319 StringRef BaseTypeName;
320 Node = Func->getMetadata(Kind: "kernel_arg_base_type");
321 if (Node && ArgNo < Node->getNumOperands())
322 BaseTypeName = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString();
323
324 StringRef ActAccQual;
325 // Do we really need NoAlias check here?
326 if (Arg.getType()->isPointerTy() && Arg.hasNoAliasAttr()) {
327 if (Arg.onlyReadsMemory())
328 ActAccQual = "read_only";
329 else if (Arg.hasAttribute(Kind: Attribute::WriteOnly))
330 ActAccQual = "write_only";
331 }
332
333 StringRef AccQual;
334 Node = Func->getMetadata(Kind: "kernel_arg_access_qual");
335 if (Node && ArgNo < Node->getNumOperands())
336 AccQual = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString();
337
338 StringRef TypeQual;
339 Node = Func->getMetadata(Kind: "kernel_arg_type_qual");
340 if (Node && ArgNo < Node->getNumOperands())
341 TypeQual = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString();
342
343 const DataLayout &DL = Func->getDataLayout();
344
345 MaybeAlign PointeeAlign;
346 Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
347
348 // FIXME: Need to distinguish in memory alignment from pointer alignment.
349 if (auto *PtrTy = dyn_cast<PointerType>(Val: Ty)) {
350 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
351 PointeeAlign = Arg.getParamAlign().valueOrOne();
352 }
353
354 // There's no distinction between byval aggregates and raw aggregates.
355 Type *ArgTy;
356 Align ArgAlign;
357 std::tie(args&: ArgTy, args&: ArgAlign) = getArgumentTypeAlign(Arg, DL);
358
359 emitKernelArg(DL, Ty: ArgTy, Alignment: ArgAlign,
360 ValueKind: getValueKind(Ty: ArgTy, TypeQual, BaseTypeName), Offset, Args,
361 PointeeAlign, Name, TypeName, BaseTypeName, ActAccQual,
362 AccQual, TypeQual);
363}
364
365void MetadataStreamerMsgPackV4::emitKernelArg(
366 const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
367 unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
368 StringRef Name, StringRef TypeName, StringRef BaseTypeName,
369 StringRef ActAccQual, StringRef AccQual, StringRef TypeQual) {
370 auto Arg = Args.getDocument()->getMapNode();
371
372 if (!Name.empty())
373 Arg[".name"] = Arg.getDocument()->getNode(V: Name, /*Copy=*/true);
374 if (!TypeName.empty())
375 Arg[".type_name"] = Arg.getDocument()->getNode(V: TypeName, /*Copy=*/true);
376 auto Size = DL.getTypeAllocSize(Ty);
377 Arg[".size"] = Arg.getDocument()->getNode(V: Size);
378 Offset = alignTo(Size: Offset, A: Alignment);
379 Arg[".offset"] = Arg.getDocument()->getNode(V: Offset);
380 Offset += Size;
381 Arg[".value_kind"] = Arg.getDocument()->getNode(V: ValueKind, /*Copy=*/true);
382 if (PointeeAlign)
383 Arg[".pointee_align"] = Arg.getDocument()->getNode(V: PointeeAlign->value());
384
385 if (auto *PtrTy = dyn_cast<PointerType>(Val: Ty))
386 if (auto Qualifier = getAddressSpaceQualifier(AddressSpace: PtrTy->getAddressSpace()))
387 // Limiting address space to emit only for a certain ValueKind.
388 if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer")
389 Arg[".address_space"] = Arg.getDocument()->getNode(V: *Qualifier,
390 /*Copy=*/true);
391
392 if (auto AQ = getAccessQualifier(AccQual))
393 Arg[".access"] = Arg.getDocument()->getNode(V: *AQ, /*Copy=*/true);
394
395 if (auto AAQ = getAccessQualifier(AccQual: ActAccQual))
396 Arg[".actual_access"] = Arg.getDocument()->getNode(V: *AAQ, /*Copy=*/true);
397
398 SmallVector<StringRef, 1> SplitTypeQuals;
399 TypeQual.split(A&: SplitTypeQuals, Separator: " ", MaxSplit: -1, KeepEmpty: false);
400 for (StringRef Key : SplitTypeQuals) {
401 if (Key == "const")
402 Arg[".is_const"] = Arg.getDocument()->getNode(V: true);
403 else if (Key == "restrict")
404 Arg[".is_restrict"] = Arg.getDocument()->getNode(V: true);
405 else if (Key == "volatile")
406 Arg[".is_volatile"] = Arg.getDocument()->getNode(V: true);
407 else if (Key == "pipe")
408 Arg[".is_pipe"] = Arg.getDocument()->getNode(V: true);
409 }
410
411 Args.push_back(N: Arg);
412}
413
414void MetadataStreamerMsgPackV4::emitHiddenKernelArgs(
415 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
416 auto &Func = MF.getFunction();
417 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
418
419 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(F: Func);
420 if (!HiddenArgNumBytes)
421 return;
422
423 const Module *M = Func.getParent();
424 auto &DL = M->getDataLayout();
425 auto *Int64Ty = Type::getInt64Ty(C&: Func.getContext());
426
427 Offset = alignTo(Size: Offset, A: ST.getAlignmentForImplicitArgPtr());
428
429 if (HiddenArgNumBytes >= 8)
430 emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_x", Offset,
431 Args);
432 if (HiddenArgNumBytes >= 16)
433 emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_y", Offset,
434 Args);
435 if (HiddenArgNumBytes >= 24)
436 emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_z", Offset,
437 Args);
438
439 auto *Int8PtrTy =
440 PointerType::get(C&: Func.getContext(), AddressSpace: AMDGPUAS::GLOBAL_ADDRESS);
441
442 if (HiddenArgNumBytes >= 32) {
443 // We forbid the use of features requiring hostcall when compiling OpenCL
444 // before code object V5, which makes the mutual exclusion between the
445 // "printf buffer" and "hostcall buffer" here sound.
446 if (M->getNamedMetadata(Name: "llvm.printf.fmts"))
447 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_printf_buffer", Offset,
448 Args);
449 else if (!Func.hasFnAttribute(Kind: "amdgpu-no-hostcall-ptr"))
450 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_hostcall_buffer", Offset,
451 Args);
452 else
453 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none", Offset, Args);
454 }
455
456 // Emit "default queue" and "completion action" arguments if enqueue kernel is
457 // used, otherwise emit dummy "none" arguments.
458 if (HiddenArgNumBytes >= 40) {
459 if (!Func.hasFnAttribute(Kind: "amdgpu-no-default-queue")) {
460 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_default_queue", Offset,
461 Args);
462 } else {
463 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none", Offset, Args);
464 }
465 }
466
467 if (HiddenArgNumBytes >= 48) {
468 if (!Func.hasFnAttribute(Kind: "amdgpu-no-completion-action")) {
469 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_completion_action", Offset,
470 Args);
471 } else {
472 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none", Offset, Args);
473 }
474 }
475
476 // Emit the pointer argument for multi-grid object.
477 if (HiddenArgNumBytes >= 56) {
478 if (!Func.hasFnAttribute(Kind: "amdgpu-no-multigrid-sync-arg")) {
479 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_multigrid_sync_arg", Offset,
480 Args);
481 } else {
482 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none", Offset, Args);
483 }
484 }
485}
486
487msgpack::MapDocNode
488MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
489 const SIProgramInfo &ProgramInfo,
490 unsigned CodeObjectVersion) const {
491 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
492 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
493 const Function &F = MF.getFunction();
494
495 auto Kern = HSAMetadataDoc->getMapNode();
496
497 Align MaxKernArgAlign;
498 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
499 V: STM.getKernArgSegmentSize(F, MaxAlign&: MaxKernArgAlign));
500 Kern[".group_segment_fixed_size"] =
501 Kern.getDocument()->getNode(V: ProgramInfo.LDSSize);
502 DelayedExprs->assignDocNode(DN&: Kern[".private_segment_fixed_size"],
503 Type: msgpack::Type::UInt, ExprValue: ProgramInfo.ScratchSize);
504 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) {
505 DelayedExprs->assignDocNode(DN&: Kern[".uses_dynamic_stack"],
506 Type: msgpack::Type::Boolean,
507 ExprValue: ProgramInfo.DynamicCallStack);
508 }
509
510 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP())
511 Kern[".workgroup_processor_mode"] =
512 Kern.getDocument()->getNode(V: ProgramInfo.WgpMode);
513
514 // FIXME: The metadata treats the minimum as 16?
515 Kern[".kernarg_segment_align"] =
516 Kern.getDocument()->getNode(V: std::max(a: Align(4), b: MaxKernArgAlign).value());
517 Kern[".wavefront_size"] =
518 Kern.getDocument()->getNode(V: STM.getWavefrontSize());
519 DelayedExprs->assignDocNode(DN&: Kern[".sgpr_count"], Type: msgpack::Type::UInt,
520 ExprValue: ProgramInfo.NumSGPR);
521 DelayedExprs->assignDocNode(DN&: Kern[".vgpr_count"], Type: msgpack::Type::UInt,
522 ExprValue: ProgramInfo.NumVGPR);
523
524 // Only add AGPR count to metadata for supported devices
525 if (STM.hasMAIInsts()) {
526 DelayedExprs->assignDocNode(DN&: Kern[".agpr_count"], Type: msgpack::Type::UInt,
527 ExprValue: ProgramInfo.NumAccVGPR);
528 }
529
530 Kern[".max_flat_workgroup_size"] =
531 Kern.getDocument()->getNode(V: MFI.getMaxFlatWorkGroupSize());
532
533 uint32_t NumWGY = MFI.getMaxNumWorkGroupsY();
534 uint32_t NumWGZ = MFI.getMaxNumWorkGroupsZ();
535 uint32_t NumWGX = MFI.getMaxNumWorkGroupsX();
536
537 // TODO: Should consider 0 invalid and reject in IR verifier.
538 if (NumWGX != std::numeric_limits<uint32_t>::max() && NumWGX != 0)
539 Kern[".max_num_workgroups_x"] = Kern.getDocument()->getNode(V: NumWGX);
540
541 if (NumWGY != std::numeric_limits<uint32_t>::max() && NumWGY != 0)
542 Kern[".max_num_workgroups_y"] = Kern.getDocument()->getNode(V: NumWGY);
543
544 if (NumWGZ != std::numeric_limits<uint32_t>::max() && NumWGZ != 0)
545 Kern[".max_num_workgroups_z"] = Kern.getDocument()->getNode(V: NumWGZ);
546
547 Kern[".sgpr_spill_count"] =
548 Kern.getDocument()->getNode(V: MFI.getNumSpilledSGPRs());
549 Kern[".vgpr_spill_count"] =
550 Kern.getDocument()->getNode(V: MFI.getNumSpilledVGPRs());
551
552 return Kern;
553}
554
555bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
556 DelayedExprs->resolveDelayedExpressions();
557 return TargetStreamer.EmitHSAMetadata(HSAMetadata&: *HSAMetadataDoc, Strict: true);
558}
559
560void MetadataStreamerMsgPackV4::begin(const Module &Mod,
561 const TargetID &TargetID) {
562 emitVersion();
563 emitTargetID(TargetID);
564 emitPrintf(Mod);
565 getRootMetadata(Key: "amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
566 DelayedExprs->clear();
567}
568
569void MetadataStreamerMsgPackV4::end() {
570 DelayedExprs->resolveDelayedExpressions();
571 std::string HSAMetadataString;
572 raw_string_ostream StrOS(HSAMetadataString);
573 HSAMetadataDoc->toYAML(OS&: StrOS);
574
575 if (DumpHSAMetadata)
576 dump(HSAMetadataString: StrOS.str());
577 if (VerifyHSAMetadata)
578 verify(HSAMetadataString: StrOS.str());
579}
580
581void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF,
582 const SIProgramInfo &ProgramInfo) {
583 auto &Func = MF.getFunction();
584 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL &&
585 Func.getCallingConv() != CallingConv::SPIR_KERNEL)
586 return;
587
588 auto CodeObjectVersion =
589 AMDGPU::getAMDHSACodeObjectVersion(M: *Func.getParent());
590 auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion);
591
592 auto Kernels =
593 getRootMetadata(Key: "amdhsa.kernels").getArray(/*Convert=*/true);
594
595 auto &TM = static_cast<const AMDGPUTargetMachine &>(MF.getTarget());
596 {
597 Kern[".name"] = Kern.getDocument()->getNode(V: Func.getName());
598 Kern[".symbol"] = Kern.getDocument()->getNode(
599 V: (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
600 emitKernelLanguage(Func, Kern);
601 emitKernelAttrs(TM, MF, Kern);
602 emitKernelArgs(MF, Kern);
603 }
604
605 Kernels.push_back(N: Kern);
606}
607
608//===----------------------------------------------------------------------===//
609// HSAMetadataStreamerV5
610//===----------------------------------------------------------------------===//
611
612void MetadataStreamerMsgPackV5::emitVersion() {
613 auto Version = HSAMetadataDoc->getArrayNode();
614 Version.push_back(N: Version.getDocument()->getNode(V: VersionMajorV5));
615 Version.push_back(N: Version.getDocument()->getNode(V: VersionMinorV5));
616 getRootMetadata(Key: "amdhsa.version") = Version;
617}
618
619void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
620 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
621 auto &Func = MF.getFunction();
622 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
623
624 // No implicit kernel argument is used.
625 if (ST.getImplicitArgNumBytes(F: Func) == 0)
626 return;
627
628 const Module *M = Func.getParent();
629 auto &DL = M->getDataLayout();
630 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
631
632 auto *Int64Ty = Type::getInt64Ty(C&: Func.getContext());
633 auto *Int32Ty = Type::getInt32Ty(C&: Func.getContext());
634 auto *Int16Ty = Type::getInt16Ty(C&: Func.getContext());
635
636 Offset = alignTo(Size: Offset, A: ST.getAlignmentForImplicitArgPtr());
637 emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_block_count_x", Offset, Args);
638 emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_block_count_y", Offset, Args);
639 emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_block_count_z", Offset, Args);
640
641 emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_group_size_x", Offset, Args);
642 emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_group_size_y", Offset, Args);
643 emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_group_size_z", Offset, Args);
644
645 emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_remainder_x", Offset, Args);
646 emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_remainder_y", Offset, Args);
647 emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_remainder_z", Offset, Args);
648
649 // Reserved for hidden_tool_correlation_id.
650 Offset += 8;
651
652 Offset += 8; // Reserved.
653
654 emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_x", Offset, Args);
655 emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_y", Offset, Args);
656 emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_z", Offset, Args);
657
658 emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_grid_dims", Offset, Args);
659
660 Offset += 6; // Reserved.
661 auto *Int8PtrTy =
662 PointerType::get(C&: Func.getContext(), AddressSpace: AMDGPUAS::GLOBAL_ADDRESS);
663
664 if (M->getNamedMetadata(Name: "llvm.printf.fmts")) {
665 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_printf_buffer", Offset,
666 Args);
667 } else {
668 Offset += 8; // Skipped.
669 }
670
671 if (!Func.hasFnAttribute(Kind: "amdgpu-no-hostcall-ptr")) {
672 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_hostcall_buffer", Offset,
673 Args);
674 } else {
675 Offset += 8; // Skipped.
676 }
677
678 if (!Func.hasFnAttribute(Kind: "amdgpu-no-multigrid-sync-arg")) {
679 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_multigrid_sync_arg", Offset,
680 Args);
681 } else {
682 Offset += 8; // Skipped.
683 }
684
685 if (!Func.hasFnAttribute(Kind: "amdgpu-no-heap-ptr"))
686 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_heap_v1", Offset, Args);
687 else
688 Offset += 8; // Skipped.
689
690 if (!Func.hasFnAttribute(Kind: "amdgpu-no-default-queue")) {
691 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_default_queue", Offset,
692 Args);
693 } else {
694 Offset += 8; // Skipped.
695 }
696
697 if (!Func.hasFnAttribute(Kind: "amdgpu-no-completion-action")) {
698 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_completion_action", Offset,
699 Args);
700 } else {
701 Offset += 8; // Skipped.
702 }
703
704 // Emit argument for hidden dynamic lds size
705 if (MFI.isDynamicLDSUsed()) {
706 emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_dynamic_lds_size", Offset,
707 Args);
708 } else {
709 Offset += 4; // skipped
710 }
711
712 Offset += 68; // Reserved.
713
714 // hidden_private_base and hidden_shared_base are only when the subtarget has
715 // ApertureRegs.
716 if (!ST.hasApertureRegs()) {
717 emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_private_base", Offset, Args);
718 emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_shared_base", Offset, Args);
719 } else {
720 Offset += 8; // Skipped.
721 }
722
723 if (MFI.getUserSGPRInfo().hasQueuePtr())
724 emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_queue_ptr", Offset, Args);
725}
726
727void MetadataStreamerMsgPackV5::emitKernelAttrs(const AMDGPUTargetMachine &TM,
728 const MachineFunction &MF,
729 msgpack::MapDocNode Kern) {
730 MetadataStreamerMsgPackV4::emitKernelAttrs(TM, MF, Kern);
731
732 const Function &Func = MF.getFunction();
733 if (Func.hasFnAttribute(Kind: "uniform-work-group-size"))
734 Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(V: 1);
735}
736
737//===----------------------------------------------------------------------===//
738// HSAMetadataStreamerV6
739//===----------------------------------------------------------------------===//
740
741void MetadataStreamerMsgPackV6::emitVersion() {
742 auto Version = HSAMetadataDoc->getArrayNode();
743 Version.push_back(N: Version.getDocument()->getNode(V: VersionMajorV6));
744 Version.push_back(N: Version.getDocument()->getNode(V: VersionMinorV6));
745 getRootMetadata(Key: "amdhsa.version") = Version;
746}
747
748void MetadataStreamerMsgPackV6::emitKernelAttrs(const AMDGPUTargetMachine &TM,
749 const MachineFunction &MF,
750 msgpack::MapDocNode Kern) {
751 MetadataStreamerMsgPackV5::emitKernelAttrs(TM, MF, Kern);
752
753 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
754 ClusterDimsAttr Attr = MFI.getClusterDims();
755 if (Attr.isFixedDims()) {
756 msgpack::ArrayDocNode ClusterDimsNode = HSAMetadataDoc->getArrayNode();
757 ClusterDimsNode.push_back(N: HSAMetadataDoc->getNode(V: Attr.getDims()[0]));
758 ClusterDimsNode.push_back(N: HSAMetadataDoc->getNode(V: Attr.getDims()[1]));
759 ClusterDimsNode.push_back(N: HSAMetadataDoc->getNode(V: Attr.getDims()[2]));
760 Kern[".cluster_dims"] = ClusterDimsNode;
761 }
762}
763
764} // end namespace AMDGPU::HSAMD
765} // end namespace llvm
766