1 | //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// AMDGPU HSA Metadata Streamer. |
11 | /// |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "AMDGPUHSAMetadataStreamer.h" |
16 | #include "AMDGPU.h" |
17 | #include "GCNSubtarget.h" |
18 | #include "MCTargetDesc/AMDGPUTargetStreamer.h" |
19 | #include "SIMachineFunctionInfo.h" |
20 | #include "SIProgramInfo.h" |
21 | #include "llvm/IR/Module.h" |
22 | #include "llvm/MC/MCContext.h" |
23 | #include "llvm/MC/MCExpr.h" |
24 | #include "llvm/Target/TargetLoweringObjectFile.h" |
25 | |
26 | using namespace llvm; |
27 | |
28 | static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg, |
29 | const DataLayout &DL) { |
30 | Type *Ty = Arg.getType(); |
31 | MaybeAlign ArgAlign; |
32 | if (Arg.hasByRefAttr()) { |
33 | Ty = Arg.getParamByRefType(); |
34 | ArgAlign = Arg.getParamAlign(); |
35 | } |
36 | |
37 | if (!ArgAlign) |
38 | ArgAlign = DL.getABITypeAlign(Ty); |
39 | |
40 | return std::pair(Ty, *ArgAlign); |
41 | } |
42 | |
43 | /// Find the mangled symbol name for the runtime handle for \p EnqueuedBlock |
44 | static std::string getEnqueuedBlockSymbolName(const AMDGPUTargetMachine &TM, |
45 | const Function &EnqueuedBlock) { |
46 | const MDNode *Associated = |
47 | EnqueuedBlock.getMetadata(KindID: LLVMContext::MD_associated); |
48 | if (!Associated) |
49 | return "" ; |
50 | |
51 | auto *VM = cast<ValueAsMetadata>(Val: Associated->getOperand(I: 0)); |
52 | auto *RuntimeHandle = |
53 | dyn_cast<GlobalVariable>(Val: VM->getValue()->stripPointerCasts()); |
54 | if (!RuntimeHandle || |
55 | RuntimeHandle->getSection() != ".amdgpu.kernel.runtime.handle" ) |
56 | return "" ; |
57 | |
58 | SmallString<128> Name; |
59 | TM.getNameWithPrefix(Name, GV: RuntimeHandle, |
60 | Mang&: TM.getObjFileLowering()->getMangler()); |
61 | return Name.str().str(); |
62 | } |
63 | |
64 | namespace llvm { |
65 | |
66 | static cl::opt<bool> DumpHSAMetadata( |
67 | "amdgpu-dump-hsa-metadata" , |
68 | cl::desc("Dump AMDGPU HSA Metadata" )); |
69 | static cl::opt<bool> VerifyHSAMetadata( |
70 | "amdgpu-verify-hsa-metadata" , |
71 | cl::desc("Verify AMDGPU HSA Metadata" )); |
72 | |
73 | namespace AMDGPU::HSAMD { |
74 | |
75 | //===----------------------------------------------------------------------===// |
76 | // HSAMetadataStreamerV4 |
77 | //===----------------------------------------------------------------------===// |
78 | |
79 | void MetadataStreamerMsgPackV4::dump(StringRef HSAMetadataString) const { |
80 | errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; |
81 | } |
82 | |
83 | void MetadataStreamerMsgPackV4::verify(StringRef HSAMetadataString) const { |
84 | errs() << "AMDGPU HSA Metadata Parser Test: " ; |
85 | |
86 | msgpack::Document FromHSAMetadataString; |
87 | |
88 | if (!FromHSAMetadataString.fromYAML(S: HSAMetadataString)) { |
89 | errs() << "FAIL\n" ; |
90 | return; |
91 | } |
92 | |
93 | std::string ToHSAMetadataString; |
94 | raw_string_ostream StrOS(ToHSAMetadataString); |
95 | FromHSAMetadataString.toYAML(OS&: StrOS); |
96 | |
97 | errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL" ) << '\n'; |
98 | if (HSAMetadataString != ToHSAMetadataString) { |
99 | errs() << "Original input: " << HSAMetadataString << '\n' |
100 | << "Produced output: " << StrOS.str() << '\n'; |
101 | } |
102 | } |
103 | |
104 | std::optional<StringRef> |
105 | MetadataStreamerMsgPackV4::getAccessQualifier(StringRef AccQual) const { |
106 | return StringSwitch<std::optional<StringRef>>(AccQual) |
107 | .Case(S: "read_only" , Value: StringRef("read_only" )) |
108 | .Case(S: "write_only" , Value: StringRef("write_only" )) |
109 | .Case(S: "read_write" , Value: StringRef("read_write" )) |
110 | .Default(Value: std::nullopt); |
111 | } |
112 | |
113 | std::optional<StringRef> MetadataStreamerMsgPackV4::getAddressSpaceQualifier( |
114 | unsigned AddressSpace) const { |
115 | switch (AddressSpace) { |
116 | case AMDGPUAS::PRIVATE_ADDRESS: |
117 | return StringRef("private" ); |
118 | case AMDGPUAS::GLOBAL_ADDRESS: |
119 | return StringRef("global" ); |
120 | case AMDGPUAS::CONSTANT_ADDRESS: |
121 | return StringRef("constant" ); |
122 | case AMDGPUAS::LOCAL_ADDRESS: |
123 | return StringRef("local" ); |
124 | case AMDGPUAS::FLAT_ADDRESS: |
125 | return StringRef("generic" ); |
126 | case AMDGPUAS::REGION_ADDRESS: |
127 | return StringRef("region" ); |
128 | default: |
129 | return std::nullopt; |
130 | } |
131 | } |
132 | |
133 | StringRef |
134 | MetadataStreamerMsgPackV4::getValueKind(Type *Ty, StringRef TypeQual, |
135 | StringRef BaseTypeName) const { |
136 | if (TypeQual.contains(Other: "pipe" )) |
137 | return "pipe" ; |
138 | |
139 | return StringSwitch<StringRef>(BaseTypeName) |
140 | .Case(S: "image1d_t" , Value: "image" ) |
141 | .Case(S: "image1d_array_t" , Value: "image" ) |
142 | .Case(S: "image1d_buffer_t" , Value: "image" ) |
143 | .Case(S: "image2d_t" , Value: "image" ) |
144 | .Case(S: "image2d_array_t" , Value: "image" ) |
145 | .Case(S: "image2d_array_depth_t" , Value: "image" ) |
146 | .Case(S: "image2d_array_msaa_t" , Value: "image" ) |
147 | .Case(S: "image2d_array_msaa_depth_t" , Value: "image" ) |
148 | .Case(S: "image2d_depth_t" , Value: "image" ) |
149 | .Case(S: "image2d_msaa_t" , Value: "image" ) |
150 | .Case(S: "image2d_msaa_depth_t" , Value: "image" ) |
151 | .Case(S: "image3d_t" , Value: "image" ) |
152 | .Case(S: "sampler_t" , Value: "sampler" ) |
153 | .Case(S: "queue_t" , Value: "queue" ) |
154 | .Default(Value: isa<PointerType>(Val: Ty) |
155 | ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS |
156 | ? "dynamic_shared_pointer" |
157 | : "global_buffer" ) |
158 | : "by_value" ); |
159 | } |
160 | |
161 | std::string MetadataStreamerMsgPackV4::getTypeName(Type *Ty, |
162 | bool Signed) const { |
163 | switch (Ty->getTypeID()) { |
164 | case Type::IntegerTyID: { |
165 | if (!Signed) |
166 | return (Twine('u') + getTypeName(Ty, Signed: true)).str(); |
167 | |
168 | auto BitWidth = Ty->getIntegerBitWidth(); |
169 | switch (BitWidth) { |
170 | case 8: |
171 | return "char" ; |
172 | case 16: |
173 | return "short" ; |
174 | case 32: |
175 | return "int" ; |
176 | case 64: |
177 | return "long" ; |
178 | default: |
179 | return (Twine('i') + Twine(BitWidth)).str(); |
180 | } |
181 | } |
182 | case Type::HalfTyID: |
183 | return "half" ; |
184 | case Type::FloatTyID: |
185 | return "float" ; |
186 | case Type::DoubleTyID: |
187 | return "double" ; |
188 | case Type::FixedVectorTyID: { |
189 | auto *VecTy = cast<FixedVectorType>(Val: Ty); |
190 | auto *ElTy = VecTy->getElementType(); |
191 | auto NumElements = VecTy->getNumElements(); |
192 | return (Twine(getTypeName(Ty: ElTy, Signed)) + Twine(NumElements)).str(); |
193 | } |
194 | default: |
195 | return "unknown" ; |
196 | } |
197 | } |
198 | |
199 | msgpack::ArrayDocNode |
200 | MetadataStreamerMsgPackV4::getWorkGroupDimensions(MDNode *Node) const { |
201 | auto Dims = HSAMetadataDoc->getArrayNode(); |
202 | if (Node->getNumOperands() != 3) |
203 | return Dims; |
204 | |
205 | for (auto &Op : Node->operands()) |
206 | Dims.push_back(N: Dims.getDocument()->getNode( |
207 | V: uint64_t(mdconst::extract<ConstantInt>(MD: Op)->getZExtValue()))); |
208 | return Dims; |
209 | } |
210 | |
211 | void MetadataStreamerMsgPackV4::emitVersion() { |
212 | auto Version = HSAMetadataDoc->getArrayNode(); |
213 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMajorV4)); |
214 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMinorV4)); |
215 | getRootMetadata(Key: "amdhsa.version" ) = Version; |
216 | } |
217 | |
218 | void MetadataStreamerMsgPackV4::emitTargetID( |
219 | const IsaInfo::AMDGPUTargetID &TargetID) { |
220 | getRootMetadata(Key: "amdhsa.target" ) = |
221 | HSAMetadataDoc->getNode(V: TargetID.toString(), /*Copy=*/true); |
222 | } |
223 | |
224 | void MetadataStreamerMsgPackV4::emitPrintf(const Module &Mod) { |
225 | auto *Node = Mod.getNamedMetadata(Name: "llvm.printf.fmts" ); |
226 | if (!Node) |
227 | return; |
228 | |
229 | auto Printf = HSAMetadataDoc->getArrayNode(); |
230 | for (auto *Op : Node->operands()) |
231 | if (Op->getNumOperands()) |
232 | Printf.push_back(N: Printf.getDocument()->getNode( |
233 | V: cast<MDString>(Val: Op->getOperand(I: 0))->getString(), /*Copy=*/true)); |
234 | getRootMetadata(Key: "amdhsa.printf" ) = Printf; |
235 | } |
236 | |
237 | void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func, |
238 | msgpack::MapDocNode Kern) { |
239 | // TODO: What about other languages? |
240 | auto *Node = Func.getParent()->getNamedMetadata(Name: "opencl.ocl.version" ); |
241 | if (!Node || !Node->getNumOperands()) |
242 | return; |
243 | auto *Op0 = Node->getOperand(i: 0); |
244 | if (Op0->getNumOperands() <= 1) |
245 | return; |
246 | |
247 | Kern[".language" ] = Kern.getDocument()->getNode(V: "OpenCL C" ); |
248 | auto LanguageVersion = Kern.getDocument()->getArrayNode(); |
249 | LanguageVersion.push_back(N: Kern.getDocument()->getNode( |
250 | V: mdconst::extract<ConstantInt>(MD: Op0->getOperand(I: 0))->getZExtValue())); |
251 | LanguageVersion.push_back(N: Kern.getDocument()->getNode( |
252 | V: mdconst::extract<ConstantInt>(MD: Op0->getOperand(I: 1))->getZExtValue())); |
253 | Kern[".language_version" ] = LanguageVersion; |
254 | } |
255 | |
256 | void MetadataStreamerMsgPackV4::emitKernelAttrs(const AMDGPUTargetMachine &TM, |
257 | const Function &Func, |
258 | msgpack::MapDocNode Kern) { |
259 | |
260 | if (auto *Node = Func.getMetadata(Kind: "reqd_work_group_size" )) |
261 | Kern[".reqd_workgroup_size" ] = getWorkGroupDimensions(Node); |
262 | if (auto *Node = Func.getMetadata(Kind: "work_group_size_hint" )) |
263 | Kern[".workgroup_size_hint" ] = getWorkGroupDimensions(Node); |
264 | if (auto *Node = Func.getMetadata(Kind: "vec_type_hint" )) { |
265 | Kern[".vec_type_hint" ] = Kern.getDocument()->getNode( |
266 | V: getTypeName( |
267 | Ty: cast<ValueAsMetadata>(Val: Node->getOperand(I: 0))->getType(), |
268 | Signed: mdconst::extract<ConstantInt>(MD: Node->getOperand(I: 1))->getZExtValue()), |
269 | /*Copy=*/true); |
270 | } |
271 | |
272 | std::string HandleName = getEnqueuedBlockSymbolName(TM, EnqueuedBlock: Func); |
273 | if (!HandleName.empty()) { |
274 | Kern[".device_enqueue_symbol" ] = |
275 | Kern.getDocument()->getNode(V: std::move(HandleName), /*Copy=*/true); |
276 | } |
277 | |
278 | if (Func.hasFnAttribute(Kind: "device-init" )) |
279 | Kern[".kind" ] = Kern.getDocument()->getNode(V: "init" ); |
280 | else if (Func.hasFnAttribute(Kind: "device-fini" )) |
281 | Kern[".kind" ] = Kern.getDocument()->getNode(V: "fini" ); |
282 | } |
283 | |
284 | void MetadataStreamerMsgPackV4::emitKernelArgs(const MachineFunction &MF, |
285 | msgpack::MapDocNode Kern) { |
286 | auto &Func = MF.getFunction(); |
287 | unsigned Offset = 0; |
288 | auto Args = HSAMetadataDoc->getArrayNode(); |
289 | for (auto &Arg : Func.args()) { |
290 | if (Arg.hasAttribute(Kind: "amdgpu-hidden-argument" )) |
291 | continue; |
292 | |
293 | emitKernelArg(Arg, Offset, Args); |
294 | } |
295 | |
296 | emitHiddenKernelArgs(MF, Offset, Args); |
297 | |
298 | Kern[".args" ] = Args; |
299 | } |
300 | |
301 | void MetadataStreamerMsgPackV4::emitKernelArg(const Argument &Arg, |
302 | unsigned &Offset, |
303 | msgpack::ArrayDocNode Args) { |
304 | const auto *Func = Arg.getParent(); |
305 | auto ArgNo = Arg.getArgNo(); |
306 | const MDNode *Node; |
307 | |
308 | StringRef Name; |
309 | Node = Func->getMetadata(Kind: "kernel_arg_name" ); |
310 | if (Node && ArgNo < Node->getNumOperands()) |
311 | Name = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString(); |
312 | else if (Arg.hasName()) |
313 | Name = Arg.getName(); |
314 | |
315 | StringRef TypeName; |
316 | Node = Func->getMetadata(Kind: "kernel_arg_type" ); |
317 | if (Node && ArgNo < Node->getNumOperands()) |
318 | TypeName = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString(); |
319 | |
320 | StringRef BaseTypeName; |
321 | Node = Func->getMetadata(Kind: "kernel_arg_base_type" ); |
322 | if (Node && ArgNo < Node->getNumOperands()) |
323 | BaseTypeName = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString(); |
324 | |
325 | StringRef ActAccQual; |
326 | // Do we really need NoAlias check here? |
327 | if (Arg.getType()->isPointerTy() && Arg.hasNoAliasAttr()) { |
328 | if (Arg.onlyReadsMemory()) |
329 | ActAccQual = "read_only" ; |
330 | else if (Arg.hasAttribute(Kind: Attribute::WriteOnly)) |
331 | ActAccQual = "write_only" ; |
332 | } |
333 | |
334 | StringRef AccQual; |
335 | Node = Func->getMetadata(Kind: "kernel_arg_access_qual" ); |
336 | if (Node && ArgNo < Node->getNumOperands()) |
337 | AccQual = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString(); |
338 | |
339 | StringRef TypeQual; |
340 | Node = Func->getMetadata(Kind: "kernel_arg_type_qual" ); |
341 | if (Node && ArgNo < Node->getNumOperands()) |
342 | TypeQual = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString(); |
343 | |
344 | const DataLayout &DL = Func->getDataLayout(); |
345 | |
346 | MaybeAlign PointeeAlign; |
347 | Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType(); |
348 | |
349 | // FIXME: Need to distinguish in memory alignment from pointer alignment. |
350 | if (auto *PtrTy = dyn_cast<PointerType>(Val: Ty)) { |
351 | if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) |
352 | PointeeAlign = Arg.getParamAlign().valueOrOne(); |
353 | } |
354 | |
355 | // There's no distinction between byval aggregates and raw aggregates. |
356 | Type *ArgTy; |
357 | Align ArgAlign; |
358 | std::tie(args&: ArgTy, args&: ArgAlign) = getArgumentTypeAlign(Arg, DL); |
359 | |
360 | emitKernelArg(DL, Ty: ArgTy, Alignment: ArgAlign, |
361 | ValueKind: getValueKind(Ty: ArgTy, TypeQual, BaseTypeName), Offset, Args, |
362 | PointeeAlign, Name, TypeName, BaseTypeName, ActAccQual, |
363 | AccQual, TypeQual); |
364 | } |
365 | |
366 | void MetadataStreamerMsgPackV4::emitKernelArg( |
367 | const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind, |
368 | unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign, |
369 | StringRef Name, StringRef TypeName, StringRef BaseTypeName, |
370 | StringRef ActAccQual, StringRef AccQual, StringRef TypeQual) { |
371 | auto Arg = Args.getDocument()->getMapNode(); |
372 | |
373 | if (!Name.empty()) |
374 | Arg[".name" ] = Arg.getDocument()->getNode(V: Name, /*Copy=*/true); |
375 | if (!TypeName.empty()) |
376 | Arg[".type_name" ] = Arg.getDocument()->getNode(V: TypeName, /*Copy=*/true); |
377 | auto Size = DL.getTypeAllocSize(Ty); |
378 | Arg[".size" ] = Arg.getDocument()->getNode(V: Size); |
379 | Offset = alignTo(Size: Offset, A: Alignment); |
380 | Arg[".offset" ] = Arg.getDocument()->getNode(V: Offset); |
381 | Offset += Size; |
382 | Arg[".value_kind" ] = Arg.getDocument()->getNode(V: ValueKind, /*Copy=*/true); |
383 | if (PointeeAlign) |
384 | Arg[".pointee_align" ] = Arg.getDocument()->getNode(V: PointeeAlign->value()); |
385 | |
386 | if (auto *PtrTy = dyn_cast<PointerType>(Val: Ty)) |
387 | if (auto Qualifier = getAddressSpaceQualifier(AddressSpace: PtrTy->getAddressSpace())) |
388 | // Limiting address space to emit only for a certain ValueKind. |
389 | if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer" ) |
390 | Arg[".address_space" ] = Arg.getDocument()->getNode(V: *Qualifier, |
391 | /*Copy=*/true); |
392 | |
393 | if (auto AQ = getAccessQualifier(AccQual)) |
394 | Arg[".access" ] = Arg.getDocument()->getNode(V: *AQ, /*Copy=*/true); |
395 | |
396 | if (auto AAQ = getAccessQualifier(AccQual: ActAccQual)) |
397 | Arg[".actual_access" ] = Arg.getDocument()->getNode(V: *AAQ, /*Copy=*/true); |
398 | |
399 | SmallVector<StringRef, 1> SplitTypeQuals; |
400 | TypeQual.split(A&: SplitTypeQuals, Separator: " " , MaxSplit: -1, KeepEmpty: false); |
401 | for (StringRef Key : SplitTypeQuals) { |
402 | if (Key == "const" ) |
403 | Arg[".is_const" ] = Arg.getDocument()->getNode(V: true); |
404 | else if (Key == "restrict" ) |
405 | Arg[".is_restrict" ] = Arg.getDocument()->getNode(V: true); |
406 | else if (Key == "volatile" ) |
407 | Arg[".is_volatile" ] = Arg.getDocument()->getNode(V: true); |
408 | else if (Key == "pipe" ) |
409 | Arg[".is_pipe" ] = Arg.getDocument()->getNode(V: true); |
410 | } |
411 | |
412 | Args.push_back(N: Arg); |
413 | } |
414 | |
415 | void MetadataStreamerMsgPackV4::emitHiddenKernelArgs( |
416 | const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) { |
417 | auto &Func = MF.getFunction(); |
418 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
419 | |
420 | unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(F: Func); |
421 | if (!HiddenArgNumBytes) |
422 | return; |
423 | |
424 | const Module *M = Func.getParent(); |
425 | auto &DL = M->getDataLayout(); |
426 | auto *Int64Ty = Type::getInt64Ty(C&: Func.getContext()); |
427 | |
428 | Offset = alignTo(Size: Offset, A: ST.getAlignmentForImplicitArgPtr()); |
429 | |
430 | if (HiddenArgNumBytes >= 8) |
431 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_x" , Offset, |
432 | Args); |
433 | if (HiddenArgNumBytes >= 16) |
434 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_y" , Offset, |
435 | Args); |
436 | if (HiddenArgNumBytes >= 24) |
437 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_z" , Offset, |
438 | Args); |
439 | |
440 | auto *Int8PtrTy = |
441 | PointerType::get(C&: Func.getContext(), AddressSpace: AMDGPUAS::GLOBAL_ADDRESS); |
442 | |
443 | if (HiddenArgNumBytes >= 32) { |
444 | // We forbid the use of features requiring hostcall when compiling OpenCL |
445 | // before code object V5, which makes the mutual exclusion between the |
446 | // "printf buffer" and "hostcall buffer" here sound. |
447 | if (M->getNamedMetadata(Name: "llvm.printf.fmts" )) |
448 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_printf_buffer" , Offset, |
449 | Args); |
450 | else if (!Func.hasFnAttribute(Kind: "amdgpu-no-hostcall-ptr" )) |
451 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_hostcall_buffer" , Offset, |
452 | Args); |
453 | else |
454 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none" , Offset, Args); |
455 | } |
456 | |
457 | // Emit "default queue" and "completion action" arguments if enqueue kernel is |
458 | // used, otherwise emit dummy "none" arguments. |
459 | if (HiddenArgNumBytes >= 40) { |
460 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-default-queue" )) { |
461 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_default_queue" , Offset, |
462 | Args); |
463 | } else { |
464 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none" , Offset, Args); |
465 | } |
466 | } |
467 | |
468 | if (HiddenArgNumBytes >= 48) { |
469 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-completion-action" )) { |
470 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_completion_action" , Offset, |
471 | Args); |
472 | } else { |
473 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none" , Offset, Args); |
474 | } |
475 | } |
476 | |
477 | // Emit the pointer argument for multi-grid object. |
478 | if (HiddenArgNumBytes >= 56) { |
479 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-multigrid-sync-arg" )) { |
480 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_multigrid_sync_arg" , Offset, |
481 | Args); |
482 | } else { |
483 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none" , Offset, Args); |
484 | } |
485 | } |
486 | } |
487 | |
488 | msgpack::MapDocNode |
489 | MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF, |
490 | const SIProgramInfo &ProgramInfo, |
491 | unsigned CodeObjectVersion) const { |
492 | const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); |
493 | const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); |
494 | const Function &F = MF.getFunction(); |
495 | |
496 | auto Kern = HSAMetadataDoc->getMapNode(); |
497 | |
498 | Align MaxKernArgAlign; |
499 | Kern[".kernarg_segment_size" ] = Kern.getDocument()->getNode( |
500 | V: STM.getKernArgSegmentSize(F, MaxAlign&: MaxKernArgAlign)); |
501 | Kern[".group_segment_fixed_size" ] = |
502 | Kern.getDocument()->getNode(V: ProgramInfo.LDSSize); |
503 | DelayedExprs->assignDocNode(DN&: Kern[".private_segment_fixed_size" ], |
504 | Type: msgpack::Type::UInt, ExprValue: ProgramInfo.ScratchSize); |
505 | if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) { |
506 | DelayedExprs->assignDocNode(DN&: Kern[".uses_dynamic_stack" ], |
507 | Type: msgpack::Type::Boolean, |
508 | ExprValue: ProgramInfo.DynamicCallStack); |
509 | } |
510 | |
511 | if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP()) |
512 | Kern[".workgroup_processor_mode" ] = |
513 | Kern.getDocument()->getNode(V: ProgramInfo.WgpMode); |
514 | |
515 | // FIXME: The metadata treats the minimum as 16? |
516 | Kern[".kernarg_segment_align" ] = |
517 | Kern.getDocument()->getNode(V: std::max(a: Align(4), b: MaxKernArgAlign).value()); |
518 | Kern[".wavefront_size" ] = |
519 | Kern.getDocument()->getNode(V: STM.getWavefrontSize()); |
520 | DelayedExprs->assignDocNode(DN&: Kern[".sgpr_count" ], Type: msgpack::Type::UInt, |
521 | ExprValue: ProgramInfo.NumSGPR); |
522 | DelayedExprs->assignDocNode(DN&: Kern[".vgpr_count" ], Type: msgpack::Type::UInt, |
523 | ExprValue: ProgramInfo.NumVGPR); |
524 | |
525 | // Only add AGPR count to metadata for supported devices |
526 | if (STM.hasMAIInsts()) { |
527 | DelayedExprs->assignDocNode(DN&: Kern[".agpr_count" ], Type: msgpack::Type::UInt, |
528 | ExprValue: ProgramInfo.NumAccVGPR); |
529 | } |
530 | |
531 | Kern[".max_flat_workgroup_size" ] = |
532 | Kern.getDocument()->getNode(V: MFI.getMaxFlatWorkGroupSize()); |
533 | |
534 | uint32_t NumWGY = MFI.getMaxNumWorkGroupsY(); |
535 | uint32_t NumWGZ = MFI.getMaxNumWorkGroupsZ(); |
536 | uint32_t NumWGX = MFI.getMaxNumWorkGroupsX(); |
537 | |
538 | // TODO: Should consider 0 invalid and reject in IR verifier. |
539 | if (NumWGX != std::numeric_limits<uint32_t>::max() && NumWGX != 0) |
540 | Kern[".max_num_workgroups_x" ] = Kern.getDocument()->getNode(V: NumWGX); |
541 | |
542 | if (NumWGY != std::numeric_limits<uint32_t>::max() && NumWGY != 0) |
543 | Kern[".max_num_workgroups_y" ] = Kern.getDocument()->getNode(V: NumWGY); |
544 | |
545 | if (NumWGZ != std::numeric_limits<uint32_t>::max() && NumWGZ != 0) |
546 | Kern[".max_num_workgroups_z" ] = Kern.getDocument()->getNode(V: NumWGZ); |
547 | |
548 | Kern[".sgpr_spill_count" ] = |
549 | Kern.getDocument()->getNode(V: MFI.getNumSpilledSGPRs()); |
550 | Kern[".vgpr_spill_count" ] = |
551 | Kern.getDocument()->getNode(V: MFI.getNumSpilledVGPRs()); |
552 | |
553 | return Kern; |
554 | } |
555 | |
556 | bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer &TargetStreamer) { |
557 | DelayedExprs->resolveDelayedExpressions(); |
558 | return TargetStreamer.EmitHSAMetadata(HSAMetadata&: *HSAMetadataDoc, Strict: true); |
559 | } |
560 | |
561 | void MetadataStreamerMsgPackV4::begin(const Module &Mod, |
562 | const IsaInfo::AMDGPUTargetID &TargetID) { |
563 | emitVersion(); |
564 | emitTargetID(TargetID); |
565 | emitPrintf(Mod); |
566 | getRootMetadata(Key: "amdhsa.kernels" ) = HSAMetadataDoc->getArrayNode(); |
567 | DelayedExprs->clear(); |
568 | } |
569 | |
570 | void MetadataStreamerMsgPackV4::end() { |
571 | DelayedExprs->resolveDelayedExpressions(); |
572 | std::string HSAMetadataString; |
573 | raw_string_ostream StrOS(HSAMetadataString); |
574 | HSAMetadataDoc->toYAML(OS&: StrOS); |
575 | |
576 | if (DumpHSAMetadata) |
577 | dump(HSAMetadataString: StrOS.str()); |
578 | if (VerifyHSAMetadata) |
579 | verify(HSAMetadataString: StrOS.str()); |
580 | } |
581 | |
582 | void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF, |
583 | const SIProgramInfo &ProgramInfo) { |
584 | auto &Func = MF.getFunction(); |
585 | if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL && |
586 | Func.getCallingConv() != CallingConv::SPIR_KERNEL) |
587 | return; |
588 | |
589 | auto CodeObjectVersion = |
590 | AMDGPU::getAMDHSACodeObjectVersion(M: *Func.getParent()); |
591 | auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion); |
592 | |
593 | auto Kernels = |
594 | getRootMetadata(Key: "amdhsa.kernels" ).getArray(/*Convert=*/true); |
595 | |
596 | auto &TM = static_cast<const AMDGPUTargetMachine &>(MF.getTarget()); |
597 | { |
598 | Kern[".name" ] = Kern.getDocument()->getNode(V: Func.getName()); |
599 | Kern[".symbol" ] = Kern.getDocument()->getNode( |
600 | V: (Twine(Func.getName()) + Twine(".kd" )).str(), /*Copy=*/true); |
601 | emitKernelLanguage(Func, Kern); |
602 | emitKernelAttrs(TM, Func, Kern); |
603 | emitKernelArgs(MF, Kern); |
604 | } |
605 | |
606 | Kernels.push_back(N: Kern); |
607 | } |
608 | |
609 | //===----------------------------------------------------------------------===// |
610 | // HSAMetadataStreamerV5 |
611 | //===----------------------------------------------------------------------===// |
612 | |
613 | void MetadataStreamerMsgPackV5::emitVersion() { |
614 | auto Version = HSAMetadataDoc->getArrayNode(); |
615 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMajorV5)); |
616 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMinorV5)); |
617 | getRootMetadata(Key: "amdhsa.version" ) = Version; |
618 | } |
619 | |
620 | void MetadataStreamerMsgPackV5::emitHiddenKernelArgs( |
621 | const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) { |
622 | auto &Func = MF.getFunction(); |
623 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
624 | |
625 | // No implicit kernel argument is used. |
626 | if (ST.getImplicitArgNumBytes(F: Func) == 0) |
627 | return; |
628 | |
629 | const Module *M = Func.getParent(); |
630 | auto &DL = M->getDataLayout(); |
631 | const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); |
632 | |
633 | auto *Int64Ty = Type::getInt64Ty(C&: Func.getContext()); |
634 | auto *Int32Ty = Type::getInt32Ty(C&: Func.getContext()); |
635 | auto *Int16Ty = Type::getInt16Ty(C&: Func.getContext()); |
636 | |
637 | Offset = alignTo(Size: Offset, A: ST.getAlignmentForImplicitArgPtr()); |
638 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_block_count_x" , Offset, Args); |
639 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_block_count_y" , Offset, Args); |
640 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_block_count_z" , Offset, Args); |
641 | |
642 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_group_size_x" , Offset, Args); |
643 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_group_size_y" , Offset, Args); |
644 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_group_size_z" , Offset, Args); |
645 | |
646 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_remainder_x" , Offset, Args); |
647 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_remainder_y" , Offset, Args); |
648 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_remainder_z" , Offset, Args); |
649 | |
650 | // Reserved for hidden_tool_correlation_id. |
651 | Offset += 8; |
652 | |
653 | Offset += 8; // Reserved. |
654 | |
655 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_x" , Offset, Args); |
656 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_y" , Offset, Args); |
657 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_z" , Offset, Args); |
658 | |
659 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_grid_dims" , Offset, Args); |
660 | |
661 | Offset += 6; // Reserved. |
662 | auto *Int8PtrTy = |
663 | PointerType::get(C&: Func.getContext(), AddressSpace: AMDGPUAS::GLOBAL_ADDRESS); |
664 | |
665 | if (M->getNamedMetadata(Name: "llvm.printf.fmts" )) { |
666 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_printf_buffer" , Offset, |
667 | Args); |
668 | } else { |
669 | Offset += 8; // Skipped. |
670 | } |
671 | |
672 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-hostcall-ptr" )) { |
673 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_hostcall_buffer" , Offset, |
674 | Args); |
675 | } else { |
676 | Offset += 8; // Skipped. |
677 | } |
678 | |
679 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-multigrid-sync-arg" )) { |
680 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_multigrid_sync_arg" , Offset, |
681 | Args); |
682 | } else { |
683 | Offset += 8; // Skipped. |
684 | } |
685 | |
686 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-heap-ptr" )) |
687 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_heap_v1" , Offset, Args); |
688 | else |
689 | Offset += 8; // Skipped. |
690 | |
691 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-default-queue" )) { |
692 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_default_queue" , Offset, |
693 | Args); |
694 | } else { |
695 | Offset += 8; // Skipped. |
696 | } |
697 | |
698 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-completion-action" )) { |
699 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_completion_action" , Offset, |
700 | Args); |
701 | } else { |
702 | Offset += 8; // Skipped. |
703 | } |
704 | |
705 | // Emit argument for hidden dynamic lds size |
706 | if (MFI.isDynamicLDSUsed()) { |
707 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_dynamic_lds_size" , Offset, |
708 | Args); |
709 | } else { |
710 | Offset += 4; // skipped |
711 | } |
712 | |
713 | Offset += 68; // Reserved. |
714 | |
715 | // hidden_private_base and hidden_shared_base are only when the subtarget has |
716 | // ApertureRegs. |
717 | if (!ST.hasApertureRegs()) { |
718 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_private_base" , Offset, Args); |
719 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_shared_base" , Offset, Args); |
720 | } else { |
721 | Offset += 8; // Skipped. |
722 | } |
723 | |
724 | if (MFI.getUserSGPRInfo().hasQueuePtr()) |
725 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_queue_ptr" , Offset, Args); |
726 | } |
727 | |
728 | void MetadataStreamerMsgPackV5::emitKernelAttrs(const AMDGPUTargetMachine &TM, |
729 | const Function &Func, |
730 | msgpack::MapDocNode Kern) { |
731 | MetadataStreamerMsgPackV4::emitKernelAttrs(TM, Func, Kern); |
732 | |
733 | if (Func.getFnAttribute(Kind: "uniform-work-group-size" ).getValueAsBool()) |
734 | Kern[".uniform_work_group_size" ] = Kern.getDocument()->getNode(V: 1); |
735 | } |
736 | |
737 | //===----------------------------------------------------------------------===// |
738 | // HSAMetadataStreamerV6 |
739 | //===----------------------------------------------------------------------===// |
740 | |
741 | void MetadataStreamerMsgPackV6::emitVersion() { |
742 | auto Version = HSAMetadataDoc->getArrayNode(); |
743 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMajorV6)); |
744 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMinorV6)); |
745 | getRootMetadata(Key: "amdhsa.version" ) = Version; |
746 | } |
747 | |
748 | } // end namespace AMDGPU::HSAMD |
749 | } // end namespace llvm |
750 | |