1 | //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// AMDGPU HSA Metadata Streamer. |
11 | /// |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "AMDGPUHSAMetadataStreamer.h" |
16 | #include "AMDGPU.h" |
17 | #include "GCNSubtarget.h" |
18 | #include "MCTargetDesc/AMDGPUTargetStreamer.h" |
19 | #include "SIMachineFunctionInfo.h" |
20 | #include "SIProgramInfo.h" |
21 | #include "llvm/IR/Module.h" |
22 | #include "llvm/MC/MCContext.h" |
23 | #include "llvm/MC/MCExpr.h" |
24 | using namespace llvm; |
25 | |
26 | static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg, |
27 | const DataLayout &DL) { |
28 | Type *Ty = Arg.getType(); |
29 | MaybeAlign ArgAlign; |
30 | if (Arg.hasByRefAttr()) { |
31 | Ty = Arg.getParamByRefType(); |
32 | ArgAlign = Arg.getParamAlign(); |
33 | } |
34 | |
35 | if (!ArgAlign) |
36 | ArgAlign = DL.getABITypeAlign(Ty); |
37 | |
38 | return std::pair(Ty, *ArgAlign); |
39 | } |
40 | |
41 | namespace llvm { |
42 | |
43 | static cl::opt<bool> DumpHSAMetadata( |
44 | "amdgpu-dump-hsa-metadata" , |
45 | cl::desc("Dump AMDGPU HSA Metadata" )); |
46 | static cl::opt<bool> VerifyHSAMetadata( |
47 | "amdgpu-verify-hsa-metadata" , |
48 | cl::desc("Verify AMDGPU HSA Metadata" )); |
49 | |
50 | namespace AMDGPU::HSAMD { |
51 | |
52 | //===----------------------------------------------------------------------===// |
53 | // HSAMetadataStreamerV4 |
54 | //===----------------------------------------------------------------------===// |
55 | |
56 | void MetadataStreamerMsgPackV4::dump(StringRef HSAMetadataString) const { |
57 | errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; |
58 | } |
59 | |
60 | void MetadataStreamerMsgPackV4::verify(StringRef HSAMetadataString) const { |
61 | errs() << "AMDGPU HSA Metadata Parser Test: " ; |
62 | |
63 | msgpack::Document FromHSAMetadataString; |
64 | |
65 | if (!FromHSAMetadataString.fromYAML(S: HSAMetadataString)) { |
66 | errs() << "FAIL\n" ; |
67 | return; |
68 | } |
69 | |
70 | std::string ToHSAMetadataString; |
71 | raw_string_ostream StrOS(ToHSAMetadataString); |
72 | FromHSAMetadataString.toYAML(OS&: StrOS); |
73 | |
74 | errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL" ) << '\n'; |
75 | if (HSAMetadataString != ToHSAMetadataString) { |
76 | errs() << "Original input: " << HSAMetadataString << '\n' |
77 | << "Produced output: " << StrOS.str() << '\n'; |
78 | } |
79 | } |
80 | |
81 | std::optional<StringRef> |
82 | MetadataStreamerMsgPackV4::getAccessQualifier(StringRef AccQual) const { |
83 | return StringSwitch<std::optional<StringRef>>(AccQual) |
84 | .Case(S: "read_only" , Value: StringRef("read_only" )) |
85 | .Case(S: "write_only" , Value: StringRef("write_only" )) |
86 | .Case(S: "read_write" , Value: StringRef("read_write" )) |
87 | .Default(Value: std::nullopt); |
88 | } |
89 | |
90 | std::optional<StringRef> MetadataStreamerMsgPackV4::getAddressSpaceQualifier( |
91 | unsigned AddressSpace) const { |
92 | switch (AddressSpace) { |
93 | case AMDGPUAS::PRIVATE_ADDRESS: |
94 | return StringRef("private" ); |
95 | case AMDGPUAS::GLOBAL_ADDRESS: |
96 | return StringRef("global" ); |
97 | case AMDGPUAS::CONSTANT_ADDRESS: |
98 | return StringRef("constant" ); |
99 | case AMDGPUAS::LOCAL_ADDRESS: |
100 | return StringRef("local" ); |
101 | case AMDGPUAS::FLAT_ADDRESS: |
102 | return StringRef("generic" ); |
103 | case AMDGPUAS::REGION_ADDRESS: |
104 | return StringRef("region" ); |
105 | default: |
106 | return std::nullopt; |
107 | } |
108 | } |
109 | |
110 | StringRef |
111 | MetadataStreamerMsgPackV4::getValueKind(Type *Ty, StringRef TypeQual, |
112 | StringRef BaseTypeName) const { |
113 | if (TypeQual.contains(Other: "pipe" )) |
114 | return "pipe" ; |
115 | |
116 | return StringSwitch<StringRef>(BaseTypeName) |
117 | .Case(S: "image1d_t" , Value: "image" ) |
118 | .Case(S: "image1d_array_t" , Value: "image" ) |
119 | .Case(S: "image1d_buffer_t" , Value: "image" ) |
120 | .Case(S: "image2d_t" , Value: "image" ) |
121 | .Case(S: "image2d_array_t" , Value: "image" ) |
122 | .Case(S: "image2d_array_depth_t" , Value: "image" ) |
123 | .Case(S: "image2d_array_msaa_t" , Value: "image" ) |
124 | .Case(S: "image2d_array_msaa_depth_t" , Value: "image" ) |
125 | .Case(S: "image2d_depth_t" , Value: "image" ) |
126 | .Case(S: "image2d_msaa_t" , Value: "image" ) |
127 | .Case(S: "image2d_msaa_depth_t" , Value: "image" ) |
128 | .Case(S: "image3d_t" , Value: "image" ) |
129 | .Case(S: "sampler_t" , Value: "sampler" ) |
130 | .Case(S: "queue_t" , Value: "queue" ) |
131 | .Default(Value: isa<PointerType>(Val: Ty) |
132 | ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS |
133 | ? "dynamic_shared_pointer" |
134 | : "global_buffer" ) |
135 | : "by_value" ); |
136 | } |
137 | |
138 | std::string MetadataStreamerMsgPackV4::getTypeName(Type *Ty, |
139 | bool Signed) const { |
140 | switch (Ty->getTypeID()) { |
141 | case Type::IntegerTyID: { |
142 | if (!Signed) |
143 | return (Twine('u') + getTypeName(Ty, Signed: true)).str(); |
144 | |
145 | auto BitWidth = Ty->getIntegerBitWidth(); |
146 | switch (BitWidth) { |
147 | case 8: |
148 | return "char" ; |
149 | case 16: |
150 | return "short" ; |
151 | case 32: |
152 | return "int" ; |
153 | case 64: |
154 | return "long" ; |
155 | default: |
156 | return (Twine('i') + Twine(BitWidth)).str(); |
157 | } |
158 | } |
159 | case Type::HalfTyID: |
160 | return "half" ; |
161 | case Type::FloatTyID: |
162 | return "float" ; |
163 | case Type::DoubleTyID: |
164 | return "double" ; |
165 | case Type::FixedVectorTyID: { |
166 | auto VecTy = cast<FixedVectorType>(Val: Ty); |
167 | auto ElTy = VecTy->getElementType(); |
168 | auto NumElements = VecTy->getNumElements(); |
169 | return (Twine(getTypeName(Ty: ElTy, Signed)) + Twine(NumElements)).str(); |
170 | } |
171 | default: |
172 | return "unknown" ; |
173 | } |
174 | } |
175 | |
176 | msgpack::ArrayDocNode |
177 | MetadataStreamerMsgPackV4::getWorkGroupDimensions(MDNode *Node) const { |
178 | auto Dims = HSAMetadataDoc->getArrayNode(); |
179 | if (Node->getNumOperands() != 3) |
180 | return Dims; |
181 | |
182 | for (auto &Op : Node->operands()) |
183 | Dims.push_back(N: Dims.getDocument()->getNode( |
184 | V: uint64_t(mdconst::extract<ConstantInt>(MD: Op)->getZExtValue()))); |
185 | return Dims; |
186 | } |
187 | |
188 | void MetadataStreamerMsgPackV4::emitVersion() { |
189 | auto Version = HSAMetadataDoc->getArrayNode(); |
190 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMajorV4)); |
191 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMinorV4)); |
192 | getRootMetadata(Key: "amdhsa.version" ) = Version; |
193 | } |
194 | |
195 | void MetadataStreamerMsgPackV4::emitTargetID( |
196 | const IsaInfo::AMDGPUTargetID &TargetID) { |
197 | getRootMetadata(Key: "amdhsa.target" ) = |
198 | HSAMetadataDoc->getNode(V: TargetID.toString(), /*Copy=*/true); |
199 | } |
200 | |
201 | void MetadataStreamerMsgPackV4::emitPrintf(const Module &Mod) { |
202 | auto Node = Mod.getNamedMetadata(Name: "llvm.printf.fmts" ); |
203 | if (!Node) |
204 | return; |
205 | |
206 | auto Printf = HSAMetadataDoc->getArrayNode(); |
207 | for (auto *Op : Node->operands()) |
208 | if (Op->getNumOperands()) |
209 | Printf.push_back(N: Printf.getDocument()->getNode( |
210 | V: cast<MDString>(Val: Op->getOperand(I: 0))->getString(), /*Copy=*/true)); |
211 | getRootMetadata(Key: "amdhsa.printf" ) = Printf; |
212 | } |
213 | |
214 | void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func, |
215 | msgpack::MapDocNode Kern) { |
216 | // TODO: What about other languages? |
217 | auto Node = Func.getParent()->getNamedMetadata(Name: "opencl.ocl.version" ); |
218 | if (!Node || !Node->getNumOperands()) |
219 | return; |
220 | auto Op0 = Node->getOperand(i: 0); |
221 | if (Op0->getNumOperands() <= 1) |
222 | return; |
223 | |
224 | Kern[".language" ] = Kern.getDocument()->getNode(V: "OpenCL C" ); |
225 | auto LanguageVersion = Kern.getDocument()->getArrayNode(); |
226 | LanguageVersion.push_back(N: Kern.getDocument()->getNode( |
227 | V: mdconst::extract<ConstantInt>(MD: Op0->getOperand(I: 0))->getZExtValue())); |
228 | LanguageVersion.push_back(N: Kern.getDocument()->getNode( |
229 | V: mdconst::extract<ConstantInt>(MD: Op0->getOperand(I: 1))->getZExtValue())); |
230 | Kern[".language_version" ] = LanguageVersion; |
231 | } |
232 | |
233 | void MetadataStreamerMsgPackV4::emitKernelAttrs(const Function &Func, |
234 | msgpack::MapDocNode Kern) { |
235 | |
236 | if (auto Node = Func.getMetadata(Kind: "reqd_work_group_size" )) |
237 | Kern[".reqd_workgroup_size" ] = getWorkGroupDimensions(Node); |
238 | if (auto Node = Func.getMetadata(Kind: "work_group_size_hint" )) |
239 | Kern[".workgroup_size_hint" ] = getWorkGroupDimensions(Node); |
240 | if (auto Node = Func.getMetadata(Kind: "vec_type_hint" )) { |
241 | Kern[".vec_type_hint" ] = Kern.getDocument()->getNode( |
242 | V: getTypeName( |
243 | Ty: cast<ValueAsMetadata>(Val: Node->getOperand(I: 0))->getType(), |
244 | Signed: mdconst::extract<ConstantInt>(MD: Node->getOperand(I: 1))->getZExtValue()), |
245 | /*Copy=*/true); |
246 | } |
247 | if (Func.hasFnAttribute(Kind: "runtime-handle" )) { |
248 | Kern[".device_enqueue_symbol" ] = Kern.getDocument()->getNode( |
249 | V: Func.getFnAttribute(Kind: "runtime-handle" ).getValueAsString().str(), |
250 | /*Copy=*/true); |
251 | } |
252 | if (Func.hasFnAttribute(Kind: "device-init" )) |
253 | Kern[".kind" ] = Kern.getDocument()->getNode(V: "init" ); |
254 | else if (Func.hasFnAttribute(Kind: "device-fini" )) |
255 | Kern[".kind" ] = Kern.getDocument()->getNode(V: "fini" ); |
256 | } |
257 | |
258 | void MetadataStreamerMsgPackV4::emitKernelArgs(const MachineFunction &MF, |
259 | msgpack::MapDocNode Kern) { |
260 | auto &Func = MF.getFunction(); |
261 | unsigned Offset = 0; |
262 | auto Args = HSAMetadataDoc->getArrayNode(); |
263 | for (auto &Arg : Func.args()) |
264 | emitKernelArg(Arg, Offset, Args); |
265 | |
266 | emitHiddenKernelArgs(MF, Offset, Args); |
267 | |
268 | Kern[".args" ] = Args; |
269 | } |
270 | |
271 | void MetadataStreamerMsgPackV4::emitKernelArg(const Argument &Arg, |
272 | unsigned &Offset, |
273 | msgpack::ArrayDocNode Args) { |
274 | auto Func = Arg.getParent(); |
275 | auto ArgNo = Arg.getArgNo(); |
276 | const MDNode *Node; |
277 | |
278 | StringRef Name; |
279 | Node = Func->getMetadata(Kind: "kernel_arg_name" ); |
280 | if (Node && ArgNo < Node->getNumOperands()) |
281 | Name = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString(); |
282 | else if (Arg.hasName()) |
283 | Name = Arg.getName(); |
284 | |
285 | StringRef TypeName; |
286 | Node = Func->getMetadata(Kind: "kernel_arg_type" ); |
287 | if (Node && ArgNo < Node->getNumOperands()) |
288 | TypeName = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString(); |
289 | |
290 | StringRef BaseTypeName; |
291 | Node = Func->getMetadata(Kind: "kernel_arg_base_type" ); |
292 | if (Node && ArgNo < Node->getNumOperands()) |
293 | BaseTypeName = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString(); |
294 | |
295 | StringRef ActAccQual; |
296 | // Do we really need NoAlias check here? |
297 | if (Arg.getType()->isPointerTy() && Arg.hasNoAliasAttr()) { |
298 | if (Arg.onlyReadsMemory()) |
299 | ActAccQual = "read_only" ; |
300 | else if (Arg.hasAttribute(Kind: Attribute::WriteOnly)) |
301 | ActAccQual = "write_only" ; |
302 | } |
303 | |
304 | StringRef AccQual; |
305 | Node = Func->getMetadata(Kind: "kernel_arg_access_qual" ); |
306 | if (Node && ArgNo < Node->getNumOperands()) |
307 | AccQual = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString(); |
308 | |
309 | StringRef TypeQual; |
310 | Node = Func->getMetadata(Kind: "kernel_arg_type_qual" ); |
311 | if (Node && ArgNo < Node->getNumOperands()) |
312 | TypeQual = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString(); |
313 | |
314 | const DataLayout &DL = Func->getDataLayout(); |
315 | |
316 | MaybeAlign PointeeAlign; |
317 | Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType(); |
318 | |
319 | // FIXME: Need to distinguish in memory alignment from pointer alignment. |
320 | if (auto PtrTy = dyn_cast<PointerType>(Val: Ty)) { |
321 | if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) |
322 | PointeeAlign = Arg.getParamAlign().valueOrOne(); |
323 | } |
324 | |
325 | // There's no distinction between byval aggregates and raw aggregates. |
326 | Type *ArgTy; |
327 | Align ArgAlign; |
328 | std::tie(args&: ArgTy, args&: ArgAlign) = getArgumentTypeAlign(Arg, DL); |
329 | |
330 | emitKernelArg(DL, Ty: ArgTy, Alignment: ArgAlign, |
331 | ValueKind: getValueKind(Ty: ArgTy, TypeQual, BaseTypeName), Offset, Args, |
332 | PointeeAlign, Name, TypeName, BaseTypeName, ActAccQual, |
333 | AccQual, TypeQual); |
334 | } |
335 | |
336 | void MetadataStreamerMsgPackV4::emitKernelArg( |
337 | const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind, |
338 | unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign, |
339 | StringRef Name, StringRef TypeName, StringRef BaseTypeName, |
340 | StringRef ActAccQual, StringRef AccQual, StringRef TypeQual) { |
341 | auto Arg = Args.getDocument()->getMapNode(); |
342 | |
343 | if (!Name.empty()) |
344 | Arg[".name" ] = Arg.getDocument()->getNode(V: Name, /*Copy=*/true); |
345 | if (!TypeName.empty()) |
346 | Arg[".type_name" ] = Arg.getDocument()->getNode(V: TypeName, /*Copy=*/true); |
347 | auto Size = DL.getTypeAllocSize(Ty); |
348 | Arg[".size" ] = Arg.getDocument()->getNode(V: Size); |
349 | Offset = alignTo(Size: Offset, A: Alignment); |
350 | Arg[".offset" ] = Arg.getDocument()->getNode(V: Offset); |
351 | Offset += Size; |
352 | Arg[".value_kind" ] = Arg.getDocument()->getNode(V: ValueKind, /*Copy=*/true); |
353 | if (PointeeAlign) |
354 | Arg[".pointee_align" ] = Arg.getDocument()->getNode(V: PointeeAlign->value()); |
355 | |
356 | if (auto PtrTy = dyn_cast<PointerType>(Val: Ty)) |
357 | if (auto Qualifier = getAddressSpaceQualifier(AddressSpace: PtrTy->getAddressSpace())) |
358 | // Limiting address space to emit only for a certain ValueKind. |
359 | if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer" ) |
360 | Arg[".address_space" ] = Arg.getDocument()->getNode(V: *Qualifier, |
361 | /*Copy=*/true); |
362 | |
363 | if (auto AQ = getAccessQualifier(AccQual)) |
364 | Arg[".access" ] = Arg.getDocument()->getNode(V: *AQ, /*Copy=*/true); |
365 | |
366 | if (auto AAQ = getAccessQualifier(AccQual: ActAccQual)) |
367 | Arg[".actual_access" ] = Arg.getDocument()->getNode(V: *AAQ, /*Copy=*/true); |
368 | |
369 | SmallVector<StringRef, 1> SplitTypeQuals; |
370 | TypeQual.split(A&: SplitTypeQuals, Separator: " " , MaxSplit: -1, KeepEmpty: false); |
371 | for (StringRef Key : SplitTypeQuals) { |
372 | if (Key == "const" ) |
373 | Arg[".is_const" ] = Arg.getDocument()->getNode(V: true); |
374 | else if (Key == "restrict" ) |
375 | Arg[".is_restrict" ] = Arg.getDocument()->getNode(V: true); |
376 | else if (Key == "volatile" ) |
377 | Arg[".is_volatile" ] = Arg.getDocument()->getNode(V: true); |
378 | else if (Key == "pipe" ) |
379 | Arg[".is_pipe" ] = Arg.getDocument()->getNode(V: true); |
380 | } |
381 | |
382 | Args.push_back(N: Arg); |
383 | } |
384 | |
385 | void MetadataStreamerMsgPackV4::emitHiddenKernelArgs( |
386 | const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) { |
387 | auto &Func = MF.getFunction(); |
388 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
389 | |
390 | unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(F: Func); |
391 | if (!HiddenArgNumBytes) |
392 | return; |
393 | |
394 | const Module *M = Func.getParent(); |
395 | auto &DL = M->getDataLayout(); |
396 | auto Int64Ty = Type::getInt64Ty(C&: Func.getContext()); |
397 | |
398 | Offset = alignTo(Size: Offset, A: ST.getAlignmentForImplicitArgPtr()); |
399 | |
400 | if (HiddenArgNumBytes >= 8) |
401 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_x" , Offset, |
402 | Args); |
403 | if (HiddenArgNumBytes >= 16) |
404 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_y" , Offset, |
405 | Args); |
406 | if (HiddenArgNumBytes >= 24) |
407 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_z" , Offset, |
408 | Args); |
409 | |
410 | auto Int8PtrTy = |
411 | PointerType::get(C&: Func.getContext(), AddressSpace: AMDGPUAS::GLOBAL_ADDRESS); |
412 | |
413 | if (HiddenArgNumBytes >= 32) { |
414 | // We forbid the use of features requiring hostcall when compiling OpenCL |
415 | // before code object V5, which makes the mutual exclusion between the |
416 | // "printf buffer" and "hostcall buffer" here sound. |
417 | if (M->getNamedMetadata(Name: "llvm.printf.fmts" )) |
418 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_printf_buffer" , Offset, |
419 | Args); |
420 | else if (!Func.hasFnAttribute(Kind: "amdgpu-no-hostcall-ptr" )) |
421 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_hostcall_buffer" , Offset, |
422 | Args); |
423 | else |
424 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none" , Offset, Args); |
425 | } |
426 | |
427 | // Emit "default queue" and "completion action" arguments if enqueue kernel is |
428 | // used, otherwise emit dummy "none" arguments. |
429 | if (HiddenArgNumBytes >= 40) { |
430 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-default-queue" )) { |
431 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_default_queue" , Offset, |
432 | Args); |
433 | } else { |
434 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none" , Offset, Args); |
435 | } |
436 | } |
437 | |
438 | if (HiddenArgNumBytes >= 48) { |
439 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-completion-action" )) { |
440 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_completion_action" , Offset, |
441 | Args); |
442 | } else { |
443 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none" , Offset, Args); |
444 | } |
445 | } |
446 | |
447 | // Emit the pointer argument for multi-grid object. |
448 | if (HiddenArgNumBytes >= 56) { |
449 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-multigrid-sync-arg" )) { |
450 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_multigrid_sync_arg" , Offset, |
451 | Args); |
452 | } else { |
453 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none" , Offset, Args); |
454 | } |
455 | } |
456 | } |
457 | |
458 | msgpack::MapDocNode |
459 | MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF, |
460 | const SIProgramInfo &ProgramInfo, |
461 | unsigned CodeObjectVersion) const { |
462 | const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); |
463 | const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); |
464 | const Function &F = MF.getFunction(); |
465 | |
466 | auto Kern = HSAMetadataDoc->getMapNode(); |
467 | |
468 | Align MaxKernArgAlign; |
469 | Kern[".kernarg_segment_size" ] = Kern.getDocument()->getNode( |
470 | V: STM.getKernArgSegmentSize(F, MaxAlign&: MaxKernArgAlign)); |
471 | Kern[".group_segment_fixed_size" ] = |
472 | Kern.getDocument()->getNode(V: ProgramInfo.LDSSize); |
473 | DelayedExprs->assignDocNode(DN&: Kern[".private_segment_fixed_size" ], |
474 | Type: msgpack::Type::UInt, ExprValue: ProgramInfo.ScratchSize); |
475 | if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) { |
476 | DelayedExprs->assignDocNode(DN&: Kern[".uses_dynamic_stack" ], |
477 | Type: msgpack::Type::Boolean, |
478 | ExprValue: ProgramInfo.DynamicCallStack); |
479 | } |
480 | |
481 | if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP()) |
482 | Kern[".workgroup_processor_mode" ] = |
483 | Kern.getDocument()->getNode(V: ProgramInfo.WgpMode); |
484 | |
485 | // FIXME: The metadata treats the minimum as 16? |
486 | Kern[".kernarg_segment_align" ] = |
487 | Kern.getDocument()->getNode(V: std::max(a: Align(4), b: MaxKernArgAlign).value()); |
488 | Kern[".wavefront_size" ] = |
489 | Kern.getDocument()->getNode(V: STM.getWavefrontSize()); |
490 | DelayedExprs->assignDocNode(DN&: Kern[".sgpr_count" ], Type: msgpack::Type::UInt, |
491 | ExprValue: ProgramInfo.NumSGPR); |
492 | DelayedExprs->assignDocNode(DN&: Kern[".vgpr_count" ], Type: msgpack::Type::UInt, |
493 | ExprValue: ProgramInfo.NumVGPR); |
494 | |
495 | // Only add AGPR count to metadata for supported devices |
496 | if (STM.hasMAIInsts()) { |
497 | DelayedExprs->assignDocNode(DN&: Kern[".agpr_count" ], Type: msgpack::Type::UInt, |
498 | ExprValue: ProgramInfo.NumAccVGPR); |
499 | } |
500 | |
501 | Kern[".max_flat_workgroup_size" ] = |
502 | Kern.getDocument()->getNode(V: MFI.getMaxFlatWorkGroupSize()); |
503 | unsigned NumWGX = MFI.getMaxNumWorkGroupsX(); |
504 | unsigned NumWGY = MFI.getMaxNumWorkGroupsY(); |
505 | unsigned NumWGZ = MFI.getMaxNumWorkGroupsZ(); |
506 | if (NumWGX != 0 && NumWGY != 0 && NumWGZ != 0) { |
507 | Kern[".max_num_workgroups_x" ] = Kern.getDocument()->getNode(V: NumWGX); |
508 | Kern[".max_num_workgroups_y" ] = Kern.getDocument()->getNode(V: NumWGY); |
509 | Kern[".max_num_workgroups_z" ] = Kern.getDocument()->getNode(V: NumWGZ); |
510 | } |
511 | Kern[".sgpr_spill_count" ] = |
512 | Kern.getDocument()->getNode(V: MFI.getNumSpilledSGPRs()); |
513 | Kern[".vgpr_spill_count" ] = |
514 | Kern.getDocument()->getNode(V: MFI.getNumSpilledVGPRs()); |
515 | |
516 | return Kern; |
517 | } |
518 | |
519 | bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer &TargetStreamer) { |
520 | DelayedExprs->resolveDelayedExpressions(); |
521 | return TargetStreamer.EmitHSAMetadata(HSAMetadata&: *HSAMetadataDoc, Strict: true); |
522 | } |
523 | |
524 | void MetadataStreamerMsgPackV4::begin(const Module &Mod, |
525 | const IsaInfo::AMDGPUTargetID &TargetID) { |
526 | emitVersion(); |
527 | emitTargetID(TargetID); |
528 | emitPrintf(Mod); |
529 | getRootMetadata(Key: "amdhsa.kernels" ) = HSAMetadataDoc->getArrayNode(); |
530 | DelayedExprs->clear(); |
531 | } |
532 | |
533 | void MetadataStreamerMsgPackV4::end() { |
534 | DelayedExprs->resolveDelayedExpressions(); |
535 | std::string HSAMetadataString; |
536 | raw_string_ostream StrOS(HSAMetadataString); |
537 | HSAMetadataDoc->toYAML(OS&: StrOS); |
538 | |
539 | if (DumpHSAMetadata) |
540 | dump(HSAMetadataString: StrOS.str()); |
541 | if (VerifyHSAMetadata) |
542 | verify(HSAMetadataString: StrOS.str()); |
543 | } |
544 | |
545 | void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF, |
546 | const SIProgramInfo &ProgramInfo) { |
547 | auto &Func = MF.getFunction(); |
548 | if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL && |
549 | Func.getCallingConv() != CallingConv::SPIR_KERNEL) |
550 | return; |
551 | |
552 | auto CodeObjectVersion = |
553 | AMDGPU::getAMDHSACodeObjectVersion(M: *Func.getParent()); |
554 | auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion); |
555 | |
556 | auto Kernels = |
557 | getRootMetadata(Key: "amdhsa.kernels" ).getArray(/*Convert=*/true); |
558 | |
559 | { |
560 | Kern[".name" ] = Kern.getDocument()->getNode(V: Func.getName()); |
561 | Kern[".symbol" ] = Kern.getDocument()->getNode( |
562 | V: (Twine(Func.getName()) + Twine(".kd" )).str(), /*Copy=*/true); |
563 | emitKernelLanguage(Func, Kern); |
564 | emitKernelAttrs(Func, Kern); |
565 | emitKernelArgs(MF, Kern); |
566 | } |
567 | |
568 | Kernels.push_back(N: Kern); |
569 | } |
570 | |
571 | //===----------------------------------------------------------------------===// |
572 | // HSAMetadataStreamerV5 |
573 | //===----------------------------------------------------------------------===// |
574 | |
575 | void MetadataStreamerMsgPackV5::emitVersion() { |
576 | auto Version = HSAMetadataDoc->getArrayNode(); |
577 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMajorV5)); |
578 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMinorV5)); |
579 | getRootMetadata(Key: "amdhsa.version" ) = Version; |
580 | } |
581 | |
582 | void MetadataStreamerMsgPackV5::emitHiddenKernelArgs( |
583 | const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) { |
584 | auto &Func = MF.getFunction(); |
585 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
586 | |
587 | // No implicit kernel argument is used. |
588 | if (ST.getImplicitArgNumBytes(F: Func) == 0) |
589 | return; |
590 | |
591 | const Module *M = Func.getParent(); |
592 | auto &DL = M->getDataLayout(); |
593 | const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); |
594 | |
595 | auto Int64Ty = Type::getInt64Ty(C&: Func.getContext()); |
596 | auto Int32Ty = Type::getInt32Ty(C&: Func.getContext()); |
597 | auto Int16Ty = Type::getInt16Ty(C&: Func.getContext()); |
598 | |
599 | Offset = alignTo(Size: Offset, A: ST.getAlignmentForImplicitArgPtr()); |
600 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_block_count_x" , Offset, Args); |
601 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_block_count_y" , Offset, Args); |
602 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_block_count_z" , Offset, Args); |
603 | |
604 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_group_size_x" , Offset, Args); |
605 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_group_size_y" , Offset, Args); |
606 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_group_size_z" , Offset, Args); |
607 | |
608 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_remainder_x" , Offset, Args); |
609 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_remainder_y" , Offset, Args); |
610 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_remainder_z" , Offset, Args); |
611 | |
612 | // Reserved for hidden_tool_correlation_id. |
613 | Offset += 8; |
614 | |
615 | Offset += 8; // Reserved. |
616 | |
617 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_x" , Offset, Args); |
618 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_y" , Offset, Args); |
619 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_z" , Offset, Args); |
620 | |
621 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_grid_dims" , Offset, Args); |
622 | |
623 | Offset += 6; // Reserved. |
624 | auto Int8PtrTy = |
625 | PointerType::get(C&: Func.getContext(), AddressSpace: AMDGPUAS::GLOBAL_ADDRESS); |
626 | |
627 | if (M->getNamedMetadata(Name: "llvm.printf.fmts" )) { |
628 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_printf_buffer" , Offset, |
629 | Args); |
630 | } else { |
631 | Offset += 8; // Skipped. |
632 | } |
633 | |
634 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-hostcall-ptr" )) { |
635 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_hostcall_buffer" , Offset, |
636 | Args); |
637 | } else { |
638 | Offset += 8; // Skipped. |
639 | } |
640 | |
641 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-multigrid-sync-arg" )) { |
642 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_multigrid_sync_arg" , Offset, |
643 | Args); |
644 | } else { |
645 | Offset += 8; // Skipped. |
646 | } |
647 | |
648 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-heap-ptr" )) |
649 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_heap_v1" , Offset, Args); |
650 | else |
651 | Offset += 8; // Skipped. |
652 | |
653 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-default-queue" )) { |
654 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_default_queue" , Offset, |
655 | Args); |
656 | } else { |
657 | Offset += 8; // Skipped. |
658 | } |
659 | |
660 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-completion-action" )) { |
661 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_completion_action" , Offset, |
662 | Args); |
663 | } else { |
664 | Offset += 8; // Skipped. |
665 | } |
666 | |
667 | // Emit argument for hidden dynamic lds size |
668 | if (MFI.isDynamicLDSUsed()) { |
669 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_dynamic_lds_size" , Offset, |
670 | Args); |
671 | } else { |
672 | Offset += 4; // skipped |
673 | } |
674 | |
675 | Offset += 68; // Reserved. |
676 | |
677 | // hidden_private_base and hidden_shared_base are only when the subtarget has |
678 | // ApertureRegs. |
679 | if (!ST.hasApertureRegs()) { |
680 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_private_base" , Offset, Args); |
681 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_shared_base" , Offset, Args); |
682 | } else { |
683 | Offset += 8; // Skipped. |
684 | } |
685 | |
686 | if (MFI.getUserSGPRInfo().hasQueuePtr()) |
687 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_queue_ptr" , Offset, Args); |
688 | } |
689 | |
690 | void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func, |
691 | msgpack::MapDocNode Kern) { |
692 | MetadataStreamerMsgPackV4::emitKernelAttrs(Func, Kern); |
693 | |
694 | if (Func.getFnAttribute(Kind: "uniform-work-group-size" ).getValueAsBool()) |
695 | Kern[".uniform_work_group_size" ] = Kern.getDocument()->getNode(V: 1); |
696 | } |
697 | |
698 | //===----------------------------------------------------------------------===// |
699 | // HSAMetadataStreamerV6 |
700 | //===----------------------------------------------------------------------===// |
701 | |
702 | void MetadataStreamerMsgPackV6::emitVersion() { |
703 | auto Version = HSAMetadataDoc->getArrayNode(); |
704 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMajorV6)); |
705 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMinorV6)); |
706 | getRootMetadata(Key: "amdhsa.version" ) = Version; |
707 | } |
708 | |
709 | } // end namespace AMDGPU::HSAMD |
710 | } // end namespace llvm |
711 | |