| 1 | //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | /// \file |
| 10 | /// AMDGPU HSA Metadata Streamer. |
| 11 | /// |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "AMDGPUHSAMetadataStreamer.h" |
| 16 | #include "AMDGPU.h" |
| 17 | #include "GCNSubtarget.h" |
| 18 | #include "MCTargetDesc/AMDGPUTargetStreamer.h" |
| 19 | #include "SIMachineFunctionInfo.h" |
| 20 | #include "SIProgramInfo.h" |
| 21 | #include "llvm/IR/Module.h" |
| 22 | #include "llvm/MC/MCContext.h" |
| 23 | #include "llvm/MC/MCExpr.h" |
| 24 | #include "llvm/Target/TargetLoweringObjectFile.h" |
| 25 | |
| 26 | using namespace llvm; |
| 27 | |
| 28 | static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg, |
| 29 | const DataLayout &DL) { |
| 30 | Type *Ty = Arg.getType(); |
| 31 | MaybeAlign ArgAlign; |
| 32 | if (Arg.hasByRefAttr()) { |
| 33 | Ty = Arg.getParamByRefType(); |
| 34 | ArgAlign = Arg.getParamAlign(); |
| 35 | } |
| 36 | |
| 37 | if (!ArgAlign) |
| 38 | ArgAlign = DL.getABITypeAlign(Ty); |
| 39 | |
| 40 | return std::pair(Ty, *ArgAlign); |
| 41 | } |
| 42 | |
| 43 | /// Find the mangled symbol name for the runtime handle for \p EnqueuedBlock |
| 44 | static std::string getEnqueuedBlockSymbolName(const AMDGPUTargetMachine &TM, |
| 45 | const Function &EnqueuedBlock) { |
| 46 | const MDNode *Associated = |
| 47 | EnqueuedBlock.getMetadata(KindID: LLVMContext::MD_associated); |
| 48 | if (!Associated) |
| 49 | return "" ; |
| 50 | |
| 51 | auto *VM = cast<ValueAsMetadata>(Val: Associated->getOperand(I: 0)); |
| 52 | auto *RuntimeHandle = |
| 53 | dyn_cast<GlobalVariable>(Val: VM->getValue()->stripPointerCasts()); |
| 54 | if (!RuntimeHandle || |
| 55 | RuntimeHandle->getSection() != ".amdgpu.kernel.runtime.handle" ) |
| 56 | return "" ; |
| 57 | |
| 58 | SmallString<128> Name; |
| 59 | TM.getNameWithPrefix(Name, GV: RuntimeHandle, |
| 60 | Mang&: TM.getObjFileLowering()->getMangler()); |
| 61 | return Name.str().str(); |
| 62 | } |
| 63 | |
| 64 | namespace llvm { |
| 65 | |
| 66 | static cl::opt<bool> DumpHSAMetadata( |
| 67 | "amdgpu-dump-hsa-metadata" , |
| 68 | cl::desc("Dump AMDGPU HSA Metadata" )); |
| 69 | static cl::opt<bool> VerifyHSAMetadata( |
| 70 | "amdgpu-verify-hsa-metadata" , |
| 71 | cl::desc("Verify AMDGPU HSA Metadata" )); |
| 72 | |
| 73 | namespace AMDGPU::HSAMD { |
| 74 | |
| 75 | //===----------------------------------------------------------------------===// |
| 76 | // HSAMetadataStreamerV4 |
| 77 | //===----------------------------------------------------------------------===// |
| 78 | |
| 79 | void MetadataStreamerMsgPackV4::dump(StringRef HSAMetadataString) const { |
| 80 | errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; |
| 81 | } |
| 82 | |
| 83 | void MetadataStreamerMsgPackV4::verify(StringRef HSAMetadataString) const { |
| 84 | errs() << "AMDGPU HSA Metadata Parser Test: " ; |
| 85 | |
| 86 | msgpack::Document FromHSAMetadataString; |
| 87 | |
| 88 | if (!FromHSAMetadataString.fromYAML(S: HSAMetadataString)) { |
| 89 | errs() << "FAIL\n" ; |
| 90 | return; |
| 91 | } |
| 92 | |
| 93 | std::string ToHSAMetadataString; |
| 94 | raw_string_ostream StrOS(ToHSAMetadataString); |
| 95 | FromHSAMetadataString.toYAML(OS&: StrOS); |
| 96 | |
| 97 | errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL" ) << '\n'; |
| 98 | if (HSAMetadataString != ToHSAMetadataString) { |
| 99 | errs() << "Original input: " << HSAMetadataString << '\n' |
| 100 | << "Produced output: " << StrOS.str() << '\n'; |
| 101 | } |
| 102 | } |
| 103 | |
| 104 | std::optional<StringRef> |
| 105 | MetadataStreamerMsgPackV4::getAccessQualifier(StringRef AccQual) const { |
| 106 | return StringSwitch<std::optional<StringRef>>(AccQual) |
| 107 | .Case(S: "read_only" , Value: StringRef("read_only" )) |
| 108 | .Case(S: "write_only" , Value: StringRef("write_only" )) |
| 109 | .Case(S: "read_write" , Value: StringRef("read_write" )) |
| 110 | .Default(Value: std::nullopt); |
| 111 | } |
| 112 | |
| 113 | std::optional<StringRef> MetadataStreamerMsgPackV4::getAddressSpaceQualifier( |
| 114 | unsigned AddressSpace) const { |
| 115 | switch (AddressSpace) { |
| 116 | case AMDGPUAS::PRIVATE_ADDRESS: |
| 117 | return StringRef("private" ); |
| 118 | case AMDGPUAS::GLOBAL_ADDRESS: |
| 119 | return StringRef("global" ); |
| 120 | case AMDGPUAS::CONSTANT_ADDRESS: |
| 121 | return StringRef("constant" ); |
| 122 | case AMDGPUAS::LOCAL_ADDRESS: |
| 123 | return StringRef("local" ); |
| 124 | case AMDGPUAS::FLAT_ADDRESS: |
| 125 | return StringRef("generic" ); |
| 126 | case AMDGPUAS::REGION_ADDRESS: |
| 127 | return StringRef("region" ); |
| 128 | default: |
| 129 | return std::nullopt; |
| 130 | } |
| 131 | } |
| 132 | |
| 133 | StringRef |
| 134 | MetadataStreamerMsgPackV4::getValueKind(Type *Ty, StringRef TypeQual, |
| 135 | StringRef BaseTypeName) const { |
| 136 | if (TypeQual.contains(Other: "pipe" )) |
| 137 | return "pipe" ; |
| 138 | |
| 139 | return StringSwitch<StringRef>(BaseTypeName) |
| 140 | .Case(S: "image1d_t" , Value: "image" ) |
| 141 | .Case(S: "image1d_array_t" , Value: "image" ) |
| 142 | .Case(S: "image1d_buffer_t" , Value: "image" ) |
| 143 | .Case(S: "image2d_t" , Value: "image" ) |
| 144 | .Case(S: "image2d_array_t" , Value: "image" ) |
| 145 | .Case(S: "image2d_array_depth_t" , Value: "image" ) |
| 146 | .Case(S: "image2d_array_msaa_t" , Value: "image" ) |
| 147 | .Case(S: "image2d_array_msaa_depth_t" , Value: "image" ) |
| 148 | .Case(S: "image2d_depth_t" , Value: "image" ) |
| 149 | .Case(S: "image2d_msaa_t" , Value: "image" ) |
| 150 | .Case(S: "image2d_msaa_depth_t" , Value: "image" ) |
| 151 | .Case(S: "image3d_t" , Value: "image" ) |
| 152 | .Case(S: "sampler_t" , Value: "sampler" ) |
| 153 | .Case(S: "queue_t" , Value: "queue" ) |
| 154 | .Default(Value: isa<PointerType>(Val: Ty) |
| 155 | ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS |
| 156 | ? "dynamic_shared_pointer" |
| 157 | : "global_buffer" ) |
| 158 | : "by_value" ); |
| 159 | } |
| 160 | |
| 161 | std::string MetadataStreamerMsgPackV4::getTypeName(Type *Ty, |
| 162 | bool Signed) const { |
| 163 | switch (Ty->getTypeID()) { |
| 164 | case Type::IntegerTyID: { |
| 165 | if (!Signed) |
| 166 | return (Twine('u') + getTypeName(Ty, Signed: true)).str(); |
| 167 | |
| 168 | auto BitWidth = Ty->getIntegerBitWidth(); |
| 169 | switch (BitWidth) { |
| 170 | case 8: |
| 171 | return "char" ; |
| 172 | case 16: |
| 173 | return "short" ; |
| 174 | case 32: |
| 175 | return "int" ; |
| 176 | case 64: |
| 177 | return "long" ; |
| 178 | default: |
| 179 | return (Twine('i') + Twine(BitWidth)).str(); |
| 180 | } |
| 181 | } |
| 182 | case Type::HalfTyID: |
| 183 | return "half" ; |
| 184 | case Type::FloatTyID: |
| 185 | return "float" ; |
| 186 | case Type::DoubleTyID: |
| 187 | return "double" ; |
| 188 | case Type::FixedVectorTyID: { |
| 189 | auto *VecTy = cast<FixedVectorType>(Val: Ty); |
| 190 | auto *ElTy = VecTy->getElementType(); |
| 191 | auto NumElements = VecTy->getNumElements(); |
| 192 | return (Twine(getTypeName(Ty: ElTy, Signed)) + Twine(NumElements)).str(); |
| 193 | } |
| 194 | default: |
| 195 | return "unknown" ; |
| 196 | } |
| 197 | } |
| 198 | |
| 199 | msgpack::ArrayDocNode |
| 200 | MetadataStreamerMsgPackV4::getWorkGroupDimensions(MDNode *Node) const { |
| 201 | auto Dims = HSAMetadataDoc->getArrayNode(); |
| 202 | if (Node->getNumOperands() != 3) |
| 203 | return Dims; |
| 204 | |
| 205 | for (auto &Op : Node->operands()) |
| 206 | Dims.push_back(N: Dims.getDocument()->getNode( |
| 207 | V: uint64_t(mdconst::extract<ConstantInt>(MD: Op)->getZExtValue()))); |
| 208 | return Dims; |
| 209 | } |
| 210 | |
| 211 | void MetadataStreamerMsgPackV4::emitVersion() { |
| 212 | auto Version = HSAMetadataDoc->getArrayNode(); |
| 213 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMajorV4)); |
| 214 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMinorV4)); |
| 215 | getRootMetadata(Key: "amdhsa.version" ) = Version; |
| 216 | } |
| 217 | |
| 218 | void MetadataStreamerMsgPackV4::emitTargetID( |
| 219 | const IsaInfo::AMDGPUTargetID &TargetID) { |
| 220 | getRootMetadata(Key: "amdhsa.target" ) = |
| 221 | HSAMetadataDoc->getNode(V: TargetID.toString(), /*Copy=*/true); |
| 222 | } |
| 223 | |
| 224 | void MetadataStreamerMsgPackV4::emitPrintf(const Module &Mod) { |
| 225 | auto *Node = Mod.getNamedMetadata(Name: "llvm.printf.fmts" ); |
| 226 | if (!Node) |
| 227 | return; |
| 228 | |
| 229 | auto Printf = HSAMetadataDoc->getArrayNode(); |
| 230 | for (auto *Op : Node->operands()) |
| 231 | if (Op->getNumOperands()) |
| 232 | Printf.push_back(N: Printf.getDocument()->getNode( |
| 233 | V: cast<MDString>(Val: Op->getOperand(I: 0))->getString(), /*Copy=*/true)); |
| 234 | getRootMetadata(Key: "amdhsa.printf" ) = Printf; |
| 235 | } |
| 236 | |
| 237 | void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func, |
| 238 | msgpack::MapDocNode Kern) { |
| 239 | // TODO: What about other languages? |
| 240 | auto *Node = Func.getParent()->getNamedMetadata(Name: "opencl.ocl.version" ); |
| 241 | if (!Node || !Node->getNumOperands()) |
| 242 | return; |
| 243 | auto *Op0 = Node->getOperand(i: 0); |
| 244 | if (Op0->getNumOperands() <= 1) |
| 245 | return; |
| 246 | |
| 247 | Kern[".language" ] = Kern.getDocument()->getNode(V: "OpenCL C" ); |
| 248 | auto LanguageVersion = Kern.getDocument()->getArrayNode(); |
| 249 | LanguageVersion.push_back(N: Kern.getDocument()->getNode( |
| 250 | V: mdconst::extract<ConstantInt>(MD: Op0->getOperand(I: 0))->getZExtValue())); |
| 251 | LanguageVersion.push_back(N: Kern.getDocument()->getNode( |
| 252 | V: mdconst::extract<ConstantInt>(MD: Op0->getOperand(I: 1))->getZExtValue())); |
| 253 | Kern[".language_version" ] = LanguageVersion; |
| 254 | } |
| 255 | |
| 256 | void MetadataStreamerMsgPackV4::emitKernelAttrs(const AMDGPUTargetMachine &TM, |
| 257 | const Function &Func, |
| 258 | msgpack::MapDocNode Kern) { |
| 259 | |
| 260 | if (auto *Node = Func.getMetadata(Kind: "reqd_work_group_size" )) |
| 261 | Kern[".reqd_workgroup_size" ] = getWorkGroupDimensions(Node); |
| 262 | if (auto *Node = Func.getMetadata(Kind: "work_group_size_hint" )) |
| 263 | Kern[".workgroup_size_hint" ] = getWorkGroupDimensions(Node); |
| 264 | if (auto *Node = Func.getMetadata(Kind: "vec_type_hint" )) { |
| 265 | Kern[".vec_type_hint" ] = Kern.getDocument()->getNode( |
| 266 | V: getTypeName( |
| 267 | Ty: cast<ValueAsMetadata>(Val: Node->getOperand(I: 0))->getType(), |
| 268 | Signed: mdconst::extract<ConstantInt>(MD: Node->getOperand(I: 1))->getZExtValue()), |
| 269 | /*Copy=*/true); |
| 270 | } |
| 271 | |
| 272 | std::string HandleName = getEnqueuedBlockSymbolName(TM, EnqueuedBlock: Func); |
| 273 | if (!HandleName.empty()) { |
| 274 | Kern[".device_enqueue_symbol" ] = |
| 275 | Kern.getDocument()->getNode(V: std::move(HandleName), /*Copy=*/true); |
| 276 | } |
| 277 | |
| 278 | if (Func.hasFnAttribute(Kind: "device-init" )) |
| 279 | Kern[".kind" ] = Kern.getDocument()->getNode(V: "init" ); |
| 280 | else if (Func.hasFnAttribute(Kind: "device-fini" )) |
| 281 | Kern[".kind" ] = Kern.getDocument()->getNode(V: "fini" ); |
| 282 | } |
| 283 | |
| 284 | void MetadataStreamerMsgPackV4::emitKernelArgs(const MachineFunction &MF, |
| 285 | msgpack::MapDocNode Kern) { |
| 286 | auto &Func = MF.getFunction(); |
| 287 | unsigned Offset = 0; |
| 288 | auto Args = HSAMetadataDoc->getArrayNode(); |
| 289 | for (auto &Arg : Func.args()) { |
| 290 | if (Arg.hasAttribute(Kind: "amdgpu-hidden-argument" )) |
| 291 | continue; |
| 292 | |
| 293 | emitKernelArg(Arg, Offset, Args); |
| 294 | } |
| 295 | |
| 296 | emitHiddenKernelArgs(MF, Offset, Args); |
| 297 | |
| 298 | Kern[".args" ] = Args; |
| 299 | } |
| 300 | |
| 301 | void MetadataStreamerMsgPackV4::emitKernelArg(const Argument &Arg, |
| 302 | unsigned &Offset, |
| 303 | msgpack::ArrayDocNode Args) { |
| 304 | const auto *Func = Arg.getParent(); |
| 305 | auto ArgNo = Arg.getArgNo(); |
| 306 | const MDNode *Node; |
| 307 | |
| 308 | StringRef Name; |
| 309 | Node = Func->getMetadata(Kind: "kernel_arg_name" ); |
| 310 | if (Node && ArgNo < Node->getNumOperands()) |
| 311 | Name = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString(); |
| 312 | else if (Arg.hasName()) |
| 313 | Name = Arg.getName(); |
| 314 | |
| 315 | StringRef TypeName; |
| 316 | Node = Func->getMetadata(Kind: "kernel_arg_type" ); |
| 317 | if (Node && ArgNo < Node->getNumOperands()) |
| 318 | TypeName = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString(); |
| 319 | |
| 320 | StringRef BaseTypeName; |
| 321 | Node = Func->getMetadata(Kind: "kernel_arg_base_type" ); |
| 322 | if (Node && ArgNo < Node->getNumOperands()) |
| 323 | BaseTypeName = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString(); |
| 324 | |
| 325 | StringRef ActAccQual; |
| 326 | // Do we really need NoAlias check here? |
| 327 | if (Arg.getType()->isPointerTy() && Arg.hasNoAliasAttr()) { |
| 328 | if (Arg.onlyReadsMemory()) |
| 329 | ActAccQual = "read_only" ; |
| 330 | else if (Arg.hasAttribute(Kind: Attribute::WriteOnly)) |
| 331 | ActAccQual = "write_only" ; |
| 332 | } |
| 333 | |
| 334 | StringRef AccQual; |
| 335 | Node = Func->getMetadata(Kind: "kernel_arg_access_qual" ); |
| 336 | if (Node && ArgNo < Node->getNumOperands()) |
| 337 | AccQual = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString(); |
| 338 | |
| 339 | StringRef TypeQual; |
| 340 | Node = Func->getMetadata(Kind: "kernel_arg_type_qual" ); |
| 341 | if (Node && ArgNo < Node->getNumOperands()) |
| 342 | TypeQual = cast<MDString>(Val: Node->getOperand(I: ArgNo))->getString(); |
| 343 | |
| 344 | const DataLayout &DL = Func->getDataLayout(); |
| 345 | |
| 346 | MaybeAlign PointeeAlign; |
| 347 | Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType(); |
| 348 | |
| 349 | // FIXME: Need to distinguish in memory alignment from pointer alignment. |
| 350 | if (auto *PtrTy = dyn_cast<PointerType>(Val: Ty)) { |
| 351 | if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) |
| 352 | PointeeAlign = Arg.getParamAlign().valueOrOne(); |
| 353 | } |
| 354 | |
| 355 | // There's no distinction between byval aggregates and raw aggregates. |
| 356 | Type *ArgTy; |
| 357 | Align ArgAlign; |
| 358 | std::tie(args&: ArgTy, args&: ArgAlign) = getArgumentTypeAlign(Arg, DL); |
| 359 | |
| 360 | emitKernelArg(DL, Ty: ArgTy, Alignment: ArgAlign, |
| 361 | ValueKind: getValueKind(Ty: ArgTy, TypeQual, BaseTypeName), Offset, Args, |
| 362 | PointeeAlign, Name, TypeName, BaseTypeName, ActAccQual, |
| 363 | AccQual, TypeQual); |
| 364 | } |
| 365 | |
| 366 | void MetadataStreamerMsgPackV4::emitKernelArg( |
| 367 | const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind, |
| 368 | unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign, |
| 369 | StringRef Name, StringRef TypeName, StringRef BaseTypeName, |
| 370 | StringRef ActAccQual, StringRef AccQual, StringRef TypeQual) { |
| 371 | auto Arg = Args.getDocument()->getMapNode(); |
| 372 | |
| 373 | if (!Name.empty()) |
| 374 | Arg[".name" ] = Arg.getDocument()->getNode(V: Name, /*Copy=*/true); |
| 375 | if (!TypeName.empty()) |
| 376 | Arg[".type_name" ] = Arg.getDocument()->getNode(V: TypeName, /*Copy=*/true); |
| 377 | auto Size = DL.getTypeAllocSize(Ty); |
| 378 | Arg[".size" ] = Arg.getDocument()->getNode(V: Size); |
| 379 | Offset = alignTo(Size: Offset, A: Alignment); |
| 380 | Arg[".offset" ] = Arg.getDocument()->getNode(V: Offset); |
| 381 | Offset += Size; |
| 382 | Arg[".value_kind" ] = Arg.getDocument()->getNode(V: ValueKind, /*Copy=*/true); |
| 383 | if (PointeeAlign) |
| 384 | Arg[".pointee_align" ] = Arg.getDocument()->getNode(V: PointeeAlign->value()); |
| 385 | |
| 386 | if (auto *PtrTy = dyn_cast<PointerType>(Val: Ty)) |
| 387 | if (auto Qualifier = getAddressSpaceQualifier(AddressSpace: PtrTy->getAddressSpace())) |
| 388 | // Limiting address space to emit only for a certain ValueKind. |
| 389 | if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer" ) |
| 390 | Arg[".address_space" ] = Arg.getDocument()->getNode(V: *Qualifier, |
| 391 | /*Copy=*/true); |
| 392 | |
| 393 | if (auto AQ = getAccessQualifier(AccQual)) |
| 394 | Arg[".access" ] = Arg.getDocument()->getNode(V: *AQ, /*Copy=*/true); |
| 395 | |
| 396 | if (auto AAQ = getAccessQualifier(AccQual: ActAccQual)) |
| 397 | Arg[".actual_access" ] = Arg.getDocument()->getNode(V: *AAQ, /*Copy=*/true); |
| 398 | |
| 399 | SmallVector<StringRef, 1> SplitTypeQuals; |
| 400 | TypeQual.split(A&: SplitTypeQuals, Separator: " " , MaxSplit: -1, KeepEmpty: false); |
| 401 | for (StringRef Key : SplitTypeQuals) { |
| 402 | if (Key == "const" ) |
| 403 | Arg[".is_const" ] = Arg.getDocument()->getNode(V: true); |
| 404 | else if (Key == "restrict" ) |
| 405 | Arg[".is_restrict" ] = Arg.getDocument()->getNode(V: true); |
| 406 | else if (Key == "volatile" ) |
| 407 | Arg[".is_volatile" ] = Arg.getDocument()->getNode(V: true); |
| 408 | else if (Key == "pipe" ) |
| 409 | Arg[".is_pipe" ] = Arg.getDocument()->getNode(V: true); |
| 410 | } |
| 411 | |
| 412 | Args.push_back(N: Arg); |
| 413 | } |
| 414 | |
| 415 | void MetadataStreamerMsgPackV4::emitHiddenKernelArgs( |
| 416 | const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) { |
| 417 | auto &Func = MF.getFunction(); |
| 418 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| 419 | |
| 420 | unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(F: Func); |
| 421 | if (!HiddenArgNumBytes) |
| 422 | return; |
| 423 | |
| 424 | const Module *M = Func.getParent(); |
| 425 | auto &DL = M->getDataLayout(); |
| 426 | auto *Int64Ty = Type::getInt64Ty(C&: Func.getContext()); |
| 427 | |
| 428 | Offset = alignTo(Size: Offset, A: ST.getAlignmentForImplicitArgPtr()); |
| 429 | |
| 430 | if (HiddenArgNumBytes >= 8) |
| 431 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_x" , Offset, |
| 432 | Args); |
| 433 | if (HiddenArgNumBytes >= 16) |
| 434 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_y" , Offset, |
| 435 | Args); |
| 436 | if (HiddenArgNumBytes >= 24) |
| 437 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_z" , Offset, |
| 438 | Args); |
| 439 | |
| 440 | auto *Int8PtrTy = |
| 441 | PointerType::get(C&: Func.getContext(), AddressSpace: AMDGPUAS::GLOBAL_ADDRESS); |
| 442 | |
| 443 | if (HiddenArgNumBytes >= 32) { |
| 444 | // We forbid the use of features requiring hostcall when compiling OpenCL |
| 445 | // before code object V5, which makes the mutual exclusion between the |
| 446 | // "printf buffer" and "hostcall buffer" here sound. |
| 447 | if (M->getNamedMetadata(Name: "llvm.printf.fmts" )) |
| 448 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_printf_buffer" , Offset, |
| 449 | Args); |
| 450 | else if (!Func.hasFnAttribute(Kind: "amdgpu-no-hostcall-ptr" )) |
| 451 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_hostcall_buffer" , Offset, |
| 452 | Args); |
| 453 | else |
| 454 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none" , Offset, Args); |
| 455 | } |
| 456 | |
| 457 | // Emit "default queue" and "completion action" arguments if enqueue kernel is |
| 458 | // used, otherwise emit dummy "none" arguments. |
| 459 | if (HiddenArgNumBytes >= 40) { |
| 460 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-default-queue" )) { |
| 461 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_default_queue" , Offset, |
| 462 | Args); |
| 463 | } else { |
| 464 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none" , Offset, Args); |
| 465 | } |
| 466 | } |
| 467 | |
| 468 | if (HiddenArgNumBytes >= 48) { |
| 469 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-completion-action" )) { |
| 470 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_completion_action" , Offset, |
| 471 | Args); |
| 472 | } else { |
| 473 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none" , Offset, Args); |
| 474 | } |
| 475 | } |
| 476 | |
| 477 | // Emit the pointer argument for multi-grid object. |
| 478 | if (HiddenArgNumBytes >= 56) { |
| 479 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-multigrid-sync-arg" )) { |
| 480 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_multigrid_sync_arg" , Offset, |
| 481 | Args); |
| 482 | } else { |
| 483 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_none" , Offset, Args); |
| 484 | } |
| 485 | } |
| 486 | } |
| 487 | |
| 488 | msgpack::MapDocNode |
| 489 | MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF, |
| 490 | const SIProgramInfo &ProgramInfo, |
| 491 | unsigned CodeObjectVersion) const { |
| 492 | const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>(); |
| 493 | const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); |
| 494 | const Function &F = MF.getFunction(); |
| 495 | |
| 496 | auto Kern = HSAMetadataDoc->getMapNode(); |
| 497 | |
| 498 | Align MaxKernArgAlign; |
| 499 | Kern[".kernarg_segment_size" ] = Kern.getDocument()->getNode( |
| 500 | V: STM.getKernArgSegmentSize(F, MaxAlign&: MaxKernArgAlign)); |
| 501 | Kern[".group_segment_fixed_size" ] = |
| 502 | Kern.getDocument()->getNode(V: ProgramInfo.LDSSize); |
| 503 | DelayedExprs->assignDocNode(DN&: Kern[".private_segment_fixed_size" ], |
| 504 | Type: msgpack::Type::UInt, ExprValue: ProgramInfo.ScratchSize); |
| 505 | if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) { |
| 506 | DelayedExprs->assignDocNode(DN&: Kern[".uses_dynamic_stack" ], |
| 507 | Type: msgpack::Type::Boolean, |
| 508 | ExprValue: ProgramInfo.DynamicCallStack); |
| 509 | } |
| 510 | |
| 511 | if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP()) |
| 512 | Kern[".workgroup_processor_mode" ] = |
| 513 | Kern.getDocument()->getNode(V: ProgramInfo.WgpMode); |
| 514 | |
| 515 | // FIXME: The metadata treats the minimum as 16? |
| 516 | Kern[".kernarg_segment_align" ] = |
| 517 | Kern.getDocument()->getNode(V: std::max(a: Align(4), b: MaxKernArgAlign).value()); |
| 518 | Kern[".wavefront_size" ] = |
| 519 | Kern.getDocument()->getNode(V: STM.getWavefrontSize()); |
| 520 | DelayedExprs->assignDocNode(DN&: Kern[".sgpr_count" ], Type: msgpack::Type::UInt, |
| 521 | ExprValue: ProgramInfo.NumSGPR); |
| 522 | DelayedExprs->assignDocNode(DN&: Kern[".vgpr_count" ], Type: msgpack::Type::UInt, |
| 523 | ExprValue: ProgramInfo.NumVGPR); |
| 524 | |
| 525 | // Only add AGPR count to metadata for supported devices |
| 526 | if (STM.hasMAIInsts()) { |
| 527 | DelayedExprs->assignDocNode(DN&: Kern[".agpr_count" ], Type: msgpack::Type::UInt, |
| 528 | ExprValue: ProgramInfo.NumAccVGPR); |
| 529 | } |
| 530 | |
| 531 | Kern[".max_flat_workgroup_size" ] = |
| 532 | Kern.getDocument()->getNode(V: MFI.getMaxFlatWorkGroupSize()); |
| 533 | |
| 534 | uint32_t NumWGY = MFI.getMaxNumWorkGroupsY(); |
| 535 | uint32_t NumWGZ = MFI.getMaxNumWorkGroupsZ(); |
| 536 | uint32_t NumWGX = MFI.getMaxNumWorkGroupsX(); |
| 537 | |
| 538 | // TODO: Should consider 0 invalid and reject in IR verifier. |
| 539 | if (NumWGX != std::numeric_limits<uint32_t>::max() && NumWGX != 0) |
| 540 | Kern[".max_num_workgroups_x" ] = Kern.getDocument()->getNode(V: NumWGX); |
| 541 | |
| 542 | if (NumWGY != std::numeric_limits<uint32_t>::max() && NumWGY != 0) |
| 543 | Kern[".max_num_workgroups_y" ] = Kern.getDocument()->getNode(V: NumWGY); |
| 544 | |
| 545 | if (NumWGZ != std::numeric_limits<uint32_t>::max() && NumWGZ != 0) |
| 546 | Kern[".max_num_workgroups_z" ] = Kern.getDocument()->getNode(V: NumWGZ); |
| 547 | |
| 548 | Kern[".sgpr_spill_count" ] = |
| 549 | Kern.getDocument()->getNode(V: MFI.getNumSpilledSGPRs()); |
| 550 | Kern[".vgpr_spill_count" ] = |
| 551 | Kern.getDocument()->getNode(V: MFI.getNumSpilledVGPRs()); |
| 552 | |
| 553 | return Kern; |
| 554 | } |
| 555 | |
| 556 | bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer &TargetStreamer) { |
| 557 | DelayedExprs->resolveDelayedExpressions(); |
| 558 | return TargetStreamer.EmitHSAMetadata(HSAMetadata&: *HSAMetadataDoc, Strict: true); |
| 559 | } |
| 560 | |
| 561 | void MetadataStreamerMsgPackV4::begin(const Module &Mod, |
| 562 | const IsaInfo::AMDGPUTargetID &TargetID) { |
| 563 | emitVersion(); |
| 564 | emitTargetID(TargetID); |
| 565 | emitPrintf(Mod); |
| 566 | getRootMetadata(Key: "amdhsa.kernels" ) = HSAMetadataDoc->getArrayNode(); |
| 567 | DelayedExprs->clear(); |
| 568 | } |
| 569 | |
| 570 | void MetadataStreamerMsgPackV4::end() { |
| 571 | DelayedExprs->resolveDelayedExpressions(); |
| 572 | std::string HSAMetadataString; |
| 573 | raw_string_ostream StrOS(HSAMetadataString); |
| 574 | HSAMetadataDoc->toYAML(OS&: StrOS); |
| 575 | |
| 576 | if (DumpHSAMetadata) |
| 577 | dump(HSAMetadataString: StrOS.str()); |
| 578 | if (VerifyHSAMetadata) |
| 579 | verify(HSAMetadataString: StrOS.str()); |
| 580 | } |
| 581 | |
| 582 | void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF, |
| 583 | const SIProgramInfo &ProgramInfo) { |
| 584 | auto &Func = MF.getFunction(); |
| 585 | if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL && |
| 586 | Func.getCallingConv() != CallingConv::SPIR_KERNEL) |
| 587 | return; |
| 588 | |
| 589 | auto CodeObjectVersion = |
| 590 | AMDGPU::getAMDHSACodeObjectVersion(M: *Func.getParent()); |
| 591 | auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion); |
| 592 | |
| 593 | auto Kernels = |
| 594 | getRootMetadata(Key: "amdhsa.kernels" ).getArray(/*Convert=*/true); |
| 595 | |
| 596 | auto &TM = static_cast<const AMDGPUTargetMachine &>(MF.getTarget()); |
| 597 | { |
| 598 | Kern[".name" ] = Kern.getDocument()->getNode(V: Func.getName()); |
| 599 | Kern[".symbol" ] = Kern.getDocument()->getNode( |
| 600 | V: (Twine(Func.getName()) + Twine(".kd" )).str(), /*Copy=*/true); |
| 601 | emitKernelLanguage(Func, Kern); |
| 602 | emitKernelAttrs(TM, Func, Kern); |
| 603 | emitKernelArgs(MF, Kern); |
| 604 | } |
| 605 | |
| 606 | Kernels.push_back(N: Kern); |
| 607 | } |
| 608 | |
| 609 | //===----------------------------------------------------------------------===// |
| 610 | // HSAMetadataStreamerV5 |
| 611 | //===----------------------------------------------------------------------===// |
| 612 | |
| 613 | void MetadataStreamerMsgPackV5::emitVersion() { |
| 614 | auto Version = HSAMetadataDoc->getArrayNode(); |
| 615 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMajorV5)); |
| 616 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMinorV5)); |
| 617 | getRootMetadata(Key: "amdhsa.version" ) = Version; |
| 618 | } |
| 619 | |
| 620 | void MetadataStreamerMsgPackV5::emitHiddenKernelArgs( |
| 621 | const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) { |
| 622 | auto &Func = MF.getFunction(); |
| 623 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| 624 | |
| 625 | // No implicit kernel argument is used. |
| 626 | if (ST.getImplicitArgNumBytes(F: Func) == 0) |
| 627 | return; |
| 628 | |
| 629 | const Module *M = Func.getParent(); |
| 630 | auto &DL = M->getDataLayout(); |
| 631 | const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); |
| 632 | |
| 633 | auto *Int64Ty = Type::getInt64Ty(C&: Func.getContext()); |
| 634 | auto *Int32Ty = Type::getInt32Ty(C&: Func.getContext()); |
| 635 | auto *Int16Ty = Type::getInt16Ty(C&: Func.getContext()); |
| 636 | |
| 637 | Offset = alignTo(Size: Offset, A: ST.getAlignmentForImplicitArgPtr()); |
| 638 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_block_count_x" , Offset, Args); |
| 639 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_block_count_y" , Offset, Args); |
| 640 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_block_count_z" , Offset, Args); |
| 641 | |
| 642 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_group_size_x" , Offset, Args); |
| 643 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_group_size_y" , Offset, Args); |
| 644 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_group_size_z" , Offset, Args); |
| 645 | |
| 646 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_remainder_x" , Offset, Args); |
| 647 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_remainder_y" , Offset, Args); |
| 648 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_remainder_z" , Offset, Args); |
| 649 | |
| 650 | // Reserved for hidden_tool_correlation_id. |
| 651 | Offset += 8; |
| 652 | |
| 653 | Offset += 8; // Reserved. |
| 654 | |
| 655 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_x" , Offset, Args); |
| 656 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_y" , Offset, Args); |
| 657 | emitKernelArg(DL, Ty: Int64Ty, Alignment: Align(8), ValueKind: "hidden_global_offset_z" , Offset, Args); |
| 658 | |
| 659 | emitKernelArg(DL, Ty: Int16Ty, Alignment: Align(2), ValueKind: "hidden_grid_dims" , Offset, Args); |
| 660 | |
| 661 | Offset += 6; // Reserved. |
| 662 | auto *Int8PtrTy = |
| 663 | PointerType::get(C&: Func.getContext(), AddressSpace: AMDGPUAS::GLOBAL_ADDRESS); |
| 664 | |
| 665 | if (M->getNamedMetadata(Name: "llvm.printf.fmts" )) { |
| 666 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_printf_buffer" , Offset, |
| 667 | Args); |
| 668 | } else { |
| 669 | Offset += 8; // Skipped. |
| 670 | } |
| 671 | |
| 672 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-hostcall-ptr" )) { |
| 673 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_hostcall_buffer" , Offset, |
| 674 | Args); |
| 675 | } else { |
| 676 | Offset += 8; // Skipped. |
| 677 | } |
| 678 | |
| 679 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-multigrid-sync-arg" )) { |
| 680 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_multigrid_sync_arg" , Offset, |
| 681 | Args); |
| 682 | } else { |
| 683 | Offset += 8; // Skipped. |
| 684 | } |
| 685 | |
| 686 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-heap-ptr" )) |
| 687 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_heap_v1" , Offset, Args); |
| 688 | else |
| 689 | Offset += 8; // Skipped. |
| 690 | |
| 691 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-default-queue" )) { |
| 692 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_default_queue" , Offset, |
| 693 | Args); |
| 694 | } else { |
| 695 | Offset += 8; // Skipped. |
| 696 | } |
| 697 | |
| 698 | if (!Func.hasFnAttribute(Kind: "amdgpu-no-completion-action" )) { |
| 699 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_completion_action" , Offset, |
| 700 | Args); |
| 701 | } else { |
| 702 | Offset += 8; // Skipped. |
| 703 | } |
| 704 | |
| 705 | // Emit argument for hidden dynamic lds size |
| 706 | if (MFI.isDynamicLDSUsed()) { |
| 707 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_dynamic_lds_size" , Offset, |
| 708 | Args); |
| 709 | } else { |
| 710 | Offset += 4; // skipped |
| 711 | } |
| 712 | |
| 713 | Offset += 68; // Reserved. |
| 714 | |
| 715 | // hidden_private_base and hidden_shared_base are only when the subtarget has |
| 716 | // ApertureRegs. |
| 717 | if (!ST.hasApertureRegs()) { |
| 718 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_private_base" , Offset, Args); |
| 719 | emitKernelArg(DL, Ty: Int32Ty, Alignment: Align(4), ValueKind: "hidden_shared_base" , Offset, Args); |
| 720 | } else { |
| 721 | Offset += 8; // Skipped. |
| 722 | } |
| 723 | |
| 724 | if (MFI.getUserSGPRInfo().hasQueuePtr()) |
| 725 | emitKernelArg(DL, Ty: Int8PtrTy, Alignment: Align(8), ValueKind: "hidden_queue_ptr" , Offset, Args); |
| 726 | } |
| 727 | |
| 728 | void MetadataStreamerMsgPackV5::emitKernelAttrs(const AMDGPUTargetMachine &TM, |
| 729 | const Function &Func, |
| 730 | msgpack::MapDocNode Kern) { |
| 731 | MetadataStreamerMsgPackV4::emitKernelAttrs(TM, Func, Kern); |
| 732 | |
| 733 | if (Func.getFnAttribute(Kind: "uniform-work-group-size" ).getValueAsBool()) |
| 734 | Kern[".uniform_work_group_size" ] = Kern.getDocument()->getNode(V: 1); |
| 735 | } |
| 736 | |
| 737 | //===----------------------------------------------------------------------===// |
| 738 | // HSAMetadataStreamerV6 |
| 739 | //===----------------------------------------------------------------------===// |
| 740 | |
| 741 | void MetadataStreamerMsgPackV6::emitVersion() { |
| 742 | auto Version = HSAMetadataDoc->getArrayNode(); |
| 743 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMajorV6)); |
| 744 | Version.push_back(N: Version.getDocument()->getNode(V: VersionMinorV6)); |
| 745 | getRootMetadata(Key: "amdhsa.version" ) = Version; |
| 746 | } |
| 747 | |
| 748 | } // end namespace AMDGPU::HSAMD |
| 749 | } // end namespace llvm |
| 750 | |