1//===--- HIPAMD.cpp - HIP Tool and ToolChain Implementations ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "HIPAMD.h"
10#include "AMDGPU.h"
11#include "HIPUtility.h"
12#include "SPIRV.h"
13#include "clang/Basic/Cuda.h"
14#include "clang/Driver/CommonArgs.h"
15#include "clang/Driver/Compilation.h"
16#include "clang/Driver/Driver.h"
17#include "clang/Driver/InputInfo.h"
18#include "clang/Driver/SanitizerArgs.h"
19#include "clang/Options/Options.h"
20#include "llvm/Support/FileSystem.h"
21#include "llvm/Support/Path.h"
22#include "llvm/TargetParser/TargetParser.h"
23
24using namespace clang::driver;
25using namespace clang::driver::toolchains;
26using namespace clang::driver::tools;
27using namespace clang;
28using namespace llvm::opt;
29
// Path of the platform's null device: "nul" on Windows targets, "/dev/null"
// everywhere else.
#if !defined(_WIN32) && !defined(_WIN64)
#define NULL_FILE "/dev/null"
#else
#define NULL_FILE "nul"
#endif
35
36void AMDGCN::Linker::constructLLVMLinkCommand(
37 Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
38 const InputInfo &Output, const llvm::opt::ArgList &Args) const {
39
40 ArgStringList LinkerInputs;
41
42 for (auto Input : Inputs)
43 LinkerInputs.push_back(Elt: Input.getFilename());
44
45 // Look for archive of bundled bitcode in arguments, and add temporary files
46 // for the extracted archive of bitcode to inputs.
47 auto TargetID = Args.getLastArgValue(Id: options::OPT_mcpu_EQ);
48 AddStaticDeviceLibsLinking(C, T: *this, JA, Inputs, DriverArgs: Args, CmdArgs&: LinkerInputs, Arch: "amdgcn",
49 Target: TargetID, /*IsBitCodeSDL=*/isBitCodeSDL: true);
50 tools::constructLLVMLinkCommand(C, T: *this, JA, JobInputs: Inputs, LinkerInputs, Output,
51 Args);
52}
53
54void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
55 const InputInfoList &Inputs,
56 const InputInfo &Output,
57 const llvm::opt::ArgList &Args) const {
58 // Construct lld command.
59 // The output from ld.lld is an HSA code object file.
60 ArgStringList LldArgs{"-flavor",
61 "gnu",
62 "-m",
63 "elf64_amdgpu",
64 "--no-undefined",
65 "-shared",
66 "-plugin-opt=-amdgpu-internalize-symbols"};
67 if (Args.hasArg(Ids: options::OPT_hipstdpar))
68 LldArgs.push_back(Elt: "-plugin-opt=-amdgpu-enable-hipstdpar");
69
70 auto &TC = getToolChain();
71 auto &D = TC.getDriver();
72 bool IsThinLTO = D.getOffloadLTOMode() == LTOK_Thin;
73 addLTOOptions(ToolChain: TC, Args, CmdArgs&: LldArgs, Output, Inputs, IsThinLTO);
74
75 // Extract all the -m options
76 std::vector<llvm::StringRef> Features;
77 amdgpu::getAMDGPUTargetFeatures(D, Triple: TC.getTriple(), Args, Features);
78
79 // Add features to mattr such as cumode
80 std::string MAttrString = "-plugin-opt=-mattr=";
81 for (auto OneFeature : unifyTargetFeatures(Features)) {
82 MAttrString.append(s: Args.MakeArgString(Str: OneFeature));
83 if (OneFeature != Features.back())
84 MAttrString.append(s: ",");
85 }
86 if (!Features.empty())
87 LldArgs.push_back(Elt: Args.MakeArgString(Str: MAttrString));
88
89 // ToDo: Remove this option after AMDGPU backend supports ISA-level linking.
90 // Since AMDGPU backend currently does not support ISA-level linking, all
91 // called functions need to be imported.
92 if (IsThinLTO) {
93 LldArgs.push_back(Elt: Args.MakeArgString(Str: "-plugin-opt=-force-import-all"));
94 LldArgs.push_back(Elt: Args.MakeArgString(Str: "-plugin-opt=-avail-extern-to-local"));
95 LldArgs.push_back(Elt: Args.MakeArgString(
96 Str: "-plugin-opt=-avail-extern-gv-in-addrspace-to-local=3"));
97 }
98
99 for (const Arg *A : Args.filtered(Ids: options::OPT_mllvm)) {
100 LldArgs.push_back(
101 Elt: Args.MakeArgString(Str: Twine("-plugin-opt=") + A->getValue(N: 0)));
102 }
103
104 if (C.getDriver().isSaveTempsEnabled())
105 LldArgs.push_back(Elt: "-save-temps");
106
107 addLinkerCompressDebugSectionsOption(TC, Args, CmdArgs&: LldArgs);
108
109 // Given that host and device linking happen in separate processes, the device
110 // linker doesn't always have the visibility as to which device symbols are
111 // needed by a program, especially for the device symbol dependencies that are
112 // introduced through the host symbol resolution.
113 // For example: host_A() (A.obj) --> host_B(B.obj) --> device_kernel_B()
114 // (B.obj) In this case, the device linker doesn't know that A.obj actually
115 // depends on the kernel functions in B.obj. When linking to static device
116 // library, the device linker may drop some of the device global symbols if
117 // they aren't referenced. As a workaround, we are adding to the
118 // --whole-archive flag such that all global symbols would be linked in.
119 LldArgs.push_back(Elt: "--whole-archive");
120
121 for (auto *Arg : Args.filtered(Ids: options::OPT_Xoffload_linker)) {
122 StringRef ArgVal = Arg->getValue(N: 1);
123 auto SplitArg = ArgVal.split(Separator: "-mllvm=");
124 if (!SplitArg.second.empty()) {
125 LldArgs.push_back(
126 Elt: Args.MakeArgString(Str: Twine("-plugin-opt=") + SplitArg.second));
127 } else {
128 LldArgs.push_back(Elt: Args.MakeArgString(Str: ArgVal));
129 }
130 Arg->claim();
131 }
132
133 LldArgs.append(IL: {"-o", Output.getFilename()});
134 for (auto Input : Inputs)
135 LldArgs.push_back(Elt: Input.getFilename());
136
137 // Look for archive of bundled bitcode in arguments, and add temporary files
138 // for the extracted archive of bitcode to inputs.
139 auto TargetID = Args.getLastArgValue(Id: options::OPT_mcpu_EQ);
140 AddStaticDeviceLibsLinking(C, T: *this, JA, Inputs, DriverArgs: Args, CmdArgs&: LldArgs, Arch: "amdgcn",
141 Target: TargetID, /*IsBitCodeSDL=*/isBitCodeSDL: true);
142
143 LldArgs.push_back(Elt: "--no-whole-archive");
144
145 const char *Lld = Args.MakeArgString(Str: getToolChain().GetProgramPath(Name: "lld"));
146 C.addCommand(C: std::make_unique<Command>(args: JA, args: *this, args: ResponseFileSupport::None(),
147 args&: Lld, args&: LldArgs, args: Inputs, args: Output));
148}
149
150// For SPIR-V the inputs for the job are device AMDGCN SPIR-V flavoured bitcode
151// and the output is either a compiled SPIR-V binary or bitcode (-emit-llvm). It
152// calls llvm-link and then the llvm-spirv translator or the SPIR-V BE.
153// TODO: consider if we want to run any targeted optimisations over IR here,
154// over generic SPIR-V.
155void AMDGCN::Linker::constructLinkAndEmitSpirvCommand(
156 Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
157 const InputInfo &Output, const llvm::opt::ArgList &Args) const {
158 assert(!Inputs.empty() && "Must have at least one input.");
159
160 std::string LinkedBCFilePrefix(
161 Twine(llvm::sys::path::stem(path: Output.getFilename()), "-linked").str());
162 const char *LinkedBCFilePath = HIP::getTempFile(C, Prefix: LinkedBCFilePrefix, Extension: "bc");
163 InputInfo LinkedBCFile(&JA, LinkedBCFilePath, Output.getBaseInput());
164
165 bool UseSPIRVBackend =
166 Args.hasFlag(Pos: options::OPT_use_spirv_backend,
167 Neg: options::OPT_no_use_spirv_backend, /*Default=*/false);
168
169 constructLLVMLinkCommand(C, JA, Inputs, Output: LinkedBCFile, Args);
170
171 if (UseSPIRVBackend) {
172 // This code handles the case in the new driver when --offload-device-only
173 // is unset and clang-linker-wrapper forwards the bitcode that must be
174 // compiled to SPIR-V.
175
176 llvm::opt::ArgStringList CmdArgs;
177 const char *Triple =
178 C.getArgs().MakeArgString(Str: "-triple=spirv64-amd-amdhsa");
179
180 CmdArgs.append(IL: {"-cc1", Triple, "-emit-obj", "-disable-llvm-optzns",
181 LinkedBCFile.getFilename(), "-o", Output.getFilename()});
182
183 const Driver &Driver = getToolChain().getDriver();
184 const char *Exec = Driver.getClangProgramPath();
185 C.addCommand(C: std::make_unique<Command>(
186 args: JA, args: *this, args: ResponseFileSupport::None(), args&: Exec, args&: CmdArgs, args&: LinkedBCFile,
187 args: Output, args: Driver.getPrependArg()));
188 } else {
189 // Emit SPIR-V binary using the translator
190 llvm::opt::ArgStringList TrArgs{
191 "--spirv-max-version=1.6",
192 "--spirv-ext=+all",
193 "--spirv-allow-unknown-intrinsics",
194 "--spirv-lower-const-expr",
195 "--spirv-preserve-auxdata",
196 "--spirv-debug-info-version=nonsemantic-shader-200"};
197 SPIRV::constructTranslateCommand(C, T: *this, JA, Output, Input: LinkedBCFile,
198 Args: TrArgs);
199 }
200}
201
202// For amdgcn the inputs of the linker job are device bitcode and output is
203// either an object file or bitcode (-emit-llvm). It calls llvm-link, opt,
204// llc, then lld steps.
205void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
206 const InputInfo &Output,
207 const InputInfoList &Inputs,
208 const ArgList &Args,
209 const char *LinkingOutput) const {
210 if (Inputs.size() > 0 &&
211 Inputs[0].getType() == types::TY_Image &&
212 JA.getType() == types::TY_Object)
213 return HIP::constructGenerateObjFileFromHIPFatBinary(C, Output, Inputs,
214 Args, JA, T: *this);
215
216 if (JA.getType() == types::TY_HIP_FATBIN)
217 return HIP::constructHIPFatbinCommand(C, JA, OutputFileName: Output.getFilename(), Inputs,
218 TCArgs: Args, T: *this);
219
220 if (JA.getType() == types::TY_LLVM_BC)
221 return constructLLVMLinkCommand(C, JA, Inputs, Output, Args);
222
223 if (getToolChain().getEffectiveTriple().isSPIRV())
224 return constructLinkAndEmitSpirvCommand(C, JA, Inputs, Output, Args);
225
226 return constructLldCommand(C, JA, Inputs, Output, Args);
227}
228
229HIPAMDToolChain::HIPAMDToolChain(const Driver &D, const llvm::Triple &Triple,
230 const ToolChain &HostTC, const ArgList &Args)
231 : ROCMToolChain(D, Triple, Args), HostTC(HostTC) {
232 // Lookup binaries into the driver directory, this is used to
233 // discover the clang-offload-bundler executable.
234 getProgramPaths().push_back(Elt: getDriver().Dir);
235}
236
237void HIPAMDToolChain::addClangTargetOptions(
238 const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
239 Action::OffloadKind DeviceOffloadingKind) const {
240 HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadKind: DeviceOffloadingKind);
241
242 assert(DeviceOffloadingKind == Action::OFK_HIP &&
243 "Only HIP offloading kinds are supported for GPUs.");
244
245 CC1Args.append(IL: {"-fcuda-is-device", "-fno-threadsafe-statics"});
246
247 if (!DriverArgs.hasFlag(Pos: options::OPT_fgpu_rdc, Neg: options::OPT_fno_gpu_rdc,
248 Default: false)) {
249 CC1Args.append(IL: {"-mllvm", "-amdgpu-internalize-symbols"});
250 if (DriverArgs.hasArgNoClaim(Ids: options::OPT_hipstdpar))
251 CC1Args.append(IL: {"-mllvm", "-amdgpu-enable-hipstdpar"});
252 }
253
254 StringRef MaxThreadsPerBlock =
255 DriverArgs.getLastArgValue(Id: options::OPT_gpu_max_threads_per_block_EQ);
256 if (!MaxThreadsPerBlock.empty()) {
257 std::string ArgStr =
258 (Twine("--gpu-max-threads-per-block=") + MaxThreadsPerBlock).str();
259 CC1Args.push_back(Elt: DriverArgs.MakeArgStringRef(Str: ArgStr));
260 }
261
262 CC1Args.push_back(Elt: "-fcuda-allow-variadic-functions");
263
264 // Default to "hidden" visibility, as object level linking will not be
265 // supported for the foreseeable future.
266 if (!DriverArgs.hasArg(Ids: options::OPT_fvisibility_EQ,
267 Ids: options::OPT_fvisibility_ms_compat)) {
268 CC1Args.append(IL: {"-fvisibility=hidden"});
269 CC1Args.push_back(Elt: "-fapply-global-visibility-to-externs");
270 }
271
272 if (getEffectiveTriple().isSPIRV()) {
273 // For SPIR-V we embed the command-line into the generated binary, in order
274 // to retrieve it at JIT time and be able to do target specific compilation
275 // with options that match the user-supplied ones.
276 if (!DriverArgs.hasArg(Ids: options::OPT_fembed_bitcode_marker))
277 CC1Args.push_back(Elt: "-fembed-bitcode=marker");
278 // For SPIR-V we want to retain the pristine output of Clang CodeGen, since
279 // optimizations might lose structure / information that is necessary for
280 // generating optimal concrete AMDGPU code. We duplicate this because the
281 // HIP TC doesn't invoke the base AMDGPU TC addClangTargetOptions.
282 if (!DriverArgs.hasArg(Ids: options::OPT_disable_llvm_passes))
283 CC1Args.push_back(Elt: "-disable-llvm-passes");
284 return; // No DeviceLibs for SPIR-V.
285 }
286
287 for (auto BCFile : getDeviceLibs(Args: DriverArgs, DeviceOffloadKind: DeviceOffloadingKind)) {
288 CC1Args.push_back(Elt: BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
289 : "-mlink-bitcode-file");
290 CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: BCFile.Path));
291 }
292}
293
294llvm::opt::DerivedArgList *
295HIPAMDToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
296 StringRef BoundArch,
297 Action::OffloadKind DeviceOffloadKind) const {
298 DerivedArgList *DAL =
299 HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
300 if (!DAL)
301 DAL = new DerivedArgList(Args.getBaseArgs());
302
303 const OptTable &Opts = getDriver().getOpts();
304
305 for (Arg *A : Args) {
306 // Filter unsupported sanitizers passed from the HostTC.
307 if (!handleSanitizeOption(TC: *this, DAL&: *DAL, DriverArgs: Args, TargetID: BoundArch, A))
308 DAL->append(A);
309 }
310
311 if (!BoundArch.empty()) {
312 DAL->eraseArg(Id: options::OPT_mcpu_EQ);
313 DAL->AddJoinedArg(BaseArg: nullptr, Opt: Opts.getOption(Opt: options::OPT_mcpu_EQ), Value: BoundArch);
314 checkTargetID(DriverArgs: *DAL);
315 }
316
317 if (!Args.hasArg(Ids: options::OPT_flto_partitions_EQ))
318 DAL->AddJoinedArg(BaseArg: nullptr, Opt: Opts.getOption(Opt: options::OPT_flto_partitions_EQ),
319 Value: "8");
320
321 return DAL;
322}
323
324Tool *HIPAMDToolChain::buildLinker() const {
325 assert(getTriple().isAMDGCN() ||
326 getTriple().getArch() == llvm::Triple::spirv64);
327 return new tools::AMDGCN::Linker(*this);
328}
329
330void HIPAMDToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
331 AMDGPUToolChain::addClangWarningOptions(CC1Args);
332 HostTC.addClangWarningOptions(CC1Args);
333}
334
335ToolChain::CXXStdlibType
336HIPAMDToolChain::GetCXXStdlibType(const ArgList &Args) const {
337 return HostTC.GetCXXStdlibType(Args);
338}
339
340void HIPAMDToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
341 ArgStringList &CC1Args) const {
342 HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
343}
344
345void HIPAMDToolChain::AddClangCXXStdlibIncludeArgs(
346 const ArgList &Args, ArgStringList &CC1Args) const {
347 HostTC.AddClangCXXStdlibIncludeArgs(DriverArgs: Args, CC1Args);
348}
349
350void HIPAMDToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
351 ArgStringList &CC1Args) const {
352 HostTC.AddIAMCUIncludeArgs(DriverArgs: Args, CC1Args);
353}
354
355void HIPAMDToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs,
356 ArgStringList &CC1Args) const {
357 RocmInstallation->AddHIPIncludeArgs(DriverArgs, CC1Args);
358}
359
360SanitizerMask HIPAMDToolChain::getSupportedSanitizers() const {
361 // The HIPAMDToolChain only supports sanitizers in the sense that it allows
362 // sanitizer arguments on the command line if they are supported by the host
363 // toolchain. The HIPAMDToolChain will later filter unsupported sanitizers
364 // from the command line arguments.
365 //
366 // This behavior is necessary because the host and device toolchains
367 // invocations often share the command line, so the device toolchain must
368 // tolerate flags meant only for the host toolchain.
369 return HostTC.getSupportedSanitizers();
370}
371
372VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D,
373 const ArgList &Args) const {
374 return HostTC.computeMSVCVersion(D, Args);
375}
376
377llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
378HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs,
379 Action::OffloadKind DeviceOffloadingKind) const {
380 llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs;
381 if (!DriverArgs.hasFlag(Pos: options::OPT_offloadlib, Neg: options::OPT_no_offloadlib,
382 Default: true) ||
383 getGPUArch(DriverArgs) == "amdgcnspirv")
384 return {};
385 ArgStringList LibraryPaths;
386
387 // Find in --hip-device-lib-path and HIP_LIBRARY_PATH.
388 for (StringRef Path : RocmInstallation->getRocmDeviceLibPathArg())
389 LibraryPaths.push_back(Elt: DriverArgs.MakeArgString(Str: Path));
390
391 addDirectoryList(Args: DriverArgs, CmdArgs&: LibraryPaths, ArgName: "", EnvVar: "HIP_DEVICE_LIB_PATH");
392
393 // Maintain compatability with --hip-device-lib.
394 auto BCLibArgs = DriverArgs.getAllArgValues(Id: options::OPT_hip_device_lib_EQ);
395 if (!BCLibArgs.empty()) {
396 for (StringRef BCName : BCLibArgs) {
397 StringRef FullName;
398 bool Found = false;
399 for (StringRef LibraryPath : LibraryPaths) {
400 SmallString<128> Path(LibraryPath);
401 llvm::sys::path::append(path&: Path, a: BCName);
402 FullName = Path;
403 if (llvm::sys::fs::exists(Path: FullName)) {
404 BCLibs.emplace_back(Args&: FullName);
405 Found = true;
406 break;
407 }
408 }
409 if (!Found)
410 getDriver().Diag(DiagID: diag::err_drv_no_such_file) << BCName;
411 }
412 } else {
413 if (!RocmInstallation->hasDeviceLibrary()) {
414 getDriver().Diag(DiagID: diag::err_drv_no_rocm_device_lib) << 0;
415 return {};
416 }
417 StringRef GpuArch = getGPUArch(DriverArgs);
418 assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
419
420 // Add common device libraries like ocml etc.
421 for (auto N : getCommonDeviceLibNames(DriverArgs, GPUArch: GpuArch.str(),
422 DeviceOffloadingKind))
423 BCLibs.emplace_back(Args&: N);
424
425 // Add instrument lib.
426 auto InstLib =
427 DriverArgs.getLastArgValue(Id: options::OPT_gpu_instrument_lib_EQ);
428 if (InstLib.empty())
429 return BCLibs;
430 if (llvm::sys::fs::exists(Path: InstLib))
431 BCLibs.emplace_back(Args&: InstLib);
432 else
433 getDriver().Diag(DiagID: diag::err_drv_no_such_file) << InstLib;
434 }
435
436 return BCLibs;
437}
438
439void HIPAMDToolChain::checkTargetID(
440 const llvm::opt::ArgList &DriverArgs) const {
441 auto PTID = getParsedTargetID(DriverArgs);
442 if (PTID.OptionalTargetID && !PTID.OptionalGPUArch &&
443 PTID.OptionalTargetID != "amdgcnspirv")
444 getDriver().Diag(DiagID: clang::diag::err_drv_bad_target_id)
445 << *PTID.OptionalTargetID;
446}
447
448SPIRVAMDToolChain::SPIRVAMDToolChain(const Driver &D,
449 const llvm::Triple &Triple,
450 const ArgList &Args)
451 : ROCMToolChain(D, Triple, Args) {
452 getProgramPaths().push_back(Elt: getDriver().Dir);
453}
454
455Tool *SPIRVAMDToolChain::buildLinker() const {
456 assert(getTriple().getArch() == llvm::Triple::spirv64);
457 return new tools::AMDGCN::Linker(*this);
458}
459