1 | //===--- HIPAMD.cpp - HIP Tool and ToolChain Implementations ----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "HIPAMD.h" |
10 | #include "AMDGPU.h" |
11 | #include "HIPUtility.h" |
12 | #include "SPIRV.h" |
13 | #include "clang/Basic/Cuda.h" |
14 | #include "clang/Driver/CommonArgs.h" |
15 | #include "clang/Driver/Compilation.h" |
16 | #include "clang/Driver/Driver.h" |
17 | #include "clang/Driver/InputInfo.h" |
18 | #include "clang/Driver/Options.h" |
19 | #include "clang/Driver/SanitizerArgs.h" |
20 | #include "llvm/Support/FileSystem.h" |
21 | #include "llvm/Support/Path.h" |
22 | #include "llvm/TargetParser/TargetParser.h" |
23 | |
24 | using namespace clang::driver; |
25 | using namespace clang::driver::toolchains; |
26 | using namespace clang::driver::tools; |
27 | using namespace clang; |
28 | using namespace llvm::opt; |
29 | |
30 | #if defined(_WIN32) || defined(_WIN64) |
31 | #define NULL_FILE "nul" |
32 | #else |
33 | #define NULL_FILE "/dev/null" |
34 | #endif |
35 | |
36 | void AMDGCN::Linker::constructLlvmLinkCommand(Compilation &C, |
37 | const JobAction &JA, |
38 | const InputInfoList &Inputs, |
39 | const InputInfo &Output, |
40 | const llvm::opt::ArgList &Args) const { |
41 | // Construct llvm-link command. |
42 | // The output from llvm-link is a bitcode file. |
43 | ArgStringList LlvmLinkArgs; |
44 | |
45 | assert(!Inputs.empty() && "Must have at least one input." ); |
46 | |
47 | LlvmLinkArgs.append(IL: {"-o" , Output.getFilename()}); |
48 | for (auto Input : Inputs) |
49 | LlvmLinkArgs.push_back(Elt: Input.getFilename()); |
50 | |
51 | // Look for archive of bundled bitcode in arguments, and add temporary files |
52 | // for the extracted archive of bitcode to inputs. |
53 | auto TargetID = Args.getLastArgValue(Id: options::OPT_mcpu_EQ); |
54 | AddStaticDeviceLibsLinking(C, T: *this, JA, Inputs, DriverArgs: Args, CmdArgs&: LlvmLinkArgs, Arch: "amdgcn" , |
55 | Target: TargetID, /*IsBitCodeSDL=*/isBitCodeSDL: true); |
56 | |
57 | const char *LlvmLink = |
58 | Args.MakeArgString(Str: getToolChain().GetProgramPath(Name: "llvm-link" )); |
59 | C.addCommand(C: std::make_unique<Command>(args: JA, args: *this, args: ResponseFileSupport::None(), |
60 | args&: LlvmLink, args&: LlvmLinkArgs, args: Inputs, |
61 | args: Output)); |
62 | } |
63 | |
64 | void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA, |
65 | const InputInfoList &Inputs, |
66 | const InputInfo &Output, |
67 | const llvm::opt::ArgList &Args) const { |
68 | // Construct lld command. |
69 | // The output from ld.lld is an HSA code object file. |
70 | ArgStringList LldArgs{"-flavor" , |
71 | "gnu" , |
72 | "-m" , |
73 | "elf64_amdgpu" , |
74 | "--no-undefined" , |
75 | "-shared" , |
76 | "-plugin-opt=-amdgpu-internalize-symbols" }; |
77 | if (Args.hasArg(Ids: options::OPT_hipstdpar)) |
78 | LldArgs.push_back(Elt: "-plugin-opt=-amdgpu-enable-hipstdpar" ); |
79 | |
80 | auto &TC = getToolChain(); |
81 | auto &D = TC.getDriver(); |
82 | bool IsThinLTO = D.getOffloadLTOMode() == LTOK_Thin; |
83 | addLTOOptions(ToolChain: TC, Args, CmdArgs&: LldArgs, Output, Inputs, IsThinLTO); |
84 | |
85 | // Extract all the -m options |
86 | std::vector<llvm::StringRef> Features; |
87 | amdgpu::getAMDGPUTargetFeatures(D, Triple: TC.getTriple(), Args, Features); |
88 | |
89 | // Add features to mattr such as cumode |
90 | std::string MAttrString = "-plugin-opt=-mattr=" ; |
91 | for (auto OneFeature : unifyTargetFeatures(Features)) { |
92 | MAttrString.append(s: Args.MakeArgString(Str: OneFeature)); |
93 | if (OneFeature != Features.back()) |
94 | MAttrString.append(s: "," ); |
95 | } |
96 | if (!Features.empty()) |
97 | LldArgs.push_back(Elt: Args.MakeArgString(Str: MAttrString)); |
98 | |
99 | // ToDo: Remove this option after AMDGPU backend supports ISA-level linking. |
100 | // Since AMDGPU backend currently does not support ISA-level linking, all |
101 | // called functions need to be imported. |
102 | if (IsThinLTO) { |
103 | LldArgs.push_back(Elt: Args.MakeArgString(Str: "-plugin-opt=-force-import-all" )); |
104 | LldArgs.push_back(Elt: Args.MakeArgString(Str: "-plugin-opt=-avail-extern-to-local" )); |
105 | LldArgs.push_back(Elt: Args.MakeArgString( |
106 | Str: "-plugin-opt=-avail-extern-gv-in-addrspace-to-local=3" )); |
107 | } |
108 | |
109 | for (const Arg *A : Args.filtered(Ids: options::OPT_mllvm)) { |
110 | LldArgs.push_back( |
111 | Elt: Args.MakeArgString(Str: Twine("-plugin-opt=" ) + A->getValue(N: 0))); |
112 | } |
113 | |
114 | if (C.getDriver().isSaveTempsEnabled()) |
115 | LldArgs.push_back(Elt: "-save-temps" ); |
116 | |
117 | addLinkerCompressDebugSectionsOption(TC, Args, CmdArgs&: LldArgs); |
118 | |
119 | // Given that host and device linking happen in separate processes, the device |
120 | // linker doesn't always have the visibility as to which device symbols are |
121 | // needed by a program, especially for the device symbol dependencies that are |
122 | // introduced through the host symbol resolution. |
123 | // For example: host_A() (A.obj) --> host_B(B.obj) --> device_kernel_B() |
124 | // (B.obj) In this case, the device linker doesn't know that A.obj actually |
125 | // depends on the kernel functions in B.obj. When linking to static device |
126 | // library, the device linker may drop some of the device global symbols if |
127 | // they aren't referenced. As a workaround, we are adding to the |
128 | // --whole-archive flag such that all global symbols would be linked in. |
129 | LldArgs.push_back(Elt: "--whole-archive" ); |
130 | |
131 | for (auto *Arg : Args.filtered(Ids: options::OPT_Xoffload_linker)) { |
132 | StringRef ArgVal = Arg->getValue(N: 1); |
133 | auto SplitArg = ArgVal.split(Separator: "-mllvm=" ); |
134 | if (!SplitArg.second.empty()) { |
135 | LldArgs.push_back( |
136 | Elt: Args.MakeArgString(Str: Twine("-plugin-opt=" ) + SplitArg.second)); |
137 | } else { |
138 | LldArgs.push_back(Elt: Args.MakeArgString(Str: ArgVal)); |
139 | } |
140 | Arg->claim(); |
141 | } |
142 | |
143 | LldArgs.append(IL: {"-o" , Output.getFilename()}); |
144 | for (auto Input : Inputs) |
145 | LldArgs.push_back(Elt: Input.getFilename()); |
146 | |
147 | // Look for archive of bundled bitcode in arguments, and add temporary files |
148 | // for the extracted archive of bitcode to inputs. |
149 | auto TargetID = Args.getLastArgValue(Id: options::OPT_mcpu_EQ); |
150 | AddStaticDeviceLibsLinking(C, T: *this, JA, Inputs, DriverArgs: Args, CmdArgs&: LldArgs, Arch: "amdgcn" , |
151 | Target: TargetID, /*IsBitCodeSDL=*/isBitCodeSDL: true); |
152 | |
153 | LldArgs.push_back(Elt: "--no-whole-archive" ); |
154 | |
155 | const char *Lld = Args.MakeArgString(Str: getToolChain().GetProgramPath(Name: "lld" )); |
156 | C.addCommand(C: std::make_unique<Command>(args: JA, args: *this, args: ResponseFileSupport::None(), |
157 | args&: Lld, args&: LldArgs, args: Inputs, args: Output)); |
158 | } |
159 | |
160 | // For SPIR-V the inputs for the job are device AMDGCN SPIR-V flavoured bitcode |
161 | // and the output is either a compiled SPIR-V binary or bitcode (-emit-llvm). It |
162 | // calls llvm-link and then the llvm-spirv translator. Once the SPIR-V BE will |
163 | // be promoted from experimental, we will switch to using that. TODO: consider |
164 | // if we want to run any targeted optimisations over IR here, over generic |
165 | // SPIR-V. |
166 | void AMDGCN::Linker::constructLinkAndEmitSpirvCommand( |
167 | Compilation &C, const JobAction &JA, const InputInfoList &Inputs, |
168 | const InputInfo &Output, const llvm::opt::ArgList &Args) const { |
169 | assert(!Inputs.empty() && "Must have at least one input." ); |
170 | |
171 | constructLlvmLinkCommand(C, JA, Inputs, Output, Args); |
172 | |
173 | // Linked BC is now in Output |
174 | |
175 | // Emit SPIR-V binary. |
176 | llvm::opt::ArgStringList TrArgs{ |
177 | "--spirv-max-version=1.6" , |
178 | "--spirv-ext=+all" , |
179 | "--spirv-allow-unknown-intrinsics" , |
180 | "--spirv-lower-const-expr" , |
181 | "--spirv-preserve-auxdata" , |
182 | "--spirv-debug-info-version=nonsemantic-shader-200" }; |
183 | SPIRV::constructTranslateCommand(C, T: *this, JA, Output, Input: Output, Args: TrArgs); |
184 | } |
185 | |
186 | // For amdgcn the inputs of the linker job are device bitcode and output is |
187 | // either an object file or bitcode (-emit-llvm). It calls llvm-link, opt, |
188 | // llc, then lld steps. |
189 | void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA, |
190 | const InputInfo &Output, |
191 | const InputInfoList &Inputs, |
192 | const ArgList &Args, |
193 | const char *LinkingOutput) const { |
194 | if (Inputs.size() > 0 && |
195 | Inputs[0].getType() == types::TY_Image && |
196 | JA.getType() == types::TY_Object) |
197 | return HIP::constructGenerateObjFileFromHIPFatBinary(C, Output, Inputs, |
198 | Args, JA, T: *this); |
199 | |
200 | if (JA.getType() == types::TY_HIP_FATBIN) |
201 | return HIP::constructHIPFatbinCommand(C, JA, OutputFileName: Output.getFilename(), Inputs, |
202 | TCArgs: Args, T: *this); |
203 | |
204 | if (JA.getType() == types::TY_LLVM_BC) |
205 | return constructLlvmLinkCommand(C, JA, Inputs, Output, Args); |
206 | |
207 | if (getToolChain().getEffectiveTriple().isSPIRV()) |
208 | return constructLinkAndEmitSpirvCommand(C, JA, Inputs, Output, Args); |
209 | |
210 | return constructLldCommand(C, JA, Inputs, Output, Args); |
211 | } |
212 | |
213 | HIPAMDToolChain::HIPAMDToolChain(const Driver &D, const llvm::Triple &Triple, |
214 | const ToolChain &HostTC, const ArgList &Args) |
215 | : ROCMToolChain(D, Triple, Args), HostTC(HostTC) { |
216 | // Lookup binaries into the driver directory, this is used to |
217 | // discover the clang-offload-bundler executable. |
218 | getProgramPaths().push_back(Elt: getDriver().Dir); |
219 | // Diagnose unsupported sanitizer options only once. |
220 | diagnoseUnsupportedSanitizers(Args); |
221 | } |
222 | |
223 | void HIPAMDToolChain::addClangTargetOptions( |
224 | const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, |
225 | Action::OffloadKind DeviceOffloadingKind) const { |
226 | HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadKind: DeviceOffloadingKind); |
227 | |
228 | assert(DeviceOffloadingKind == Action::OFK_HIP && |
229 | "Only HIP offloading kinds are supported for GPUs." ); |
230 | |
231 | CC1Args.append(IL: {"-fcuda-is-device" , "-fno-threadsafe-statics" }); |
232 | |
233 | if (!DriverArgs.hasFlag(Pos: options::OPT_fgpu_rdc, Neg: options::OPT_fno_gpu_rdc, |
234 | Default: false)) { |
235 | CC1Args.append(IL: {"-mllvm" , "-amdgpu-internalize-symbols" }); |
236 | if (DriverArgs.hasArgNoClaim(Ids: options::OPT_hipstdpar)) |
237 | CC1Args.append(IL: {"-mllvm" , "-amdgpu-enable-hipstdpar" }); |
238 | } |
239 | |
240 | StringRef MaxThreadsPerBlock = |
241 | DriverArgs.getLastArgValue(Id: options::OPT_gpu_max_threads_per_block_EQ); |
242 | if (!MaxThreadsPerBlock.empty()) { |
243 | std::string ArgStr = |
244 | (Twine("--gpu-max-threads-per-block=" ) + MaxThreadsPerBlock).str(); |
245 | CC1Args.push_back(Elt: DriverArgs.MakeArgStringRef(Str: ArgStr)); |
246 | } |
247 | |
248 | CC1Args.push_back(Elt: "-fcuda-allow-variadic-functions" ); |
249 | |
250 | // Default to "hidden" visibility, as object level linking will not be |
251 | // supported for the foreseeable future. |
252 | if (!DriverArgs.hasArg(Ids: options::OPT_fvisibility_EQ, |
253 | Ids: options::OPT_fvisibility_ms_compat)) { |
254 | CC1Args.append(IL: {"-fvisibility=hidden" }); |
255 | CC1Args.push_back(Elt: "-fapply-global-visibility-to-externs" ); |
256 | } |
257 | |
258 | if (getEffectiveTriple().isSPIRV()) { |
259 | // For SPIR-V we embed the command-line into the generated binary, in order |
260 | // to retrieve it at JIT time and be able to do target specific compilation |
261 | // with options that match the user-supplied ones. |
262 | if (!DriverArgs.hasArg(Ids: options::OPT_fembed_bitcode_marker)) |
263 | CC1Args.push_back(Elt: "-fembed-bitcode=marker" ); |
264 | return; // No DeviceLibs for SPIR-V. |
265 | } |
266 | |
267 | for (auto BCFile : getDeviceLibs(Args: DriverArgs)) { |
268 | CC1Args.push_back(Elt: BCFile.ShouldInternalize ? "-mlink-builtin-bitcode" |
269 | : "-mlink-bitcode-file" ); |
270 | CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: BCFile.Path)); |
271 | } |
272 | } |
273 | |
274 | llvm::opt::DerivedArgList * |
275 | HIPAMDToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, |
276 | StringRef BoundArch, |
277 | Action::OffloadKind DeviceOffloadKind) const { |
278 | DerivedArgList *DAL = |
279 | HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind); |
280 | if (!DAL) |
281 | DAL = new DerivedArgList(Args.getBaseArgs()); |
282 | |
283 | const OptTable &Opts = getDriver().getOpts(); |
284 | |
285 | for (Arg *A : Args) { |
286 | if (!shouldSkipSanitizeOption(TC: *this, DriverArgs: Args, TargetID: BoundArch, A)) |
287 | DAL->append(A); |
288 | } |
289 | |
290 | if (!BoundArch.empty()) { |
291 | DAL->eraseArg(Id: options::OPT_mcpu_EQ); |
292 | DAL->AddJoinedArg(BaseArg: nullptr, Opt: Opts.getOption(Opt: options::OPT_mcpu_EQ), Value: BoundArch); |
293 | checkTargetID(DriverArgs: *DAL); |
294 | } |
295 | |
296 | if (!Args.hasArg(Ids: options::OPT_flto_partitions_EQ)) |
297 | DAL->AddJoinedArg(BaseArg: nullptr, Opt: Opts.getOption(Opt: options::OPT_flto_partitions_EQ), |
298 | Value: "8" ); |
299 | |
300 | return DAL; |
301 | } |
302 | |
303 | Tool *HIPAMDToolChain::buildLinker() const { |
304 | assert(getTriple().isAMDGCN() || |
305 | getTriple().getArch() == llvm::Triple::spirv64); |
306 | return new tools::AMDGCN::Linker(*this); |
307 | } |
308 | |
309 | void HIPAMDToolChain::addClangWarningOptions(ArgStringList &CC1Args) const { |
310 | AMDGPUToolChain::addClangWarningOptions(CC1Args); |
311 | HostTC.addClangWarningOptions(CC1Args); |
312 | } |
313 | |
314 | ToolChain::CXXStdlibType |
315 | HIPAMDToolChain::GetCXXStdlibType(const ArgList &Args) const { |
316 | return HostTC.GetCXXStdlibType(Args); |
317 | } |
318 | |
319 | void HIPAMDToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, |
320 | ArgStringList &CC1Args) const { |
321 | HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args); |
322 | } |
323 | |
324 | void HIPAMDToolChain::AddClangCXXStdlibIncludeArgs( |
325 | const ArgList &Args, ArgStringList &CC1Args) const { |
326 | HostTC.AddClangCXXStdlibIncludeArgs(DriverArgs: Args, CC1Args); |
327 | } |
328 | |
329 | void HIPAMDToolChain::AddIAMCUIncludeArgs(const ArgList &Args, |
330 | ArgStringList &CC1Args) const { |
331 | HostTC.AddIAMCUIncludeArgs(DriverArgs: Args, CC1Args); |
332 | } |
333 | |
334 | void HIPAMDToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs, |
335 | ArgStringList &CC1Args) const { |
336 | RocmInstallation->AddHIPIncludeArgs(DriverArgs, CC1Args); |
337 | } |
338 | |
339 | SanitizerMask HIPAMDToolChain::getSupportedSanitizers() const { |
340 | // The HIPAMDToolChain only supports sanitizers in the sense that it allows |
341 | // sanitizer arguments on the command line if they are supported by the host |
342 | // toolchain. The HIPAMDToolChain will actually ignore any command line |
343 | // arguments for any of these "supported" sanitizers. That means that no |
344 | // sanitization of device code is actually supported at this time. |
345 | // |
346 | // This behavior is necessary because the host and device toolchains |
347 | // invocations often share the command line, so the device toolchain must |
348 | // tolerate flags meant only for the host toolchain. |
349 | return HostTC.getSupportedSanitizers(); |
350 | } |
351 | |
352 | VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D, |
353 | const ArgList &Args) const { |
354 | return HostTC.computeMSVCVersion(D, Args); |
355 | } |
356 | |
357 | llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> |
358 | HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { |
359 | llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs; |
360 | if (!DriverArgs.hasFlag(Pos: options::OPT_offloadlib, Neg: options::OPT_no_offloadlib, |
361 | Default: true) || |
362 | getGPUArch(DriverArgs) == "amdgcnspirv" ) |
363 | return {}; |
364 | ArgStringList LibraryPaths; |
365 | |
366 | // Find in --hip-device-lib-path and HIP_LIBRARY_PATH. |
367 | for (StringRef Path : RocmInstallation->getRocmDeviceLibPathArg()) |
368 | LibraryPaths.push_back(Elt: DriverArgs.MakeArgString(Str: Path)); |
369 | |
370 | addDirectoryList(Args: DriverArgs, CmdArgs&: LibraryPaths, ArgName: "" , EnvVar: "HIP_DEVICE_LIB_PATH" ); |
371 | |
372 | // Maintain compatability with --hip-device-lib. |
373 | auto BCLibArgs = DriverArgs.getAllArgValues(Id: options::OPT_hip_device_lib_EQ); |
374 | if (!BCLibArgs.empty()) { |
375 | llvm::for_each(Range&: BCLibArgs, F: [&](StringRef BCName) { |
376 | StringRef FullName; |
377 | for (StringRef LibraryPath : LibraryPaths) { |
378 | SmallString<128> Path(LibraryPath); |
379 | llvm::sys::path::append(path&: Path, a: BCName); |
380 | FullName = Path; |
381 | if (llvm::sys::fs::exists(Path: FullName)) { |
382 | BCLibs.emplace_back(Args&: FullName); |
383 | return; |
384 | } |
385 | } |
386 | getDriver().Diag(DiagID: diag::err_drv_no_such_file) << BCName; |
387 | }); |
388 | } else { |
389 | if (!RocmInstallation->hasDeviceLibrary()) { |
390 | getDriver().Diag(DiagID: diag::err_drv_no_rocm_device_lib) << 0; |
391 | return {}; |
392 | } |
393 | StringRef GpuArch = getGPUArch(DriverArgs); |
394 | assert(!GpuArch.empty() && "Must have an explicit GPU arch." ); |
395 | |
396 | // Add common device libraries like ocml etc. |
397 | for (auto N : getCommonDeviceLibNames(DriverArgs, GPUArch: GpuArch.str())) |
398 | BCLibs.emplace_back(Args&: N); |
399 | |
400 | // Add instrument lib. |
401 | auto InstLib = |
402 | DriverArgs.getLastArgValue(Id: options::OPT_gpu_instrument_lib_EQ); |
403 | if (InstLib.empty()) |
404 | return BCLibs; |
405 | if (llvm::sys::fs::exists(Path: InstLib)) |
406 | BCLibs.emplace_back(Args&: InstLib); |
407 | else |
408 | getDriver().Diag(DiagID: diag::err_drv_no_such_file) << InstLib; |
409 | } |
410 | |
411 | return BCLibs; |
412 | } |
413 | |
414 | void HIPAMDToolChain::checkTargetID( |
415 | const llvm::opt::ArgList &DriverArgs) const { |
416 | auto PTID = getParsedTargetID(DriverArgs); |
417 | if (PTID.OptionalTargetID && !PTID.OptionalGPUArch && |
418 | PTID.OptionalTargetID != "amdgcnspirv" ) |
419 | getDriver().Diag(DiagID: clang::diag::err_drv_bad_target_id) |
420 | << *PTID.OptionalTargetID; |
421 | } |
422 | |
423 | SPIRVAMDToolChain::SPIRVAMDToolChain(const Driver &D, |
424 | const llvm::Triple &Triple, |
425 | const ArgList &Args) |
426 | : ROCMToolChain(D, Triple, Args) { |
427 | getProgramPaths().push_back(Elt: getDriver().Dir); |
428 | } |
429 | |
430 | Tool *SPIRVAMDToolChain::buildLinker() const { |
431 | assert(getTriple().getArch() == llvm::Triple::spirv64); |
432 | return new tools::AMDGCN::Linker(*this); |
433 | } |
434 | |