1 | //===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "AMDGPU.h" |
10 | #include "CommonArgs.h" |
11 | #include "clang/Basic/TargetID.h" |
12 | #include "clang/Config/config.h" |
13 | #include "clang/Driver/Compilation.h" |
14 | #include "clang/Driver/DriverDiagnostic.h" |
15 | #include "clang/Driver/InputInfo.h" |
16 | #include "clang/Driver/Options.h" |
17 | #include "clang/Driver/SanitizerArgs.h" |
18 | #include "llvm/ADT/StringExtras.h" |
19 | #include "llvm/Option/ArgList.h" |
20 | #include "llvm/Support/Error.h" |
21 | #include "llvm/Support/LineIterator.h" |
22 | #include "llvm/Support/Path.h" |
23 | #include "llvm/Support/Process.h" |
24 | #include "llvm/Support/VirtualFileSystem.h" |
25 | #include "llvm/TargetParser/Host.h" |
26 | #include <optional> |
27 | #include <system_error> |
28 | |
29 | using namespace clang::driver; |
30 | using namespace clang::driver::tools; |
31 | using namespace clang::driver::toolchains; |
32 | using namespace clang; |
33 | using namespace llvm::opt; |
34 | |
35 | // Look for sub-directory starts with PackageName under ROCm candidate path. |
36 | // If there is one and only one matching sub-directory found, append the |
37 | // sub-directory to Path. If there is no matching sub-directory or there are |
38 | // more than one matching sub-directories, diagnose them. Returns the full |
39 | // path of the package if there is only one matching sub-directory, otherwise |
40 | // returns an empty string. |
41 | llvm::SmallString<0> |
42 | RocmInstallationDetector::findSPACKPackage(const Candidate &Cand, |
43 | StringRef PackageName) { |
44 | if (!Cand.isSPACK()) |
45 | return {}; |
46 | std::error_code EC; |
47 | std::string Prefix = Twine(PackageName + "-" + Cand.SPACKReleaseStr).str(); |
48 | llvm::SmallVector<llvm::SmallString<0>> SubDirs; |
49 | for (llvm::vfs::directory_iterator File = D.getVFS().dir_begin(Dir: Cand.Path, EC), |
50 | FileEnd; |
51 | File != FileEnd && !EC; File.increment(EC)) { |
52 | llvm::StringRef FileName = llvm::sys::path::filename(path: File->path()); |
53 | if (FileName.starts_with(Prefix)) { |
54 | SubDirs.push_back(Elt: FileName); |
55 | if (SubDirs.size() > 1) |
56 | break; |
57 | } |
58 | } |
59 | if (SubDirs.size() == 1) { |
60 | auto PackagePath = Cand.Path; |
61 | llvm::sys::path::append(path&: PackagePath, a: SubDirs[0]); |
62 | return PackagePath; |
63 | } |
64 | if (SubDirs.size() == 0 && Verbose) { |
65 | llvm::errs() << "SPACK package " << Prefix << " not found at " << Cand.Path |
66 | << '\n'; |
67 | return {}; |
68 | } |
69 | |
70 | if (SubDirs.size() > 1 && Verbose) { |
71 | llvm::errs() << "Cannot use SPACK package " << Prefix << " at " << Cand.Path |
72 | << " due to multiple installations for the same version\n" ; |
73 | } |
74 | return {}; |
75 | } |
76 | |
77 | void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) { |
78 | assert(!Path.empty()); |
79 | |
80 | const StringRef Suffix(".bc" ); |
81 | const StringRef Suffix2(".amdgcn.bc" ); |
82 | |
83 | std::error_code EC; |
84 | for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(Dir: Path, EC), LE; |
85 | !EC && LI != LE; LI = LI.increment(EC)) { |
86 | StringRef FilePath = LI->path(); |
87 | StringRef FileName = llvm::sys::path::filename(path: FilePath); |
88 | if (!FileName.ends_with(Suffix)) |
89 | continue; |
90 | |
91 | StringRef BaseName; |
92 | if (FileName.ends_with(Suffix: Suffix2)) |
93 | BaseName = FileName.drop_back(N: Suffix2.size()); |
94 | else if (FileName.ends_with(Suffix)) |
95 | BaseName = FileName.drop_back(N: Suffix.size()); |
96 | |
97 | const StringRef ABIVersionPrefix = "oclc_abi_version_" ; |
98 | if (BaseName == "ocml" ) { |
99 | OCML = FilePath; |
100 | } else if (BaseName == "ockl" ) { |
101 | OCKL = FilePath; |
102 | } else if (BaseName == "opencl" ) { |
103 | OpenCL = FilePath; |
104 | } else if (BaseName == "hip" ) { |
105 | HIP = FilePath; |
106 | } else if (BaseName == "asanrtl" ) { |
107 | AsanRTL = FilePath; |
108 | } else if (BaseName == "oclc_finite_only_off" ) { |
109 | FiniteOnly.Off = FilePath; |
110 | } else if (BaseName == "oclc_finite_only_on" ) { |
111 | FiniteOnly.On = FilePath; |
112 | } else if (BaseName == "oclc_daz_opt_on" ) { |
113 | DenormalsAreZero.On = FilePath; |
114 | } else if (BaseName == "oclc_daz_opt_off" ) { |
115 | DenormalsAreZero.Off = FilePath; |
116 | } else if (BaseName == "oclc_correctly_rounded_sqrt_on" ) { |
117 | CorrectlyRoundedSqrt.On = FilePath; |
118 | } else if (BaseName == "oclc_correctly_rounded_sqrt_off" ) { |
119 | CorrectlyRoundedSqrt.Off = FilePath; |
120 | } else if (BaseName == "oclc_unsafe_math_on" ) { |
121 | UnsafeMath.On = FilePath; |
122 | } else if (BaseName == "oclc_unsafe_math_off" ) { |
123 | UnsafeMath.Off = FilePath; |
124 | } else if (BaseName == "oclc_wavefrontsize64_on" ) { |
125 | WavefrontSize64.On = FilePath; |
126 | } else if (BaseName == "oclc_wavefrontsize64_off" ) { |
127 | WavefrontSize64.Off = FilePath; |
128 | } else if (BaseName.starts_with(Prefix: ABIVersionPrefix)) { |
129 | unsigned ABIVersionNumber; |
130 | if (BaseName.drop_front(N: ABIVersionPrefix.size()) |
131 | .getAsInteger(/*Redex=*/Radix: 0, Result&: ABIVersionNumber)) |
132 | continue; |
133 | ABIVersionMap[ABIVersionNumber] = FilePath.str(); |
134 | } else { |
135 | // Process all bitcode filenames that look like |
136 | // ocl_isa_version_XXX.amdgcn.bc |
137 | const StringRef DeviceLibPrefix = "oclc_isa_version_" ; |
138 | if (!BaseName.starts_with(Prefix: DeviceLibPrefix)) |
139 | continue; |
140 | |
141 | StringRef IsaVersionNumber = |
142 | BaseName.drop_front(N: DeviceLibPrefix.size()); |
143 | |
144 | llvm::Twine GfxName = Twine("gfx" ) + IsaVersionNumber; |
145 | SmallString<8> Tmp; |
146 | LibDeviceMap.insert( |
147 | KV: std::make_pair(x: GfxName.toStringRef(Out&: Tmp), y: FilePath.str())); |
148 | } |
149 | } |
150 | } |
151 | |
152 | // Parse and extract version numbers from `.hipVersion`. Return `true` if |
153 | // the parsing fails. |
154 | bool RocmInstallationDetector::parseHIPVersionFile(llvm::StringRef V) { |
155 | SmallVector<StringRef, 4> VersionParts; |
156 | V.split(A&: VersionParts, Separator: '\n'); |
157 | unsigned Major = ~0U; |
158 | unsigned Minor = ~0U; |
159 | for (auto Part : VersionParts) { |
160 | auto Splits = Part.rtrim().split(Separator: '='); |
161 | if (Splits.first == "HIP_VERSION_MAJOR" ) { |
162 | if (Splits.second.getAsInteger(Radix: 0, Result&: Major)) |
163 | return true; |
164 | } else if (Splits.first == "HIP_VERSION_MINOR" ) { |
165 | if (Splits.second.getAsInteger(Radix: 0, Result&: Minor)) |
166 | return true; |
167 | } else if (Splits.first == "HIP_VERSION_PATCH" ) |
168 | VersionPatch = Splits.second.str(); |
169 | } |
170 | if (Major == ~0U || Minor == ~0U) |
171 | return true; |
172 | VersionMajorMinor = llvm::VersionTuple(Major, Minor); |
173 | DetectedVersion = |
174 | (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str(); |
175 | return false; |
176 | } |
177 | |
178 | /// \returns a list of candidate directories for ROCm installation, which is |
179 | /// cached and populated only once. |
180 | const SmallVectorImpl<RocmInstallationDetector::Candidate> & |
181 | RocmInstallationDetector::getInstallationPathCandidates() { |
182 | |
183 | // Return the cached candidate list if it has already been populated. |
184 | if (!ROCmSearchDirs.empty()) |
185 | return ROCmSearchDirs; |
186 | |
187 | auto DoPrintROCmSearchDirs = [&]() { |
188 | if (PrintROCmSearchDirs) |
189 | for (auto Cand : ROCmSearchDirs) { |
190 | llvm::errs() << "ROCm installation search path" ; |
191 | if (Cand.isSPACK()) |
192 | llvm::errs() << " (Spack " << Cand.SPACKReleaseStr << ")" ; |
193 | llvm::errs() << ": " << Cand.Path << '\n'; |
194 | } |
195 | }; |
196 | |
197 | // For candidate specified by --rocm-path we do not do strict check, i.e., |
198 | // checking existence of HIP version file and device library files. |
199 | if (!RocmPathArg.empty()) { |
200 | ROCmSearchDirs.emplace_back(Args: RocmPathArg.str()); |
201 | DoPrintROCmSearchDirs(); |
202 | return ROCmSearchDirs; |
203 | } else if (std::optional<std::string> RocmPathEnv = |
204 | llvm::sys::Process::GetEnv(name: "ROCM_PATH" )) { |
205 | if (!RocmPathEnv->empty()) { |
206 | ROCmSearchDirs.emplace_back(Args: std::move(*RocmPathEnv)); |
207 | DoPrintROCmSearchDirs(); |
208 | return ROCmSearchDirs; |
209 | } |
210 | } |
211 | |
212 | // Try to find relative to the compiler binary. |
213 | StringRef InstallDir = D.Dir; |
214 | |
215 | // Check both a normal Unix prefix position of the clang binary, as well as |
216 | // the Windows-esque layout the ROCm packages use with the host architecture |
217 | // subdirectory of bin. |
218 | auto DeduceROCmPath = [](StringRef ClangPath) { |
219 | // Strip off directory (usually bin) |
220 | StringRef ParentDir = llvm::sys::path::parent_path(path: ClangPath); |
221 | StringRef ParentName = llvm::sys::path::filename(path: ParentDir); |
222 | |
223 | // Some builds use bin/{host arch}, so go up again. |
224 | if (ParentName == "bin" ) { |
225 | ParentDir = llvm::sys::path::parent_path(path: ParentDir); |
226 | ParentName = llvm::sys::path::filename(path: ParentDir); |
227 | } |
228 | |
229 | // Detect ROCm packages built with SPACK. |
230 | // clang is installed at |
231 | // <rocm_root>/llvm-amdgpu-<rocm_release_string>-<hash>/bin directory. |
232 | // We only consider the parent directory of llvm-amdgpu package as ROCm |
233 | // installation candidate for SPACK. |
234 | if (ParentName.starts_with(Prefix: "llvm-amdgpu-" )) { |
235 | auto SPACKPostfix = |
236 | ParentName.drop_front(N: strlen(s: "llvm-amdgpu-" )).split(Separator: '-'); |
237 | auto SPACKReleaseStr = SPACKPostfix.first; |
238 | if (!SPACKReleaseStr.empty()) { |
239 | ParentDir = llvm::sys::path::parent_path(path: ParentDir); |
240 | return Candidate(ParentDir.str(), /*StrictChecking=*/true, |
241 | SPACKReleaseStr); |
242 | } |
243 | } |
244 | |
245 | // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin |
246 | // Some versions of the aomp package install to /opt/rocm/aomp/bin |
247 | if (ParentName == "llvm" || ParentName.starts_with(Prefix: "aomp" )) |
248 | ParentDir = llvm::sys::path::parent_path(path: ParentDir); |
249 | |
250 | return Candidate(ParentDir.str(), /*StrictChecking=*/true); |
251 | }; |
252 | |
253 | // Deduce ROCm path by the path used to invoke clang. Do not resolve symbolic |
254 | // link of clang itself. |
255 | ROCmSearchDirs.emplace_back(Args: DeduceROCmPath(InstallDir)); |
256 | |
257 | // Deduce ROCm path by the real path of the invoked clang, resolving symbolic |
258 | // link of clang itself. |
259 | llvm::SmallString<256> RealClangPath; |
260 | llvm::sys::fs::real_path(path: D.getClangProgramPath(), output&: RealClangPath); |
261 | auto ParentPath = llvm::sys::path::parent_path(path: RealClangPath); |
262 | if (ParentPath != InstallDir) |
263 | ROCmSearchDirs.emplace_back(Args: DeduceROCmPath(ParentPath)); |
264 | |
265 | // Device library may be installed in clang or resource directory. |
266 | auto ClangRoot = llvm::sys::path::parent_path(path: InstallDir); |
267 | auto RealClangRoot = llvm::sys::path::parent_path(path: ParentPath); |
268 | ROCmSearchDirs.emplace_back(Args: ClangRoot.str(), /*StrictChecking=*/Args: true); |
269 | if (RealClangRoot != ClangRoot) |
270 | ROCmSearchDirs.emplace_back(Args: RealClangRoot.str(), /*StrictChecking=*/Args: true); |
271 | ROCmSearchDirs.emplace_back(Args: D.ResourceDir, |
272 | /*StrictChecking=*/Args: true); |
273 | |
274 | ROCmSearchDirs.emplace_back(Args: D.SysRoot + "/opt/rocm" , |
275 | /*StrictChecking=*/Args: true); |
276 | |
277 | // Find the latest /opt/rocm-{release} directory. |
278 | std::error_code EC; |
279 | std::string LatestROCm; |
280 | llvm::VersionTuple LatestVer; |
281 | // Get ROCm version from ROCm directory name. |
282 | auto GetROCmVersion = [](StringRef DirName) { |
283 | llvm::VersionTuple V; |
284 | std::string VerStr = DirName.drop_front(N: strlen(s: "rocm-" )).str(); |
285 | // The ROCm directory name follows the format of |
286 | // rocm-{major}.{minor}.{subMinor}[-{build}] |
287 | std::replace(first: VerStr.begin(), last: VerStr.end(), old_value: '-', new_value: '.'); |
288 | V.tryParse(string: VerStr); |
289 | return V; |
290 | }; |
291 | for (llvm::vfs::directory_iterator |
292 | File = D.getVFS().dir_begin(Dir: D.SysRoot + "/opt" , EC), |
293 | FileEnd; |
294 | File != FileEnd && !EC; File.increment(EC)) { |
295 | llvm::StringRef FileName = llvm::sys::path::filename(path: File->path()); |
296 | if (!FileName.starts_with(Prefix: "rocm-" )) |
297 | continue; |
298 | if (LatestROCm.empty()) { |
299 | LatestROCm = FileName.str(); |
300 | LatestVer = GetROCmVersion(LatestROCm); |
301 | continue; |
302 | } |
303 | auto Ver = GetROCmVersion(FileName); |
304 | if (LatestVer < Ver) { |
305 | LatestROCm = FileName.str(); |
306 | LatestVer = Ver; |
307 | } |
308 | } |
309 | if (!LatestROCm.empty()) |
310 | ROCmSearchDirs.emplace_back(Args: D.SysRoot + "/opt/" + LatestROCm, |
311 | /*StrictChecking=*/Args: true); |
312 | |
313 | ROCmSearchDirs.emplace_back(Args: D.SysRoot + "/usr/local" , |
314 | /*StrictChecking=*/Args: true); |
315 | ROCmSearchDirs.emplace_back(Args: D.SysRoot + "/usr" , |
316 | /*StrictChecking=*/Args: true); |
317 | |
318 | DoPrintROCmSearchDirs(); |
319 | return ROCmSearchDirs; |
320 | } |
321 | |
322 | RocmInstallationDetector::RocmInstallationDetector( |
323 | const Driver &D, const llvm::Triple &HostTriple, |
324 | const llvm::opt::ArgList &Args, bool DetectHIPRuntime, bool DetectDeviceLib) |
325 | : D(D) { |
326 | Verbose = Args.hasArg(Ids: options::OPT_v); |
327 | RocmPathArg = Args.getLastArgValue(Id: clang::driver::options::OPT_rocm_path_EQ); |
328 | PrintROCmSearchDirs = |
329 | Args.hasArg(Ids: clang::driver::options::OPT_print_rocm_search_dirs); |
330 | RocmDeviceLibPathArg = |
331 | Args.getAllArgValues(Id: clang::driver::options::OPT_rocm_device_lib_path_EQ); |
332 | HIPPathArg = Args.getLastArgValue(Id: clang::driver::options::OPT_hip_path_EQ); |
333 | HIPStdParPathArg = |
334 | Args.getLastArgValue(Id: clang::driver::options::OPT_hipstdpar_path_EQ); |
335 | HasHIPStdParLibrary = |
336 | !HIPStdParPathArg.empty() && D.getVFS().exists(Path: HIPStdParPathArg + |
337 | "/hipstdpar_lib.hpp" ); |
338 | HIPRocThrustPathArg = |
339 | Args.getLastArgValue(Id: clang::driver::options::OPT_hipstdpar_thrust_path_EQ); |
340 | HasRocThrustLibrary = !HIPRocThrustPathArg.empty() && |
341 | D.getVFS().exists(Path: HIPRocThrustPathArg + "/thrust" ); |
342 | HIPRocPrimPathArg = |
343 | Args.getLastArgValue(Id: clang::driver::options::OPT_hipstdpar_prim_path_EQ); |
344 | HasRocPrimLibrary = !HIPRocPrimPathArg.empty() && |
345 | D.getVFS().exists(Path: HIPRocPrimPathArg + "/rocprim" ); |
346 | |
347 | if (auto *A = Args.getLastArg(Ids: clang::driver::options::OPT_hip_version_EQ)) { |
348 | HIPVersionArg = A->getValue(); |
349 | unsigned Major = ~0U; |
350 | unsigned Minor = ~0U; |
351 | SmallVector<StringRef, 3> Parts; |
352 | HIPVersionArg.split(A&: Parts, Separator: '.'); |
353 | if (Parts.size()) |
354 | Parts[0].getAsInteger(Radix: 0, Result&: Major); |
355 | if (Parts.size() > 1) |
356 | Parts[1].getAsInteger(Radix: 0, Result&: Minor); |
357 | if (Parts.size() > 2) |
358 | VersionPatch = Parts[2].str(); |
359 | if (VersionPatch.empty()) |
360 | VersionPatch = "0" ; |
361 | if (Major != ~0U && Minor == ~0U) |
362 | Minor = 0; |
363 | if (Major == ~0U || Minor == ~0U) |
364 | D.Diag(DiagID: diag::err_drv_invalid_value) |
365 | << A->getAsString(Args) << HIPVersionArg; |
366 | |
367 | VersionMajorMinor = llvm::VersionTuple(Major, Minor); |
368 | DetectedVersion = |
369 | (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str(); |
370 | } else { |
371 | VersionPatch = DefaultVersionPatch; |
372 | VersionMajorMinor = |
373 | llvm::VersionTuple(DefaultVersionMajor, DefaultVersionMinor); |
374 | DetectedVersion = (Twine(DefaultVersionMajor) + "." + |
375 | Twine(DefaultVersionMinor) + "." + VersionPatch) |
376 | .str(); |
377 | } |
378 | |
379 | if (DetectHIPRuntime) |
380 | detectHIPRuntime(); |
381 | if (DetectDeviceLib) |
382 | detectDeviceLibrary(); |
383 | } |
384 | |
385 | void RocmInstallationDetector::detectDeviceLibrary() { |
386 | assert(LibDevicePath.empty()); |
387 | |
388 | if (!RocmDeviceLibPathArg.empty()) |
389 | LibDevicePath = RocmDeviceLibPathArg[RocmDeviceLibPathArg.size() - 1]; |
390 | else if (std::optional<std::string> LibPathEnv = |
391 | llvm::sys::Process::GetEnv(name: "HIP_DEVICE_LIB_PATH" )) |
392 | LibDevicePath = std::move(*LibPathEnv); |
393 | |
394 | auto &FS = D.getVFS(); |
395 | if (!LibDevicePath.empty()) { |
396 | // Maintain compatability with HIP flag/envvar pointing directly at the |
397 | // bitcode library directory. This points directly at the library path instead |
398 | // of the rocm root installation. |
399 | if (!FS.exists(Path: LibDevicePath)) |
400 | return; |
401 | |
402 | scanLibDevicePath(Path: LibDevicePath); |
403 | HasDeviceLibrary = allGenericLibsValid() && !LibDeviceMap.empty(); |
404 | return; |
405 | } |
406 | |
407 | // Check device library exists at the given path. |
408 | auto CheckDeviceLib = [&](StringRef Path, bool StrictChecking) { |
409 | bool CheckLibDevice = (!NoBuiltinLibs || StrictChecking); |
410 | if (CheckLibDevice && !FS.exists(Path)) |
411 | return false; |
412 | |
413 | scanLibDevicePath(Path); |
414 | |
415 | if (!NoBuiltinLibs) { |
416 | // Check that the required non-target libraries are all available. |
417 | if (!allGenericLibsValid()) |
418 | return false; |
419 | |
420 | // Check that we have found at least one libdevice that we can link in |
421 | // if -nobuiltinlib hasn't been specified. |
422 | if (LibDeviceMap.empty()) |
423 | return false; |
424 | } |
425 | return true; |
426 | }; |
427 | |
428 | // Find device libraries in <LLVM_DIR>/lib/clang/<ver>/lib/amdgcn/bitcode |
429 | LibDevicePath = D.ResourceDir; |
430 | llvm::sys::path::append(path&: LibDevicePath, CLANG_INSTALL_LIBDIR_BASENAME, |
431 | b: "amdgcn" , c: "bitcode" ); |
432 | HasDeviceLibrary = CheckDeviceLib(LibDevicePath, true); |
433 | if (HasDeviceLibrary) |
434 | return; |
435 | |
436 | // Find device libraries in a legacy ROCm directory structure |
437 | // ${ROCM_ROOT}/amdgcn/bitcode/* |
438 | auto &ROCmDirs = getInstallationPathCandidates(); |
439 | for (const auto &Candidate : ROCmDirs) { |
440 | LibDevicePath = Candidate.Path; |
441 | llvm::sys::path::append(path&: LibDevicePath, a: "amdgcn" , b: "bitcode" ); |
442 | HasDeviceLibrary = CheckDeviceLib(LibDevicePath, Candidate.StrictChecking); |
443 | if (HasDeviceLibrary) |
444 | return; |
445 | } |
446 | } |
447 | |
448 | void RocmInstallationDetector::detectHIPRuntime() { |
449 | SmallVector<Candidate, 4> HIPSearchDirs; |
450 | if (!HIPPathArg.empty()) |
451 | HIPSearchDirs.emplace_back(Args: HIPPathArg.str()); |
452 | else if (std::optional<std::string> HIPPathEnv = |
453 | llvm::sys::Process::GetEnv(name: "HIP_PATH" )) { |
454 | if (!HIPPathEnv->empty()) |
455 | HIPSearchDirs.emplace_back(Args: std::move(*HIPPathEnv)); |
456 | } |
457 | if (HIPSearchDirs.empty()) |
458 | HIPSearchDirs.append(RHS: getInstallationPathCandidates()); |
459 | auto &FS = D.getVFS(); |
460 | |
461 | for (const auto &Candidate : HIPSearchDirs) { |
462 | InstallPath = Candidate.Path; |
463 | if (InstallPath.empty() || !FS.exists(Path: InstallPath)) |
464 | continue; |
465 | // HIP runtime built by SPACK is installed to |
466 | // <rocm_root>/hip-<rocm_release_string>-<hash> directory. |
467 | auto SPACKPath = findSPACKPackage(Cand: Candidate, PackageName: "hip" ); |
468 | InstallPath = SPACKPath.empty() ? InstallPath : SPACKPath; |
469 | |
470 | BinPath = InstallPath; |
471 | llvm::sys::path::append(path&: BinPath, a: "bin" ); |
472 | IncludePath = InstallPath; |
473 | llvm::sys::path::append(path&: IncludePath, a: "include" ); |
474 | LibPath = InstallPath; |
475 | llvm::sys::path::append(path&: LibPath, a: "lib" ); |
476 | SharePath = InstallPath; |
477 | llvm::sys::path::append(path&: SharePath, a: "share" ); |
478 | |
479 | // Get parent of InstallPath and append "share" |
480 | SmallString<0> ParentSharePath = llvm::sys::path::parent_path(path: InstallPath); |
481 | llvm::sys::path::append(path&: ParentSharePath, a: "share" ); |
482 | |
483 | auto Append = [](SmallString<0> &path, const Twine &a, const Twine &b = "" , |
484 | const Twine &c = "" , const Twine &d = "" ) { |
485 | SmallString<0> newpath = path; |
486 | llvm::sys::path::append(path&: newpath, a, b, c, d); |
487 | return newpath; |
488 | }; |
489 | // If HIP version file can be found and parsed, use HIP version from there. |
490 | std::vector<SmallString<0>> VersionFilePaths = { |
491 | Append(SharePath, "hip" , "version" ), |
492 | InstallPath != D.SysRoot + "/usr/local" |
493 | ? Append(ParentSharePath, "hip" , "version" ) |
494 | : SmallString<0>(), |
495 | Append(BinPath, ".hipVersion" )}; |
496 | |
497 | for (const auto &VersionFilePath : VersionFilePaths) { |
498 | if (VersionFilePath.empty()) |
499 | continue; |
500 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile = |
501 | FS.getBufferForFile(Name: VersionFilePath); |
502 | if (!VersionFile) |
503 | continue; |
504 | if (HIPVersionArg.empty() && VersionFile) |
505 | if (parseHIPVersionFile(V: (*VersionFile)->getBuffer())) |
506 | continue; |
507 | |
508 | HasHIPRuntime = true; |
509 | return; |
510 | } |
511 | // Otherwise, if -rocm-path is specified (no strict checking), use the |
512 | // default HIP version or specified by --hip-version. |
513 | if (!Candidate.StrictChecking) { |
514 | HasHIPRuntime = true; |
515 | return; |
516 | } |
517 | } |
518 | HasHIPRuntime = false; |
519 | } |
520 | |
521 | void RocmInstallationDetector::print(raw_ostream &OS) const { |
522 | if (hasHIPRuntime()) |
523 | OS << "Found HIP installation: " << InstallPath << ", version " |
524 | << DetectedVersion << '\n'; |
525 | } |
526 | |
527 | void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs, |
528 | ArgStringList &CC1Args) const { |
529 | bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5) && |
530 | !DriverArgs.hasArg(Ids: options::OPT_nohipwrapperinc); |
531 | bool HasHipStdPar = DriverArgs.hasArg(Ids: options::OPT_hipstdpar); |
532 | |
533 | if (!DriverArgs.hasArg(Ids: options::OPT_nobuiltininc)) { |
534 | // HIP header includes standard library wrapper headers under clang |
535 | // cuda_wrappers directory. Since these wrapper headers include_next |
536 | // standard C++ headers, whereas libc++ headers include_next other clang |
537 | // headers. The include paths have to follow this order: |
538 | // - wrapper include path |
539 | // - standard C++ include path |
540 | // - other clang include path |
541 | // Since standard C++ and other clang include paths are added in other |
542 | // places after this function, here we only need to make sure wrapper |
543 | // include path is added. |
544 | // |
545 | // ROCm 3.5 does not fully support the wrapper headers. Therefore it needs |
546 | // a workaround. |
547 | SmallString<128> P(D.ResourceDir); |
548 | if (UsesRuntimeWrapper) |
549 | llvm::sys::path::append(path&: P, a: "include" , b: "cuda_wrappers" ); |
550 | CC1Args.push_back(Elt: "-internal-isystem" ); |
551 | CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: P)); |
552 | } |
553 | |
554 | const auto HandleHipStdPar = [=, &DriverArgs, &CC1Args]() { |
555 | StringRef Inc = getIncludePath(); |
556 | auto &FS = D.getVFS(); |
557 | |
558 | if (!hasHIPStdParLibrary()) |
559 | if (!HIPStdParPathArg.empty() || |
560 | !FS.exists(Path: Inc + "/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp" )) { |
561 | D.Diag(DiagID: diag::err_drv_no_hipstdpar_lib); |
562 | return; |
563 | } |
564 | if (!HasRocThrustLibrary && !FS.exists(Path: Inc + "/thrust" )) { |
565 | D.Diag(DiagID: diag::err_drv_no_hipstdpar_thrust_lib); |
566 | return; |
567 | } |
568 | if (!HasRocPrimLibrary && !FS.exists(Path: Inc + "/rocprim" )) { |
569 | D.Diag(DiagID: diag::err_drv_no_hipstdpar_prim_lib); |
570 | return; |
571 | } |
572 | const char *ThrustPath; |
573 | if (HasRocThrustLibrary) |
574 | ThrustPath = DriverArgs.MakeArgString(Str: HIPRocThrustPathArg); |
575 | else |
576 | ThrustPath = DriverArgs.MakeArgString(Str: Inc + "/thrust" ); |
577 | |
578 | const char *HIPStdParPath; |
579 | if (hasHIPStdParLibrary()) |
580 | HIPStdParPath = DriverArgs.MakeArgString(Str: HIPStdParPathArg); |
581 | else |
582 | HIPStdParPath = DriverArgs.MakeArgString(Str: StringRef(ThrustPath) + |
583 | "/system/hip/hipstdpar" ); |
584 | |
585 | const char *PrimPath; |
586 | if (HasRocPrimLibrary) |
587 | PrimPath = DriverArgs.MakeArgString(Str: HIPRocPrimPathArg); |
588 | else |
589 | PrimPath = DriverArgs.MakeArgString(Str: getIncludePath() + "/rocprim" ); |
590 | |
591 | CC1Args.append(IL: {"-idirafter" , ThrustPath, "-idirafter" , PrimPath, |
592 | "-idirafter" , HIPStdParPath, "-include" , |
593 | "hipstdpar_lib.hpp" }); |
594 | }; |
595 | |
596 | if (DriverArgs.hasArg(Ids: options::OPT_nogpuinc)) { |
597 | if (HasHipStdPar) |
598 | HandleHipStdPar(); |
599 | |
600 | return; |
601 | } |
602 | |
603 | if (!hasHIPRuntime()) { |
604 | D.Diag(DiagID: diag::err_drv_no_hip_runtime); |
605 | return; |
606 | } |
607 | |
608 | CC1Args.push_back(Elt: "-idirafter" ); |
609 | CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: getIncludePath())); |
610 | if (UsesRuntimeWrapper) |
611 | CC1Args.append(IL: {"-include" , "__clang_hip_runtime_wrapper.h" }); |
612 | if (HasHipStdPar) |
613 | HandleHipStdPar(); |
614 | } |
615 | |
616 | void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, |
617 | const InputInfo &Output, |
618 | const InputInfoList &Inputs, |
619 | const ArgList &Args, |
620 | const char *LinkingOutput) const { |
621 | std::string Linker = getToolChain().GetLinkerPath(); |
622 | ArgStringList CmdArgs; |
623 | CmdArgs.push_back(Elt: "--no-undefined" ); |
624 | CmdArgs.push_back(Elt: "-shared" ); |
625 | |
626 | addLinkerCompressDebugSectionsOption(TC: getToolChain(), Args, CmdArgs); |
627 | Args.AddAllArgs(Output&: CmdArgs, Id0: options::OPT_L); |
628 | getToolChain().AddFilePathLibArgs(Args, CmdArgs); |
629 | AddLinkerInputs(TC: getToolChain(), Inputs, Args, CmdArgs, JA); |
630 | if (C.getDriver().isUsingLTO()) |
631 | addLTOOptions(ToolChain: getToolChain(), Args, CmdArgs, Output, Input: Inputs[0], |
632 | IsThinLTO: C.getDriver().getLTOMode() == LTOK_Thin); |
633 | else if (Args.hasArg(Ids: options::OPT_mcpu_EQ)) |
634 | CmdArgs.push_back(Elt: Args.MakeArgString( |
635 | Str: "-plugin-opt=mcpu=" + Args.getLastArgValue(Id: options::OPT_mcpu_EQ))); |
636 | CmdArgs.push_back(Elt: "-o" ); |
637 | CmdArgs.push_back(Elt: Output.getFilename()); |
638 | C.addCommand(C: std::make_unique<Command>( |
639 | args: JA, args: *this, args: ResponseFileSupport::AtFileCurCP(), args: Args.MakeArgString(Str: Linker), |
640 | args&: CmdArgs, args: Inputs, args: Output)); |
641 | } |
642 | |
643 | void amdgpu::getAMDGPUTargetFeatures(const Driver &D, |
644 | const llvm::Triple &Triple, |
645 | const llvm::opt::ArgList &Args, |
646 | std::vector<StringRef> &Features) { |
647 | // Add target ID features to -target-feature options. No diagnostics should |
648 | // be emitted here since invalid target ID is diagnosed at other places. |
649 | StringRef TargetID; |
650 | if (Args.hasArg(Ids: options::OPT_mcpu_EQ)) |
651 | TargetID = Args.getLastArgValue(Id: options::OPT_mcpu_EQ); |
652 | else if (Args.hasArg(Ids: options::OPT_march_EQ)) |
653 | TargetID = Args.getLastArgValue(Id: options::OPT_march_EQ); |
654 | if (!TargetID.empty()) { |
655 | llvm::StringMap<bool> FeatureMap; |
656 | auto OptionalGpuArch = parseTargetID(T: Triple, OffloadArch: TargetID, FeatureMap: &FeatureMap); |
657 | if (OptionalGpuArch) { |
658 | StringRef GpuArch = *OptionalGpuArch; |
659 | // Iterate through all possible target ID features for the given GPU. |
660 | // If it is mapped to true, add +feature. |
661 | // If it is mapped to false, add -feature. |
662 | // If it is not in the map (default), do not add it |
663 | for (auto &&Feature : getAllPossibleTargetIDFeatures(T: Triple, Processor: GpuArch)) { |
664 | auto Pos = FeatureMap.find(Key: Feature); |
665 | if (Pos == FeatureMap.end()) |
666 | continue; |
667 | Features.push_back(x: Args.MakeArgStringRef( |
668 | Str: (Twine(Pos->second ? "+" : "-" ) + Feature).str())); |
669 | } |
670 | } |
671 | } |
672 | |
673 | if (Args.hasFlag(Pos: options::OPT_mwavefrontsize64, |
674 | Neg: options::OPT_mno_wavefrontsize64, Default: false)) |
675 | Features.push_back(x: "+wavefrontsize64" ); |
676 | |
677 | if (Args.hasFlag(Pos: options::OPT_mamdgpu_precise_memory_op, |
678 | Neg: options::OPT_mno_amdgpu_precise_memory_op, Default: false)) |
679 | Features.push_back(x: "+precise-memory" ); |
680 | |
681 | handleTargetFeaturesGroup(D, Triple, Args, Features, |
682 | Group: options::OPT_m_amdgpu_Features_Group); |
683 | } |
684 | |
685 | /// AMDGPU Toolchain |
686 | AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple, |
687 | const ArgList &Args) |
688 | : Generic_ELF(D, Triple, Args), |
689 | OptionsDefault( |
690 | {{options::OPT_O, "3" }, {options::OPT_cl_std_EQ, "CL1.2" }}) { |
691 | // Check code object version options. Emit warnings for legacy options |
692 | // and errors for the last invalid code object version options. |
693 | // It is done here to avoid repeated warning or error messages for |
694 | // each tool invocation. |
695 | checkAMDGPUCodeObjectVersion(D, Args); |
696 | } |
697 | |
698 | Tool *AMDGPUToolChain::buildLinker() const { |
699 | return new tools::amdgpu::Linker(*this); |
700 | } |
701 | |
702 | DerivedArgList * |
703 | AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, |
704 | Action::OffloadKind DeviceOffloadKind) const { |
705 | |
706 | DerivedArgList *DAL = |
707 | Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind); |
708 | |
709 | const OptTable &Opts = getDriver().getOpts(); |
710 | |
711 | if (!DAL) |
712 | DAL = new DerivedArgList(Args.getBaseArgs()); |
713 | |
714 | for (Arg *A : Args) |
715 | DAL->append(A); |
716 | |
717 | // Replace -mcpu=native with detected GPU. |
718 | Arg *LastMCPUArg = DAL->getLastArg(Ids: options::OPT_mcpu_EQ); |
719 | if (LastMCPUArg && StringRef(LastMCPUArg->getValue()) == "native" ) { |
720 | DAL->eraseArg(Id: options::OPT_mcpu_EQ); |
721 | auto GPUsOrErr = getSystemGPUArchs(Args); |
722 | if (!GPUsOrErr) { |
723 | getDriver().Diag(DiagID: diag::err_drv_undetermined_gpu_arch) |
724 | << llvm::Triple::getArchTypeName(Kind: getArch()) |
725 | << llvm::toString(E: GPUsOrErr.takeError()) << "-mcpu" ; |
726 | } else { |
727 | auto &GPUs = *GPUsOrErr; |
728 | if (GPUs.size() > 1) { |
729 | getDriver().Diag(DiagID: diag::warn_drv_multi_gpu_arch) |
730 | << llvm::Triple::getArchTypeName(Kind: getArch()) |
731 | << llvm::join(R&: GPUs, Separator: ", " ) << "-mcpu" ; |
732 | } |
733 | DAL->AddJoinedArg(BaseArg: nullptr, Opt: Opts.getOption(Opt: options::OPT_mcpu_EQ), |
734 | Value: Args.MakeArgString(Str: GPUs.front())); |
735 | } |
736 | } |
737 | |
738 | checkTargetID(DriverArgs: *DAL); |
739 | |
740 | if (Args.getLastArgValue(Id: options::OPT_x) != "cl" ) |
741 | return DAL; |
742 | |
743 | // Phase 1 (.cl -> .bc) |
744 | if (Args.hasArg(Ids: options::OPT_c) && Args.hasArg(Ids: options::OPT_emit_llvm)) { |
745 | DAL->AddFlagArg(BaseArg: nullptr, Opt: Opts.getOption(Opt: getTriple().isArch64Bit() |
746 | ? options::OPT_m64 |
747 | : options::OPT_m32)); |
748 | |
749 | // Have to check OPT_O4, OPT_O0 & OPT_Ofast separately |
750 | // as they defined that way in Options.td |
751 | if (!Args.hasArg(Ids: options::OPT_O, Ids: options::OPT_O0, Ids: options::OPT_O4, |
752 | Ids: options::OPT_Ofast)) |
753 | DAL->AddJoinedArg(BaseArg: nullptr, Opt: Opts.getOption(Opt: options::OPT_O), |
754 | Value: getOptionDefault(OptID: options::OPT_O)); |
755 | } |
756 | |
757 | return DAL; |
758 | } |
759 | |
760 | bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget( |
761 | llvm::AMDGPU::GPUKind Kind) { |
762 | |
763 | // Assume nothing without a specific target. |
764 | if (Kind == llvm::AMDGPU::GK_NONE) |
765 | return false; |
766 | |
767 | const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(AK: Kind); |
768 | |
769 | // Default to enabling f32 denormals by default on subtargets where fma is |
770 | // fast with denormals |
771 | const bool BothDenormAndFMAFast = |
772 | (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) && |
773 | (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32); |
774 | return !BothDenormAndFMAFast; |
775 | } |
776 | |
777 | llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType( |
778 | const llvm::opt::ArgList &DriverArgs, const JobAction &JA, |
779 | const llvm::fltSemantics *FPType) const { |
780 | // Denormals should always be enabled for f16 and f64. |
781 | if (!FPType || FPType != &llvm::APFloat::IEEEsingle()) |
782 | return llvm::DenormalMode::getIEEE(); |
783 | |
784 | if (JA.getOffloadingDeviceKind() == Action::OFK_HIP || |
785 | JA.getOffloadingDeviceKind() == Action::OFK_Cuda) { |
786 | auto Arch = getProcessorFromTargetID(T: getTriple(), OffloadArch: JA.getOffloadingArch()); |
787 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(CPU: Arch); |
788 | if (FPType && FPType == &llvm::APFloat::IEEEsingle() && |
789 | DriverArgs.hasFlag(Pos: options::OPT_fgpu_flush_denormals_to_zero, |
790 | Neg: options::OPT_fno_gpu_flush_denormals_to_zero, |
791 | Default: getDefaultDenormsAreZeroForTarget(Kind))) |
792 | return llvm::DenormalMode::getPreserveSign(); |
793 | |
794 | return llvm::DenormalMode::getIEEE(); |
795 | } |
796 | |
797 | const StringRef GpuArch = getGPUArch(DriverArgs); |
798 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(CPU: GpuArch); |
799 | |
800 | // TODO: There are way too many flags that change this. Do we need to check |
801 | // them all? |
802 | bool DAZ = DriverArgs.hasArg(Ids: options::OPT_cl_denorms_are_zero) || |
803 | getDefaultDenormsAreZeroForTarget(Kind); |
804 | |
805 | // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are |
806 | // also implicit treated as zero (DAZ). |
807 | return DAZ ? llvm::DenormalMode::getPreserveSign() : |
808 | llvm::DenormalMode::getIEEE(); |
809 | } |
810 | |
811 | bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs, |
812 | llvm::AMDGPU::GPUKind Kind) { |
813 | const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(AK: Kind); |
814 | bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32); |
815 | |
816 | return !HasWave32 || DriverArgs.hasFlag( |
817 | Pos: options::OPT_mwavefrontsize64, Neg: options::OPT_mno_wavefrontsize64, Default: false); |
818 | } |
819 | |
820 | |
821 | /// ROCM Toolchain |
822 | ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple, |
823 | const ArgList &Args) |
824 | : AMDGPUToolChain(D, Triple, Args) { |
825 | RocmInstallation->detectDeviceLibrary(); |
826 | } |
827 | |
828 | void AMDGPUToolChain::addClangTargetOptions( |
829 | const llvm::opt::ArgList &DriverArgs, |
830 | llvm::opt::ArgStringList &CC1Args, |
831 | Action::OffloadKind DeviceOffloadingKind) const { |
832 | // Default to "hidden" visibility, as object level linking will not be |
833 | // supported for the foreseeable future. |
834 | if (!DriverArgs.hasArg(Ids: options::OPT_fvisibility_EQ, |
835 | Ids: options::OPT_fvisibility_ms_compat)) { |
836 | CC1Args.push_back(Elt: "-fvisibility=hidden" ); |
837 | CC1Args.push_back(Elt: "-fapply-global-visibility-to-externs" ); |
838 | } |
839 | } |
840 | |
841 | void AMDGPUToolChain::addClangWarningOptions(ArgStringList &CC1Args) const { |
842 | // AMDGPU does not support atomic lib call. Treat atomic alignment |
843 | // warnings as errors. |
844 | CC1Args.push_back(Elt: "-Werror=atomic-alignment" ); |
845 | } |
846 | |
847 | StringRef |
848 | AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const { |
849 | return getProcessorFromTargetID( |
850 | T: getTriple(), OffloadArch: DriverArgs.getLastArgValue(Id: options::OPT_mcpu_EQ)); |
851 | } |
852 | |
853 | AMDGPUToolChain::ParsedTargetIDType |
854 | AMDGPUToolChain::getParsedTargetID(const llvm::opt::ArgList &DriverArgs) const { |
855 | StringRef TargetID = DriverArgs.getLastArgValue(Id: options::OPT_mcpu_EQ); |
856 | if (TargetID.empty()) |
857 | return {.OptionalTargetID: std::nullopt, .OptionalGPUArch: std::nullopt, .OptionalFeatures: std::nullopt}; |
858 | |
859 | llvm::StringMap<bool> FeatureMap; |
860 | auto OptionalGpuArch = parseTargetID(T: getTriple(), OffloadArch: TargetID, FeatureMap: &FeatureMap); |
861 | if (!OptionalGpuArch) |
862 | return {.OptionalTargetID: TargetID.str(), .OptionalGPUArch: std::nullopt, .OptionalFeatures: std::nullopt}; |
863 | |
864 | return {.OptionalTargetID: TargetID.str(), .OptionalGPUArch: OptionalGpuArch->str(), .OptionalFeatures: FeatureMap}; |
865 | } |
866 | |
867 | void AMDGPUToolChain::checkTargetID( |
868 | const llvm::opt::ArgList &DriverArgs) const { |
869 | auto PTID = getParsedTargetID(DriverArgs); |
870 | if (PTID.OptionalTargetID && !PTID.OptionalGPUArch) { |
871 | getDriver().Diag(DiagID: clang::diag::err_drv_bad_target_id) |
872 | << *PTID.OptionalTargetID; |
873 | } |
874 | } |
875 | |
876 | Expected<SmallVector<std::string>> |
877 | AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const { |
878 | // Detect AMD GPUs availible on the system. |
879 | std::string Program; |
880 | if (Arg *A = Args.getLastArg(Ids: options::OPT_amdgpu_arch_tool_EQ)) |
881 | Program = A->getValue(); |
882 | else |
883 | Program = GetProgramPath(Name: "amdgpu-arch" ); |
884 | |
885 | auto StdoutOrErr = executeToolChainProgram(Executable: Program, /*SecondsToWait=*/10); |
886 | if (!StdoutOrErr) |
887 | return StdoutOrErr.takeError(); |
888 | |
889 | SmallVector<std::string, 1> GPUArchs; |
890 | for (StringRef Arch : llvm::split(Str: (*StdoutOrErr)->getBuffer(), Separator: "\n" )) |
891 | if (!Arch.empty()) |
892 | GPUArchs.push_back(Elt: Arch.str()); |
893 | |
894 | if (GPUArchs.empty()) |
895 | return llvm::createStringError(EC: std::error_code(), |
896 | S: "No AMD GPU detected in the system" ); |
897 | |
898 | return std::move(GPUArchs); |
899 | } |
900 | |
901 | void ROCMToolChain::addClangTargetOptions( |
902 | const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, |
903 | Action::OffloadKind DeviceOffloadingKind) const { |
904 | AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args, |
905 | DeviceOffloadingKind); |
906 | |
907 | // For the OpenCL case where there is no offload target, accept -nostdlib to |
908 | // disable bitcode linking. |
909 | if (DeviceOffloadingKind == Action::OFK_None && |
910 | DriverArgs.hasArg(Ids: options::OPT_nostdlib)) |
911 | return; |
912 | |
913 | if (DriverArgs.hasArg(Ids: options::OPT_nogpulib)) |
914 | return; |
915 | |
916 | // Get the device name and canonicalize it |
917 | const StringRef GpuArch = getGPUArch(DriverArgs); |
918 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(CPU: GpuArch); |
919 | const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(AK: Kind); |
920 | StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile(Gpu: CanonArch); |
921 | auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion( |
922 | CodeObjectVersion: getAMDGPUCodeObjectVersion(D: getDriver(), Args: DriverArgs)); |
923 | if (!RocmInstallation->checkCommonBitcodeLibs(GPUArch: CanonArch, LibDeviceFile, |
924 | ABIVer)) |
925 | return; |
926 | |
927 | bool Wave64 = isWave64(DriverArgs, Kind); |
928 | |
929 | // TODO: There are way too many flags that change this. Do we need to check |
930 | // them all? |
931 | bool DAZ = DriverArgs.hasArg(Ids: options::OPT_cl_denorms_are_zero) || |
932 | getDefaultDenormsAreZeroForTarget(Kind); |
933 | bool FiniteOnly = DriverArgs.hasArg(Ids: options::OPT_cl_finite_math_only); |
934 | |
935 | bool UnsafeMathOpt = |
936 | DriverArgs.hasArg(Ids: options::OPT_cl_unsafe_math_optimizations); |
937 | bool FastRelaxedMath = DriverArgs.hasArg(Ids: options::OPT_cl_fast_relaxed_math); |
938 | bool CorrectSqrt = |
939 | DriverArgs.hasArg(Ids: options::OPT_cl_fp32_correctly_rounded_divide_sqrt); |
940 | |
941 | // Add the OpenCL specific bitcode library. |
942 | llvm::SmallVector<std::string, 12> BCLibs; |
943 | BCLibs.push_back(Elt: RocmInstallation->getOpenCLPath().str()); |
944 | |
945 | // Add the generic set of libraries. |
946 | BCLibs.append(RHS: RocmInstallation->getCommonBitcodeLibs( |
947 | DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, |
948 | FastRelaxedMath, CorrectSqrt, ABIVer, isOpenMP: false)); |
949 | |
950 | if (getSanitizerArgs(JobArgs: DriverArgs).needsAsanRt()) { |
951 | CC1Args.push_back(Elt: "-mlink-bitcode-file" ); |
952 | CC1Args.push_back( |
953 | Elt: DriverArgs.MakeArgString(Str: RocmInstallation->getAsanRTLPath())); |
954 | } |
955 | for (StringRef BCFile : BCLibs) { |
956 | CC1Args.push_back(Elt: "-mlink-builtin-bitcode" ); |
957 | CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: BCFile)); |
958 | } |
959 | } |
960 | |
961 | bool RocmInstallationDetector::checkCommonBitcodeLibs( |
962 | StringRef GPUArch, StringRef LibDeviceFile, |
963 | DeviceLibABIVersion ABIVer) const { |
964 | if (!hasDeviceLibrary()) { |
965 | D.Diag(DiagID: diag::err_drv_no_rocm_device_lib) << 0; |
966 | return false; |
967 | } |
968 | if (LibDeviceFile.empty()) { |
969 | D.Diag(DiagID: diag::err_drv_no_rocm_device_lib) << 1 << GPUArch; |
970 | return false; |
971 | } |
972 | if (ABIVer.requiresLibrary() && getABIVersionPath(ABIVer).empty()) { |
973 | D.Diag(DiagID: diag::err_drv_no_rocm_device_lib) << 2 << ABIVer.toString(); |
974 | return false; |
975 | } |
976 | return true; |
977 | } |
978 | |
979 | llvm::SmallVector<std::string, 12> |
980 | RocmInstallationDetector::getCommonBitcodeLibs( |
981 | const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64, |
982 | bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath, |
983 | bool CorrectSqrt, DeviceLibABIVersion ABIVer, bool isOpenMP = false) const { |
984 | llvm::SmallVector<std::string, 12> BCLibs; |
985 | |
986 | auto AddBCLib = [&](StringRef BCFile) { BCLibs.push_back(Elt: BCFile.str()); }; |
987 | |
988 | AddBCLib(getOCMLPath()); |
989 | if (!isOpenMP) |
990 | AddBCLib(getOCKLPath()); |
991 | AddBCLib(getDenormalsAreZeroPath(Enabled: DAZ)); |
992 | AddBCLib(getUnsafeMathPath(Enabled: UnsafeMathOpt || FastRelaxedMath)); |
993 | AddBCLib(getFiniteOnlyPath(Enabled: FiniteOnly || FastRelaxedMath)); |
994 | AddBCLib(getCorrectlyRoundedSqrtPath(Enabled: CorrectSqrt)); |
995 | AddBCLib(getWavefrontSize64Path(Enabled: Wave64)); |
996 | AddBCLib(LibDeviceFile); |
997 | auto ABIVerPath = getABIVersionPath(ABIVer); |
998 | if (!ABIVerPath.empty()) |
999 | AddBCLib(ABIVerPath); |
1000 | |
1001 | return BCLibs; |
1002 | } |
1003 | |
1004 | llvm::SmallVector<std::string, 12> |
1005 | ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, |
1006 | const std::string &GPUArch, |
1007 | bool isOpenMP) const { |
1008 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(CPU: GPUArch); |
1009 | const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(AK: Kind); |
1010 | |
1011 | StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile(Gpu: CanonArch); |
1012 | auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion( |
1013 | CodeObjectVersion: getAMDGPUCodeObjectVersion(D: getDriver(), Args: DriverArgs)); |
1014 | if (!RocmInstallation->checkCommonBitcodeLibs(GPUArch: CanonArch, LibDeviceFile, |
1015 | ABIVer)) |
1016 | return {}; |
1017 | |
1018 | // If --hip-device-lib is not set, add the default bitcode libraries. |
1019 | // TODO: There are way too many flags that change this. Do we need to check |
1020 | // them all? |
1021 | bool DAZ = DriverArgs.hasFlag(Pos: options::OPT_fgpu_flush_denormals_to_zero, |
1022 | Neg: options::OPT_fno_gpu_flush_denormals_to_zero, |
1023 | Default: getDefaultDenormsAreZeroForTarget(Kind)); |
1024 | bool FiniteOnly = DriverArgs.hasFlag( |
1025 | Pos: options::OPT_ffinite_math_only, Neg: options::OPT_fno_finite_math_only, Default: false); |
1026 | bool UnsafeMathOpt = |
1027 | DriverArgs.hasFlag(Pos: options::OPT_funsafe_math_optimizations, |
1028 | Neg: options::OPT_fno_unsafe_math_optimizations, Default: false); |
1029 | bool FastRelaxedMath = DriverArgs.hasFlag(Pos: options::OPT_ffast_math, |
1030 | Neg: options::OPT_fno_fast_math, Default: false); |
1031 | bool CorrectSqrt = DriverArgs.hasFlag( |
1032 | Pos: options::OPT_fhip_fp32_correctly_rounded_divide_sqrt, |
1033 | Neg: options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, Default: true); |
1034 | bool Wave64 = isWave64(DriverArgs, Kind); |
1035 | |
1036 | return RocmInstallation->getCommonBitcodeLibs( |
1037 | DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, |
1038 | FastRelaxedMath, CorrectSqrt, ABIVer, isOpenMP); |
1039 | } |
1040 | |