1 | //===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "AMDGPU.h" |
10 | #include "clang/Basic/TargetID.h" |
11 | #include "clang/Config/config.h" |
12 | #include "clang/Driver/CommonArgs.h" |
13 | #include "clang/Driver/Compilation.h" |
14 | #include "clang/Driver/InputInfo.h" |
15 | #include "clang/Driver/Options.h" |
16 | #include "clang/Driver/SanitizerArgs.h" |
17 | #include "llvm/ADT/StringExtras.h" |
18 | #include "llvm/Option/ArgList.h" |
19 | #include "llvm/Support/Error.h" |
20 | #include "llvm/Support/LineIterator.h" |
21 | #include "llvm/Support/Path.h" |
22 | #include "llvm/Support/Process.h" |
23 | #include "llvm/Support/VirtualFileSystem.h" |
24 | #include "llvm/TargetParser/Host.h" |
25 | #include <optional> |
26 | #include <system_error> |
27 | |
28 | using namespace clang::driver; |
29 | using namespace clang::driver::tools; |
30 | using namespace clang::driver::toolchains; |
31 | using namespace clang; |
32 | using namespace llvm::opt; |
33 | |
34 | void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) { |
35 | assert(!Path.empty()); |
36 | |
37 | const StringRef Suffix(".bc" ); |
38 | const StringRef Suffix2(".amdgcn.bc" ); |
39 | |
40 | std::error_code EC; |
41 | for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(Dir: Path, EC), LE; |
42 | !EC && LI != LE; LI = LI.increment(EC)) { |
43 | StringRef FilePath = LI->path(); |
44 | StringRef FileName = llvm::sys::path::filename(path: FilePath); |
45 | if (!FileName.ends_with(Suffix)) |
46 | continue; |
47 | |
48 | StringRef BaseName; |
49 | if (FileName.ends_with(Suffix: Suffix2)) |
50 | BaseName = FileName.drop_back(N: Suffix2.size()); |
51 | else if (FileName.ends_with(Suffix)) |
52 | BaseName = FileName.drop_back(N: Suffix.size()); |
53 | |
54 | const StringRef ABIVersionPrefix = "oclc_abi_version_" ; |
55 | if (BaseName == "ocml" ) { |
56 | OCML = FilePath; |
57 | } else if (BaseName == "ockl" ) { |
58 | OCKL = FilePath; |
59 | } else if (BaseName == "opencl" ) { |
60 | OpenCL = FilePath; |
61 | } else if (BaseName == "asanrtl" ) { |
62 | AsanRTL = FilePath; |
63 | } else if (BaseName == "oclc_finite_only_off" ) { |
64 | FiniteOnly.Off = FilePath; |
65 | } else if (BaseName == "oclc_finite_only_on" ) { |
66 | FiniteOnly.On = FilePath; |
67 | } else if (BaseName == "oclc_daz_opt_on" ) { |
68 | DenormalsAreZero.On = FilePath; |
69 | } else if (BaseName == "oclc_daz_opt_off" ) { |
70 | DenormalsAreZero.Off = FilePath; |
71 | } else if (BaseName == "oclc_correctly_rounded_sqrt_on" ) { |
72 | CorrectlyRoundedSqrt.On = FilePath; |
73 | } else if (BaseName == "oclc_correctly_rounded_sqrt_off" ) { |
74 | CorrectlyRoundedSqrt.Off = FilePath; |
75 | } else if (BaseName == "oclc_unsafe_math_on" ) { |
76 | UnsafeMath.On = FilePath; |
77 | } else if (BaseName == "oclc_unsafe_math_off" ) { |
78 | UnsafeMath.Off = FilePath; |
79 | } else if (BaseName == "oclc_wavefrontsize64_on" ) { |
80 | WavefrontSize64.On = FilePath; |
81 | } else if (BaseName == "oclc_wavefrontsize64_off" ) { |
82 | WavefrontSize64.Off = FilePath; |
83 | } else if (BaseName.starts_with(Prefix: ABIVersionPrefix)) { |
84 | unsigned ABIVersionNumber; |
85 | if (BaseName.drop_front(N: ABIVersionPrefix.size()) |
86 | .getAsInteger(/*Redex=*/Radix: 0, Result&: ABIVersionNumber)) |
87 | continue; |
88 | ABIVersionMap[ABIVersionNumber] = FilePath.str(); |
89 | } else { |
90 | // Process all bitcode filenames that look like |
91 | // ocl_isa_version_XXX.amdgcn.bc |
92 | const StringRef DeviceLibPrefix = "oclc_isa_version_" ; |
93 | if (!BaseName.starts_with(Prefix: DeviceLibPrefix)) |
94 | continue; |
95 | |
96 | StringRef IsaVersionNumber = |
97 | BaseName.drop_front(N: DeviceLibPrefix.size()); |
98 | |
99 | llvm::Twine GfxName = Twine("gfx" ) + IsaVersionNumber; |
100 | SmallString<8> Tmp; |
101 | LibDeviceMap.insert( |
102 | KV: std::make_pair(x: GfxName.toStringRef(Out&: Tmp), y: FilePath.str())); |
103 | } |
104 | } |
105 | } |
106 | |
107 | // Parse and extract version numbers from `.hipVersion`. Return `true` if |
108 | // the parsing fails. |
109 | bool RocmInstallationDetector::parseHIPVersionFile(llvm::StringRef V) { |
110 | SmallVector<StringRef, 4> VersionParts; |
111 | V.split(A&: VersionParts, Separator: '\n'); |
112 | unsigned Major = ~0U; |
113 | unsigned Minor = ~0U; |
114 | for (auto Part : VersionParts) { |
115 | auto Splits = Part.rtrim().split(Separator: '='); |
116 | if (Splits.first == "HIP_VERSION_MAJOR" ) { |
117 | if (Splits.second.getAsInteger(Radix: 0, Result&: Major)) |
118 | return true; |
119 | } else if (Splits.first == "HIP_VERSION_MINOR" ) { |
120 | if (Splits.second.getAsInteger(Radix: 0, Result&: Minor)) |
121 | return true; |
122 | } else if (Splits.first == "HIP_VERSION_PATCH" ) |
123 | VersionPatch = Splits.second.str(); |
124 | } |
125 | if (Major == ~0U || Minor == ~0U) |
126 | return true; |
127 | VersionMajorMinor = llvm::VersionTuple(Major, Minor); |
128 | DetectedVersion = |
129 | (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str(); |
130 | return false; |
131 | } |
132 | |
133 | /// \returns a list of candidate directories for ROCm installation, which is |
134 | /// cached and populated only once. |
135 | const SmallVectorImpl<RocmInstallationDetector::Candidate> & |
136 | RocmInstallationDetector::getInstallationPathCandidates() { |
137 | |
138 | // Return the cached candidate list if it has already been populated. |
139 | if (!ROCmSearchDirs.empty()) |
140 | return ROCmSearchDirs; |
141 | |
142 | auto DoPrintROCmSearchDirs = [&]() { |
143 | if (PrintROCmSearchDirs) |
144 | for (auto Cand : ROCmSearchDirs) { |
145 | llvm::errs() << "ROCm installation search path: " << Cand.Path << '\n'; |
146 | } |
147 | }; |
148 | |
149 | // For candidate specified by --rocm-path we do not do strict check, i.e., |
150 | // checking existence of HIP version file and device library files. |
151 | if (!RocmPathArg.empty()) { |
152 | ROCmSearchDirs.emplace_back(Args: RocmPathArg.str()); |
153 | DoPrintROCmSearchDirs(); |
154 | return ROCmSearchDirs; |
155 | } else if (std::optional<std::string> RocmPathEnv = |
156 | llvm::sys::Process::GetEnv(name: "ROCM_PATH" )) { |
157 | if (!RocmPathEnv->empty()) { |
158 | ROCmSearchDirs.emplace_back(Args: std::move(*RocmPathEnv)); |
159 | DoPrintROCmSearchDirs(); |
160 | return ROCmSearchDirs; |
161 | } |
162 | } |
163 | |
164 | // Try to find relative to the compiler binary. |
165 | StringRef InstallDir = D.Dir; |
166 | |
167 | // Check both a normal Unix prefix position of the clang binary, as well as |
168 | // the Windows-esque layout the ROCm packages use with the host architecture |
169 | // subdirectory of bin. |
170 | auto DeduceROCmPath = [](StringRef ClangPath) { |
171 | // Strip off directory (usually bin) |
172 | StringRef ParentDir = llvm::sys::path::parent_path(path: ClangPath); |
173 | StringRef ParentName = llvm::sys::path::filename(path: ParentDir); |
174 | |
175 | // Some builds use bin/{host arch}, so go up again. |
176 | if (ParentName == "bin" ) { |
177 | ParentDir = llvm::sys::path::parent_path(path: ParentDir); |
178 | ParentName = llvm::sys::path::filename(path: ParentDir); |
179 | } |
180 | |
181 | // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin |
182 | // Some versions of the aomp package install to /opt/rocm/aomp/bin |
183 | if (ParentName == "llvm" || ParentName.starts_with(Prefix: "aomp" )) |
184 | ParentDir = llvm::sys::path::parent_path(path: ParentDir); |
185 | |
186 | return Candidate(ParentDir.str(), /*StrictChecking=*/true); |
187 | }; |
188 | |
189 | // Deduce ROCm path by the path used to invoke clang. Do not resolve symbolic |
190 | // link of clang itself. |
191 | ROCmSearchDirs.emplace_back(Args: DeduceROCmPath(InstallDir)); |
192 | |
193 | // Deduce ROCm path by the real path of the invoked clang, resolving symbolic |
194 | // link of clang itself. |
195 | llvm::SmallString<256> RealClangPath; |
196 | llvm::sys::fs::real_path(path: D.getClangProgramPath(), output&: RealClangPath); |
197 | auto ParentPath = llvm::sys::path::parent_path(path: RealClangPath); |
198 | if (ParentPath != InstallDir) |
199 | ROCmSearchDirs.emplace_back(Args: DeduceROCmPath(ParentPath)); |
200 | |
201 | // Device library may be installed in clang or resource directory. |
202 | auto ClangRoot = llvm::sys::path::parent_path(path: InstallDir); |
203 | auto RealClangRoot = llvm::sys::path::parent_path(path: ParentPath); |
204 | ROCmSearchDirs.emplace_back(Args: ClangRoot.str(), /*StrictChecking=*/Args: true); |
205 | if (RealClangRoot != ClangRoot) |
206 | ROCmSearchDirs.emplace_back(Args: RealClangRoot.str(), /*StrictChecking=*/Args: true); |
207 | ROCmSearchDirs.emplace_back(Args: D.ResourceDir, |
208 | /*StrictChecking=*/Args: true); |
209 | |
210 | ROCmSearchDirs.emplace_back(Args: D.SysRoot + "/opt/rocm" , |
211 | /*StrictChecking=*/Args: true); |
212 | |
213 | // Find the latest /opt/rocm-{release} directory. |
214 | std::error_code EC; |
215 | std::string LatestROCm; |
216 | llvm::VersionTuple LatestVer; |
217 | // Get ROCm version from ROCm directory name. |
218 | auto GetROCmVersion = [](StringRef DirName) { |
219 | llvm::VersionTuple V; |
220 | std::string VerStr = DirName.drop_front(N: strlen(s: "rocm-" )).str(); |
221 | // The ROCm directory name follows the format of |
222 | // rocm-{major}.{minor}.{subMinor}[-{build}] |
223 | llvm::replace(Range&: VerStr, OldValue: '-', NewValue: '.'); |
224 | V.tryParse(string: VerStr); |
225 | return V; |
226 | }; |
227 | for (llvm::vfs::directory_iterator |
228 | File = D.getVFS().dir_begin(Dir: D.SysRoot + "/opt" , EC), |
229 | FileEnd; |
230 | File != FileEnd && !EC; File.increment(EC)) { |
231 | llvm::StringRef FileName = llvm::sys::path::filename(path: File->path()); |
232 | if (!FileName.starts_with(Prefix: "rocm-" )) |
233 | continue; |
234 | if (LatestROCm.empty()) { |
235 | LatestROCm = FileName.str(); |
236 | LatestVer = GetROCmVersion(LatestROCm); |
237 | continue; |
238 | } |
239 | auto Ver = GetROCmVersion(FileName); |
240 | if (LatestVer < Ver) { |
241 | LatestROCm = FileName.str(); |
242 | LatestVer = Ver; |
243 | } |
244 | } |
245 | if (!LatestROCm.empty()) |
246 | ROCmSearchDirs.emplace_back(Args: D.SysRoot + "/opt/" + LatestROCm, |
247 | /*StrictChecking=*/Args: true); |
248 | |
249 | ROCmSearchDirs.emplace_back(Args: D.SysRoot + "/usr/local" , |
250 | /*StrictChecking=*/Args: true); |
251 | ROCmSearchDirs.emplace_back(Args: D.SysRoot + "/usr" , |
252 | /*StrictChecking=*/Args: true); |
253 | |
254 | DoPrintROCmSearchDirs(); |
255 | return ROCmSearchDirs; |
256 | } |
257 | |
258 | RocmInstallationDetector::RocmInstallationDetector( |
259 | const Driver &D, const llvm::Triple &HostTriple, |
260 | const llvm::opt::ArgList &Args, bool DetectHIPRuntime, bool DetectDeviceLib) |
261 | : D(D) { |
262 | Verbose = Args.hasArg(Ids: options::OPT_v); |
263 | RocmPathArg = Args.getLastArgValue(Id: clang::driver::options::OPT_rocm_path_EQ); |
264 | PrintROCmSearchDirs = |
265 | Args.hasArg(Ids: clang::driver::options::OPT_print_rocm_search_dirs); |
266 | RocmDeviceLibPathArg = |
267 | Args.getAllArgValues(Id: clang::driver::options::OPT_rocm_device_lib_path_EQ); |
268 | HIPPathArg = Args.getLastArgValue(Id: clang::driver::options::OPT_hip_path_EQ); |
269 | HIPStdParPathArg = |
270 | Args.getLastArgValue(Id: clang::driver::options::OPT_hipstdpar_path_EQ); |
271 | HasHIPStdParLibrary = |
272 | !HIPStdParPathArg.empty() && D.getVFS().exists(Path: HIPStdParPathArg + |
273 | "/hipstdpar_lib.hpp" ); |
274 | HIPRocThrustPathArg = |
275 | Args.getLastArgValue(Id: clang::driver::options::OPT_hipstdpar_thrust_path_EQ); |
276 | HasRocThrustLibrary = !HIPRocThrustPathArg.empty() && |
277 | D.getVFS().exists(Path: HIPRocThrustPathArg + "/thrust" ); |
278 | HIPRocPrimPathArg = |
279 | Args.getLastArgValue(Id: clang::driver::options::OPT_hipstdpar_prim_path_EQ); |
280 | HasRocPrimLibrary = !HIPRocPrimPathArg.empty() && |
281 | D.getVFS().exists(Path: HIPRocPrimPathArg + "/rocprim" ); |
282 | |
283 | if (auto *A = Args.getLastArg(Ids: clang::driver::options::OPT_hip_version_EQ)) { |
284 | HIPVersionArg = A->getValue(); |
285 | unsigned Major = ~0U; |
286 | unsigned Minor = ~0U; |
287 | SmallVector<StringRef, 3> Parts; |
288 | HIPVersionArg.split(A&: Parts, Separator: '.'); |
289 | if (Parts.size()) |
290 | Parts[0].getAsInteger(Radix: 0, Result&: Major); |
291 | if (Parts.size() > 1) |
292 | Parts[1].getAsInteger(Radix: 0, Result&: Minor); |
293 | if (Parts.size() > 2) |
294 | VersionPatch = Parts[2].str(); |
295 | if (VersionPatch.empty()) |
296 | VersionPatch = "0" ; |
297 | if (Major != ~0U && Minor == ~0U) |
298 | Minor = 0; |
299 | if (Major == ~0U || Minor == ~0U) |
300 | D.Diag(DiagID: diag::err_drv_invalid_value) |
301 | << A->getAsString(Args) << HIPVersionArg; |
302 | |
303 | VersionMajorMinor = llvm::VersionTuple(Major, Minor); |
304 | DetectedVersion = |
305 | (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str(); |
306 | } else { |
307 | VersionPatch = DefaultVersionPatch; |
308 | VersionMajorMinor = |
309 | llvm::VersionTuple(DefaultVersionMajor, DefaultVersionMinor); |
310 | DetectedVersion = (Twine(DefaultVersionMajor) + "." + |
311 | Twine(DefaultVersionMinor) + "." + VersionPatch) |
312 | .str(); |
313 | } |
314 | |
315 | if (DetectHIPRuntime) |
316 | detectHIPRuntime(); |
317 | if (DetectDeviceLib) |
318 | detectDeviceLibrary(); |
319 | } |
320 | |
321 | void RocmInstallationDetector::detectDeviceLibrary() { |
322 | assert(LibDevicePath.empty()); |
323 | |
324 | if (!RocmDeviceLibPathArg.empty()) |
325 | LibDevicePath = RocmDeviceLibPathArg[RocmDeviceLibPathArg.size() - 1]; |
326 | else if (std::optional<std::string> LibPathEnv = |
327 | llvm::sys::Process::GetEnv(name: "HIP_DEVICE_LIB_PATH" )) |
328 | LibDevicePath = std::move(*LibPathEnv); |
329 | |
330 | auto &FS = D.getVFS(); |
331 | if (!LibDevicePath.empty()) { |
332 | // Maintain compatability with HIP flag/envvar pointing directly at the |
333 | // bitcode library directory. This points directly at the library path instead |
334 | // of the rocm root installation. |
335 | if (!FS.exists(Path: LibDevicePath)) |
336 | return; |
337 | |
338 | scanLibDevicePath(Path: LibDevicePath); |
339 | HasDeviceLibrary = allGenericLibsValid() && !LibDeviceMap.empty(); |
340 | return; |
341 | } |
342 | |
343 | // Check device library exists at the given path. |
344 | auto CheckDeviceLib = [&](StringRef Path, bool StrictChecking) { |
345 | bool CheckLibDevice = (!NoBuiltinLibs || StrictChecking); |
346 | if (CheckLibDevice && !FS.exists(Path)) |
347 | return false; |
348 | |
349 | scanLibDevicePath(Path); |
350 | |
351 | if (!NoBuiltinLibs) { |
352 | // Check that the required non-target libraries are all available. |
353 | if (!allGenericLibsValid()) |
354 | return false; |
355 | |
356 | // Check that we have found at least one libdevice that we can link in |
357 | // if -nobuiltinlib hasn't been specified. |
358 | if (LibDeviceMap.empty()) |
359 | return false; |
360 | } |
361 | return true; |
362 | }; |
363 | |
364 | // Find device libraries in <LLVM_DIR>/lib/clang/<ver>/lib/amdgcn/bitcode |
365 | LibDevicePath = D.ResourceDir; |
366 | llvm::sys::path::append(path&: LibDevicePath, CLANG_INSTALL_LIBDIR_BASENAME, |
367 | b: "amdgcn" , c: "bitcode" ); |
368 | HasDeviceLibrary = CheckDeviceLib(LibDevicePath, true); |
369 | if (HasDeviceLibrary) |
370 | return; |
371 | |
372 | // Find device libraries in a legacy ROCm directory structure |
373 | // ${ROCM_ROOT}/amdgcn/bitcode/* |
374 | auto &ROCmDirs = getInstallationPathCandidates(); |
375 | for (const auto &Candidate : ROCmDirs) { |
376 | LibDevicePath = Candidate.Path; |
377 | llvm::sys::path::append(path&: LibDevicePath, a: "amdgcn" , b: "bitcode" ); |
378 | HasDeviceLibrary = CheckDeviceLib(LibDevicePath, Candidate.StrictChecking); |
379 | if (HasDeviceLibrary) |
380 | return; |
381 | } |
382 | } |
383 | |
384 | void RocmInstallationDetector::detectHIPRuntime() { |
385 | SmallVector<Candidate, 4> HIPSearchDirs; |
386 | if (!HIPPathArg.empty()) |
387 | HIPSearchDirs.emplace_back(Args: HIPPathArg.str()); |
388 | else if (std::optional<std::string> HIPPathEnv = |
389 | llvm::sys::Process::GetEnv(name: "HIP_PATH" )) { |
390 | if (!HIPPathEnv->empty()) |
391 | HIPSearchDirs.emplace_back(Args: std::move(*HIPPathEnv)); |
392 | } |
393 | if (HIPSearchDirs.empty()) |
394 | HIPSearchDirs.append(RHS: getInstallationPathCandidates()); |
395 | auto &FS = D.getVFS(); |
396 | |
397 | for (const auto &Candidate : HIPSearchDirs) { |
398 | InstallPath = Candidate.Path; |
399 | if (InstallPath.empty() || !FS.exists(Path: InstallPath)) |
400 | continue; |
401 | |
402 | BinPath = InstallPath; |
403 | llvm::sys::path::append(path&: BinPath, a: "bin" ); |
404 | IncludePath = InstallPath; |
405 | llvm::sys::path::append(path&: IncludePath, a: "include" ); |
406 | LibPath = InstallPath; |
407 | llvm::sys::path::append(path&: LibPath, a: "lib" ); |
408 | SharePath = InstallPath; |
409 | llvm::sys::path::append(path&: SharePath, a: "share" ); |
410 | |
411 | // Get parent of InstallPath and append "share" |
412 | SmallString<0> ParentSharePath = llvm::sys::path::parent_path(path: InstallPath); |
413 | llvm::sys::path::append(path&: ParentSharePath, a: "share" ); |
414 | |
415 | auto Append = [](SmallString<0> &path, const Twine &a, const Twine &b = "" , |
416 | const Twine &c = "" , const Twine &d = "" ) { |
417 | SmallString<0> newpath = path; |
418 | llvm::sys::path::append(path&: newpath, a, b, c, d); |
419 | return newpath; |
420 | }; |
421 | // If HIP version file can be found and parsed, use HIP version from there. |
422 | std::vector<SmallString<0>> VersionFilePaths = { |
423 | Append(SharePath, "hip" , "version" ), |
424 | InstallPath != D.SysRoot + "/usr/local" |
425 | ? Append(ParentSharePath, "hip" , "version" ) |
426 | : SmallString<0>(), |
427 | Append(BinPath, ".hipVersion" )}; |
428 | |
429 | for (const auto &VersionFilePath : VersionFilePaths) { |
430 | if (VersionFilePath.empty()) |
431 | continue; |
432 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile = |
433 | FS.getBufferForFile(Name: VersionFilePath); |
434 | if (!VersionFile) |
435 | continue; |
436 | if (HIPVersionArg.empty() && VersionFile) |
437 | if (parseHIPVersionFile(V: (*VersionFile)->getBuffer())) |
438 | continue; |
439 | |
440 | HasHIPRuntime = true; |
441 | return; |
442 | } |
443 | // Otherwise, if -rocm-path is specified (no strict checking), use the |
444 | // default HIP version or specified by --hip-version. |
445 | if (!Candidate.StrictChecking) { |
446 | HasHIPRuntime = true; |
447 | return; |
448 | } |
449 | } |
450 | HasHIPRuntime = false; |
451 | } |
452 | |
453 | void RocmInstallationDetector::print(raw_ostream &OS) const { |
454 | if (hasHIPRuntime()) |
455 | OS << "Found HIP installation: " << InstallPath << ", version " |
456 | << DetectedVersion << '\n'; |
457 | } |
458 | |
459 | void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs, |
460 | ArgStringList &CC1Args) const { |
461 | bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5) && |
462 | !DriverArgs.hasArg(Ids: options::OPT_nohipwrapperinc); |
463 | bool HasHipStdPar = DriverArgs.hasArg(Ids: options::OPT_hipstdpar); |
464 | |
465 | if (!DriverArgs.hasArg(Ids: options::OPT_nobuiltininc)) { |
466 | // HIP header includes standard library wrapper headers under clang |
467 | // cuda_wrappers directory. Since these wrapper headers include_next |
468 | // standard C++ headers, whereas libc++ headers include_next other clang |
469 | // headers. The include paths have to follow this order: |
470 | // - wrapper include path |
471 | // - standard C++ include path |
472 | // - other clang include path |
473 | // Since standard C++ and other clang include paths are added in other |
474 | // places after this function, here we only need to make sure wrapper |
475 | // include path is added. |
476 | // |
477 | // ROCm 3.5 does not fully support the wrapper headers. Therefore it needs |
478 | // a workaround. |
479 | SmallString<128> P(D.ResourceDir); |
480 | if (UsesRuntimeWrapper) |
481 | llvm::sys::path::append(path&: P, a: "include" , b: "cuda_wrappers" ); |
482 | CC1Args.push_back(Elt: "-internal-isystem" ); |
483 | CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: P)); |
484 | } |
485 | |
486 | const auto HandleHipStdPar = [=, &DriverArgs, &CC1Args]() { |
487 | StringRef Inc = getIncludePath(); |
488 | auto &FS = D.getVFS(); |
489 | |
490 | if (!hasHIPStdParLibrary()) |
491 | if (!HIPStdParPathArg.empty() || |
492 | !FS.exists(Path: Inc + "/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp" )) { |
493 | D.Diag(DiagID: diag::err_drv_no_hipstdpar_lib); |
494 | return; |
495 | } |
496 | if (!HasRocThrustLibrary && !FS.exists(Path: Inc + "/thrust" )) { |
497 | D.Diag(DiagID: diag::err_drv_no_hipstdpar_thrust_lib); |
498 | return; |
499 | } |
500 | if (!HasRocPrimLibrary && !FS.exists(Path: Inc + "/rocprim" )) { |
501 | D.Diag(DiagID: diag::err_drv_no_hipstdpar_prim_lib); |
502 | return; |
503 | } |
504 | const char *ThrustPath; |
505 | if (HasRocThrustLibrary) |
506 | ThrustPath = DriverArgs.MakeArgString(Str: HIPRocThrustPathArg); |
507 | else |
508 | ThrustPath = DriverArgs.MakeArgString(Str: Inc + "/thrust" ); |
509 | |
510 | const char *HIPStdParPath; |
511 | if (hasHIPStdParLibrary()) |
512 | HIPStdParPath = DriverArgs.MakeArgString(Str: HIPStdParPathArg); |
513 | else |
514 | HIPStdParPath = DriverArgs.MakeArgString(Str: StringRef(ThrustPath) + |
515 | "/system/hip/hipstdpar" ); |
516 | |
517 | const char *PrimPath; |
518 | if (HasRocPrimLibrary) |
519 | PrimPath = DriverArgs.MakeArgString(Str: HIPRocPrimPathArg); |
520 | else |
521 | PrimPath = DriverArgs.MakeArgString(Str: getIncludePath() + "/rocprim" ); |
522 | |
523 | CC1Args.append(IL: {"-idirafter" , ThrustPath, "-idirafter" , PrimPath, |
524 | "-idirafter" , HIPStdParPath, "-include" , |
525 | "hipstdpar_lib.hpp" }); |
526 | }; |
527 | |
528 | if (!DriverArgs.hasFlag(Pos: options::OPT_offload_inc, Neg: options::OPT_no_offload_inc, |
529 | Default: true)) { |
530 | if (HasHipStdPar) |
531 | HandleHipStdPar(); |
532 | |
533 | return; |
534 | } |
535 | |
536 | if (!hasHIPRuntime()) { |
537 | D.Diag(DiagID: diag::err_drv_no_hip_runtime); |
538 | return; |
539 | } |
540 | |
541 | CC1Args.push_back(Elt: "-idirafter" ); |
542 | CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: getIncludePath())); |
543 | if (UsesRuntimeWrapper) |
544 | CC1Args.append(IL: {"-include" , "__clang_hip_runtime_wrapper.h" }); |
545 | if (HasHipStdPar) |
546 | HandleHipStdPar(); |
547 | } |
548 | |
549 | void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, |
550 | const InputInfo &Output, |
551 | const InputInfoList &Inputs, |
552 | const ArgList &Args, |
553 | const char *LinkingOutput) const { |
554 | std::string Linker = getToolChain().GetLinkerPath(); |
555 | ArgStringList CmdArgs; |
556 | if (!Args.hasArg(Ids: options::OPT_r)) { |
557 | CmdArgs.push_back(Elt: "--no-undefined" ); |
558 | CmdArgs.push_back(Elt: "-shared" ); |
559 | } |
560 | |
561 | if (C.getDriver().isUsingLTO()) { |
562 | const bool ThinLTO = (C.getDriver().getLTOMode() == LTOK_Thin); |
563 | addLTOOptions(ToolChain: getToolChain(), Args, CmdArgs, Output, Inputs, IsThinLTO: ThinLTO); |
564 | } else if (Args.hasArg(Ids: options::OPT_mcpu_EQ)) { |
565 | CmdArgs.push_back(Elt: Args.MakeArgString( |
566 | Str: "-plugin-opt=mcpu=" + |
567 | getProcessorFromTargetID(T: getToolChain().getTriple(), |
568 | OffloadArch: Args.getLastArgValue(Id: options::OPT_mcpu_EQ)))); |
569 | } |
570 | addLinkerCompressDebugSectionsOption(TC: getToolChain(), Args, CmdArgs); |
571 | getToolChain().AddFilePathLibArgs(Args, CmdArgs); |
572 | Args.AddAllArgs(Output&: CmdArgs, Id0: options::OPT_L); |
573 | AddLinkerInputs(TC: getToolChain(), Inputs, Args, CmdArgs, JA); |
574 | |
575 | // Always pass the target-id features to the LTO job. |
576 | std::vector<StringRef> Features; |
577 | getAMDGPUTargetFeatures(D: C.getDriver(), Triple: getToolChain().getTriple(), Args, |
578 | Features); |
579 | if (!Features.empty()) { |
580 | CmdArgs.push_back( |
581 | Elt: Args.MakeArgString(Str: "-plugin-opt=-mattr=" + llvm::join(R&: Features, Separator: "," ))); |
582 | } |
583 | |
584 | if (Args.hasArg(Ids: options::OPT_stdlib)) |
585 | CmdArgs.append(IL: {"-lc" , "-lm" }); |
586 | if (Args.hasArg(Ids: options::OPT_startfiles)) { |
587 | std::optional<std::string> IncludePath = getToolChain().getStdlibPath(); |
588 | if (!IncludePath) |
589 | IncludePath = "/lib" ; |
590 | SmallString<128> P(*IncludePath); |
591 | llvm::sys::path::append(path&: P, a: "crt1.o" ); |
592 | CmdArgs.push_back(Elt: Args.MakeArgString(Str: P)); |
593 | } |
594 | |
595 | CmdArgs.push_back(Elt: "-o" ); |
596 | CmdArgs.push_back(Elt: Output.getFilename()); |
597 | C.addCommand(C: std::make_unique<Command>( |
598 | args: JA, args: *this, args: ResponseFileSupport::AtFileCurCP(), args: Args.MakeArgString(Str: Linker), |
599 | args&: CmdArgs, args: Inputs, args: Output)); |
600 | } |
601 | |
602 | void amdgpu::getAMDGPUTargetFeatures(const Driver &D, |
603 | const llvm::Triple &Triple, |
604 | const llvm::opt::ArgList &Args, |
605 | std::vector<StringRef> &Features) { |
606 | // Add target ID features to -target-feature options. No diagnostics should |
607 | // be emitted here since invalid target ID is diagnosed at other places. |
608 | StringRef TargetID; |
609 | if (Args.hasArg(Ids: options::OPT_mcpu_EQ)) |
610 | TargetID = Args.getLastArgValue(Id: options::OPT_mcpu_EQ); |
611 | else if (Args.hasArg(Ids: options::OPT_march_EQ)) |
612 | TargetID = Args.getLastArgValue(Id: options::OPT_march_EQ); |
613 | if (!TargetID.empty()) { |
614 | llvm::StringMap<bool> FeatureMap; |
615 | auto OptionalGpuArch = parseTargetID(T: Triple, OffloadArch: TargetID, FeatureMap: &FeatureMap); |
616 | if (OptionalGpuArch) { |
617 | StringRef GpuArch = *OptionalGpuArch; |
618 | // Iterate through all possible target ID features for the given GPU. |
619 | // If it is mapped to true, add +feature. |
620 | // If it is mapped to false, add -feature. |
621 | // If it is not in the map (default), do not add it |
622 | for (auto &&Feature : getAllPossibleTargetIDFeatures(T: Triple, Processor: GpuArch)) { |
623 | auto Pos = FeatureMap.find(Key: Feature); |
624 | if (Pos == FeatureMap.end()) |
625 | continue; |
626 | Features.push_back(x: Args.MakeArgStringRef( |
627 | Str: (Twine(Pos->second ? "+" : "-" ) + Feature).str())); |
628 | } |
629 | } |
630 | } |
631 | |
632 | if (Args.hasFlag(Pos: options::OPT_mwavefrontsize64, |
633 | Neg: options::OPT_mno_wavefrontsize64, Default: false)) |
634 | Features.push_back(x: "+wavefrontsize64" ); |
635 | |
636 | if (Args.hasFlag(Pos: options::OPT_mamdgpu_precise_memory_op, |
637 | Neg: options::OPT_mno_amdgpu_precise_memory_op, Default: false)) |
638 | Features.push_back(x: "+precise-memory" ); |
639 | |
640 | handleTargetFeaturesGroup(D, Triple, Args, Features, |
641 | Group: options::OPT_m_amdgpu_Features_Group); |
642 | } |
643 | |
/// AMDGPU Toolchain
///
/// Constructs the toolchain on top of Generic_ELF and seeds OptionsDefault
/// with the value "3" for options::OPT_O and "CL1.2" for
/// options::OPT_cl_std_EQ.
AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple,
                                 const ArgList &Args)
    : Generic_ELF(D, Triple, Args),
      OptionsDefault(
          {{options::OPT_O, "3"}, {options::OPT_cl_std_EQ, "CL1.2"}}) {
  // Check code object version options. Emit warnings for legacy options
  // and errors for the last invalid code object version options.
  // It is done here to avoid repeated warning or error messages for
  // each tool invocation.
  checkAMDGPUCodeObjectVersion(D, Args);
}
656 | |
/// Create the linker tool for this toolchain.
///
/// \returns a newly heap-allocated tools::amdgpu::Linker bound to this
/// toolchain. NOTE(review): the caller presumably takes ownership of the
/// returned pointer - confirm against the caller of buildLinker().
Tool *AMDGPUToolChain::buildLinker() const {
  return new tools::amdgpu::Linker(*this);
}
660 | |
661 | DerivedArgList * |
662 | AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, |
663 | Action::OffloadKind DeviceOffloadKind) const { |
664 | |
665 | DerivedArgList *DAL = |
666 | Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind); |
667 | |
668 | const OptTable &Opts = getDriver().getOpts(); |
669 | |
670 | if (!DAL) |
671 | DAL = new DerivedArgList(Args.getBaseArgs()); |
672 | |
673 | for (Arg *A : Args) |
674 | DAL->append(A); |
675 | |
676 | // Replace -mcpu=native with detected GPU. |
677 | Arg *LastMCPUArg = DAL->getLastArg(Ids: options::OPT_mcpu_EQ); |
678 | if (LastMCPUArg && StringRef(LastMCPUArg->getValue()) == "native" ) { |
679 | DAL->eraseArg(Id: options::OPT_mcpu_EQ); |
680 | auto GPUsOrErr = getSystemGPUArchs(Args); |
681 | if (!GPUsOrErr) { |
682 | getDriver().Diag(DiagID: diag::err_drv_undetermined_gpu_arch) |
683 | << llvm::Triple::getArchTypeName(Kind: getArch()) |
684 | << llvm::toString(E: GPUsOrErr.takeError()) << "-mcpu" ; |
685 | } else { |
686 | auto &GPUs = *GPUsOrErr; |
687 | if (GPUs.size() > 1) { |
688 | getDriver().Diag(DiagID: diag::warn_drv_multi_gpu_arch) |
689 | << llvm::Triple::getArchTypeName(Kind: getArch()) |
690 | << llvm::join(R&: GPUs, Separator: ", " ) << "-mcpu" ; |
691 | } |
692 | DAL->AddJoinedArg(BaseArg: nullptr, Opt: Opts.getOption(Opt: options::OPT_mcpu_EQ), |
693 | Value: Args.MakeArgString(Str: GPUs.front())); |
694 | } |
695 | } |
696 | |
697 | checkTargetID(DriverArgs: *DAL); |
698 | |
699 | if (Args.getLastArgValue(Id: options::OPT_x) != "cl" ) |
700 | return DAL; |
701 | |
702 | // Phase 1 (.cl -> .bc) |
703 | if (Args.hasArg(Ids: options::OPT_c) && Args.hasArg(Ids: options::OPT_emit_llvm)) { |
704 | DAL->AddFlagArg(BaseArg: nullptr, Opt: Opts.getOption(Opt: getTriple().isArch64Bit() |
705 | ? options::OPT_m64 |
706 | : options::OPT_m32)); |
707 | |
708 | // Have to check OPT_O4, OPT_O0 & OPT_Ofast separately |
709 | // as they defined that way in Options.td |
710 | if (!Args.hasArg(Ids: options::OPT_O, Ids: options::OPT_O0, Ids: options::OPT_O4, |
711 | Ids: options::OPT_Ofast)) |
712 | DAL->AddJoinedArg(BaseArg: nullptr, Opt: Opts.getOption(Opt: options::OPT_O), |
713 | Value: getOptionDefault(OptID: options::OPT_O)); |
714 | } |
715 | |
716 | return DAL; |
717 | } |
718 | |
719 | bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget( |
720 | llvm::AMDGPU::GPUKind Kind) { |
721 | |
722 | // Assume nothing without a specific target. |
723 | if (Kind == llvm::AMDGPU::GK_NONE) |
724 | return false; |
725 | |
726 | const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(AK: Kind); |
727 | |
728 | // Default to enabling f32 denormals by default on subtargets where fma is |
729 | // fast with denormals |
730 | const bool BothDenormAndFMAFast = |
731 | (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) && |
732 | (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32); |
733 | return !BothDenormAndFMAFast; |
734 | } |
735 | |
736 | llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType( |
737 | const llvm::opt::ArgList &DriverArgs, const JobAction &JA, |
738 | const llvm::fltSemantics *FPType) const { |
739 | // Denormals should always be enabled for f16 and f64. |
740 | if (!FPType || FPType != &llvm::APFloat::IEEEsingle()) |
741 | return llvm::DenormalMode::getIEEE(); |
742 | |
743 | if (JA.getOffloadingDeviceKind() == Action::OFK_HIP || |
744 | JA.getOffloadingDeviceKind() == Action::OFK_Cuda) { |
745 | auto Arch = getProcessorFromTargetID(T: getTriple(), OffloadArch: JA.getOffloadingArch()); |
746 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(CPU: Arch); |
747 | if (FPType && FPType == &llvm::APFloat::IEEEsingle() && |
748 | DriverArgs.hasFlag(Pos: options::OPT_fgpu_flush_denormals_to_zero, |
749 | Neg: options::OPT_fno_gpu_flush_denormals_to_zero, |
750 | Default: getDefaultDenormsAreZeroForTarget(Kind))) |
751 | return llvm::DenormalMode::getPreserveSign(); |
752 | |
753 | return llvm::DenormalMode::getIEEE(); |
754 | } |
755 | |
756 | const StringRef GpuArch = getGPUArch(DriverArgs); |
757 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(CPU: GpuArch); |
758 | |
759 | // TODO: There are way too many flags that change this. Do we need to check |
760 | // them all? |
761 | bool DAZ = DriverArgs.hasArg(Ids: options::OPT_cl_denorms_are_zero) || |
762 | getDefaultDenormsAreZeroForTarget(Kind); |
763 | |
764 | // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are |
765 | // also implicit treated as zero (DAZ). |
766 | return DAZ ? llvm::DenormalMode::getPreserveSign() : |
767 | llvm::DenormalMode::getIEEE(); |
768 | } |
769 | |
770 | bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs, |
771 | llvm::AMDGPU::GPUKind Kind) { |
772 | const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(AK: Kind); |
773 | bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32); |
774 | |
775 | return !HasWave32 || DriverArgs.hasFlag( |
776 | Pos: options::OPT_mwavefrontsize64, Neg: options::OPT_mno_wavefrontsize64, Default: false); |
777 | } |
778 | |
779 | |
780 | /// ROCM Toolchain |
781 | ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple, |
782 | const ArgList &Args) |
783 | : AMDGPUToolChain(D, Triple, Args) { |
784 | RocmInstallation->detectDeviceLibrary(); |
785 | } |
786 | |
787 | void AMDGPUToolChain::addClangTargetOptions( |
788 | const llvm::opt::ArgList &DriverArgs, |
789 | llvm::opt::ArgStringList &CC1Args, |
790 | Action::OffloadKind DeviceOffloadingKind) const { |
791 | // Default to "hidden" visibility, as object level linking will not be |
792 | // supported for the foreseeable future. |
793 | if (!DriverArgs.hasArg(Ids: options::OPT_fvisibility_EQ, |
794 | Ids: options::OPT_fvisibility_ms_compat)) { |
795 | CC1Args.push_back(Elt: "-fvisibility=hidden" ); |
796 | CC1Args.push_back(Elt: "-fapply-global-visibility-to-externs" ); |
797 | } |
798 | } |
799 | |
800 | void AMDGPUToolChain::addClangWarningOptions(ArgStringList &CC1Args) const { |
801 | // AMDGPU does not support atomic lib call. Treat atomic alignment |
802 | // warnings as errors. |
803 | CC1Args.push_back(Elt: "-Werror=atomic-alignment" ); |
804 | } |
805 | |
806 | StringRef |
807 | AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const { |
808 | return getProcessorFromTargetID( |
809 | T: getTriple(), OffloadArch: DriverArgs.getLastArgValue(Id: options::OPT_mcpu_EQ)); |
810 | } |
811 | |
812 | AMDGPUToolChain::ParsedTargetIDType |
813 | AMDGPUToolChain::getParsedTargetID(const llvm::opt::ArgList &DriverArgs) const { |
814 | StringRef TargetID = DriverArgs.getLastArgValue(Id: options::OPT_mcpu_EQ); |
815 | if (TargetID.empty()) |
816 | return {.OptionalTargetID: std::nullopt, .OptionalGPUArch: std::nullopt, .OptionalFeatures: std::nullopt}; |
817 | |
818 | llvm::StringMap<bool> FeatureMap; |
819 | auto OptionalGpuArch = parseTargetID(T: getTriple(), OffloadArch: TargetID, FeatureMap: &FeatureMap); |
820 | if (!OptionalGpuArch) |
821 | return {.OptionalTargetID: TargetID.str(), .OptionalGPUArch: std::nullopt, .OptionalFeatures: std::nullopt}; |
822 | |
823 | return {.OptionalTargetID: TargetID.str(), .OptionalGPUArch: OptionalGpuArch->str(), .OptionalFeatures: FeatureMap}; |
824 | } |
825 | |
826 | void AMDGPUToolChain::checkTargetID( |
827 | const llvm::opt::ArgList &DriverArgs) const { |
828 | auto PTID = getParsedTargetID(DriverArgs); |
829 | if (PTID.OptionalTargetID && !PTID.OptionalGPUArch) { |
830 | getDriver().Diag(DiagID: clang::diag::err_drv_bad_target_id) |
831 | << *PTID.OptionalTargetID; |
832 | } |
833 | } |
834 | |
835 | Expected<SmallVector<std::string>> |
836 | AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const { |
837 | // Detect AMD GPUs availible on the system. |
838 | std::string Program; |
839 | if (Arg *A = Args.getLastArg(Ids: options::OPT_amdgpu_arch_tool_EQ)) |
840 | Program = A->getValue(); |
841 | else |
842 | Program = GetProgramPath(Name: "amdgpu-arch" ); |
843 | |
844 | auto StdoutOrErr = executeToolChainProgram(Executable: Program); |
845 | if (!StdoutOrErr) |
846 | return StdoutOrErr.takeError(); |
847 | |
848 | SmallVector<std::string, 1> GPUArchs; |
849 | for (StringRef Arch : llvm::split(Str: (*StdoutOrErr)->getBuffer(), Separator: "\n" )) |
850 | if (!Arch.empty()) |
851 | GPUArchs.push_back(Elt: Arch.str()); |
852 | |
853 | if (GPUArchs.empty()) |
854 | return llvm::createStringError(EC: std::error_code(), |
855 | S: "No AMD GPU detected in the system" ); |
856 | |
857 | return std::move(GPUArchs); |
858 | } |
859 | |
860 | void ROCMToolChain::addClangTargetOptions( |
861 | const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, |
862 | Action::OffloadKind DeviceOffloadingKind) const { |
863 | AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args, |
864 | DeviceOffloadingKind); |
865 | |
866 | // For the OpenCL case where there is no offload target, accept -nostdlib to |
867 | // disable bitcode linking. |
868 | if (DeviceOffloadingKind == Action::OFK_None && |
869 | DriverArgs.hasArg(Ids: options::OPT_nostdlib)) |
870 | return; |
871 | |
872 | if (!DriverArgs.hasFlag(Pos: options::OPT_offloadlib, Neg: options::OPT_no_offloadlib, |
873 | Default: true)) |
874 | return; |
875 | |
876 | // Get the device name and canonicalize it |
877 | const StringRef GpuArch = getGPUArch(DriverArgs); |
878 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(CPU: GpuArch); |
879 | const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(AK: Kind); |
880 | StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile(Gpu: CanonArch); |
881 | auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion( |
882 | CodeObjectVersion: getAMDGPUCodeObjectVersion(D: getDriver(), Args: DriverArgs)); |
883 | if (!RocmInstallation->checkCommonBitcodeLibs(GPUArch: CanonArch, LibDeviceFile, |
884 | ABIVer)) |
885 | return; |
886 | |
887 | bool Wave64 = isWave64(DriverArgs, Kind); |
888 | // TODO: There are way too many flags that change this. Do we need to check |
889 | // them all? |
890 | bool DAZ = DriverArgs.hasArg(Ids: options::OPT_cl_denorms_are_zero) || |
891 | getDefaultDenormsAreZeroForTarget(Kind); |
892 | bool FiniteOnly = DriverArgs.hasArg(Ids: options::OPT_cl_finite_math_only); |
893 | |
894 | bool UnsafeMathOpt = |
895 | DriverArgs.hasArg(Ids: options::OPT_cl_unsafe_math_optimizations); |
896 | bool FastRelaxedMath = DriverArgs.hasArg(Ids: options::OPT_cl_fast_relaxed_math); |
897 | bool CorrectSqrt = |
898 | DriverArgs.hasArg(Ids: options::OPT_cl_fp32_correctly_rounded_divide_sqrt); |
899 | |
900 | // GPU Sanitizer currently only supports ASan and is enabled through host |
901 | // ASan. |
902 | bool GPUSan = DriverArgs.hasFlag(Pos: options::OPT_fgpu_sanitize, |
903 | Neg: options::OPT_fno_gpu_sanitize, Default: true) && |
904 | getSanitizerArgs(JobArgs: DriverArgs).needsAsanRt(); |
905 | |
906 | // Add the OpenCL specific bitcode library. |
907 | llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs; |
908 | BCLibs.emplace_back(Args: RocmInstallation->getOpenCLPath().str()); |
909 | |
910 | // Add the generic set of libraries. |
911 | BCLibs.append(RHS: RocmInstallation->getCommonBitcodeLibs( |
912 | DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, |
913 | FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, isOpenMP: false)); |
914 | |
915 | for (auto [BCFile, Internalize] : BCLibs) { |
916 | if (Internalize) |
917 | CC1Args.push_back(Elt: "-mlink-builtin-bitcode" ); |
918 | else |
919 | CC1Args.push_back(Elt: "-mlink-bitcode-file" ); |
920 | CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: BCFile)); |
921 | } |
922 | } |
923 | |
924 | bool RocmInstallationDetector::checkCommonBitcodeLibs( |
925 | StringRef GPUArch, StringRef LibDeviceFile, |
926 | DeviceLibABIVersion ABIVer) const { |
927 | if (!hasDeviceLibrary()) { |
928 | D.Diag(DiagID: diag::err_drv_no_rocm_device_lib) << 0; |
929 | return false; |
930 | } |
931 | if (LibDeviceFile.empty()) { |
932 | D.Diag(DiagID: diag::err_drv_no_rocm_device_lib) << 1 << GPUArch; |
933 | return false; |
934 | } |
935 | if (ABIVer.requiresLibrary() && getABIVersionPath(ABIVer).empty()) { |
936 | // Starting from COV6, we will report minimum ROCm version requirement in |
937 | // the error message. |
938 | if (ABIVer.getAsCodeObjectVersion() < 6) |
939 | D.Diag(DiagID: diag::err_drv_no_rocm_device_lib) << 2 << ABIVer.toString() << 0; |
940 | else |
941 | D.Diag(DiagID: diag::err_drv_no_rocm_device_lib) |
942 | << 2 << ABIVer.toString() << 1 << "6.3" ; |
943 | return false; |
944 | } |
945 | return true; |
946 | } |
947 | |
948 | llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> |
949 | RocmInstallationDetector::getCommonBitcodeLibs( |
950 | const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64, |
951 | bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath, |
952 | bool CorrectSqrt, DeviceLibABIVersion ABIVer, bool GPUSan, |
953 | bool isOpenMP) const { |
954 | llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> BCLibs; |
955 | |
956 | auto AddBCLib = [&](ToolChain::BitCodeLibraryInfo BCLib, |
957 | bool Internalize = true) { |
958 | BCLib.ShouldInternalize = Internalize; |
959 | BCLibs.emplace_back(Args&: BCLib); |
960 | }; |
961 | auto AddSanBCLibs = [&]() { |
962 | if (GPUSan) |
963 | AddBCLib(getAsanRTLPath(), false); |
964 | }; |
965 | |
966 | AddSanBCLibs(); |
967 | AddBCLib(getOCMLPath()); |
968 | if (!isOpenMP) |
969 | AddBCLib(getOCKLPath()); |
970 | else if (GPUSan && isOpenMP) |
971 | AddBCLib(getOCKLPath(), false); |
972 | AddBCLib(getDenormalsAreZeroPath(Enabled: DAZ)); |
973 | AddBCLib(getUnsafeMathPath(Enabled: UnsafeMathOpt || FastRelaxedMath)); |
974 | AddBCLib(getFiniteOnlyPath(Enabled: FiniteOnly || FastRelaxedMath)); |
975 | AddBCLib(getCorrectlyRoundedSqrtPath(Enabled: CorrectSqrt)); |
976 | AddBCLib(getWavefrontSize64Path(Enabled: Wave64)); |
977 | AddBCLib(LibDeviceFile); |
978 | auto ABIVerPath = getABIVersionPath(ABIVer); |
979 | if (!ABIVerPath.empty()) |
980 | AddBCLib(ABIVerPath); |
981 | |
982 | return BCLibs; |
983 | } |
984 | |
985 | llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> |
986 | ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, |
987 | const std::string &GPUArch, |
988 | bool isOpenMP) const { |
989 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(CPU: GPUArch); |
990 | const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(AK: Kind); |
991 | |
992 | StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile(Gpu: CanonArch); |
993 | auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion( |
994 | CodeObjectVersion: getAMDGPUCodeObjectVersion(D: getDriver(), Args: DriverArgs)); |
995 | if (!RocmInstallation->checkCommonBitcodeLibs(GPUArch: CanonArch, LibDeviceFile, |
996 | ABIVer)) |
997 | return {}; |
998 | |
999 | // If --hip-device-lib is not set, add the default bitcode libraries. |
1000 | // TODO: There are way too many flags that change this. Do we need to check |
1001 | // them all? |
1002 | bool DAZ = DriverArgs.hasFlag(Pos: options::OPT_fgpu_flush_denormals_to_zero, |
1003 | Neg: options::OPT_fno_gpu_flush_denormals_to_zero, |
1004 | Default: getDefaultDenormsAreZeroForTarget(Kind)); |
1005 | bool FiniteOnly = DriverArgs.hasFlag( |
1006 | Pos: options::OPT_ffinite_math_only, Neg: options::OPT_fno_finite_math_only, Default: false); |
1007 | bool UnsafeMathOpt = |
1008 | DriverArgs.hasFlag(Pos: options::OPT_funsafe_math_optimizations, |
1009 | Neg: options::OPT_fno_unsafe_math_optimizations, Default: false); |
1010 | bool FastRelaxedMath = DriverArgs.hasFlag(Pos: options::OPT_ffast_math, |
1011 | Neg: options::OPT_fno_fast_math, Default: false); |
1012 | bool CorrectSqrt = DriverArgs.hasFlag( |
1013 | Pos: options::OPT_fhip_fp32_correctly_rounded_divide_sqrt, |
1014 | Neg: options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, Default: true); |
1015 | bool Wave64 = isWave64(DriverArgs, Kind); |
1016 | |
1017 | // GPU Sanitizer currently only supports ASan and is enabled through host |
1018 | // ASan. |
1019 | bool GPUSan = DriverArgs.hasFlag(Pos: options::OPT_fgpu_sanitize, |
1020 | Neg: options::OPT_fno_gpu_sanitize, Default: true) && |
1021 | getSanitizerArgs(JobArgs: DriverArgs).needsAsanRt(); |
1022 | |
1023 | return RocmInstallation->getCommonBitcodeLibs( |
1024 | DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, |
1025 | FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, isOpenMP); |
1026 | } |
1027 | |
1028 | bool AMDGPUToolChain::shouldSkipSanitizeOption( |
1029 | const ToolChain &TC, const llvm::opt::ArgList &DriverArgs, |
1030 | StringRef TargetID, const llvm::opt::Arg *A) const { |
1031 | // For actions without targetID, do nothing. |
1032 | if (TargetID.empty()) |
1033 | return false; |
1034 | Option O = A->getOption(); |
1035 | |
1036 | if (!O.matches(ID: options::OPT_fsanitize_EQ)) |
1037 | return false; |
1038 | |
1039 | if (!DriverArgs.hasFlag(Pos: options::OPT_fgpu_sanitize, |
1040 | Neg: options::OPT_fno_gpu_sanitize, Default: true)) |
1041 | return true; |
1042 | |
1043 | auto &Diags = TC.getDriver().getDiags(); |
1044 | |
1045 | // For simplicity, we only allow -fsanitize=address |
1046 | SanitizerMask K = parseSanitizerValue(Value: A->getValue(), /*AllowGroups=*/false); |
1047 | if (K != SanitizerKind::Address) |
1048 | return true; |
1049 | |
1050 | llvm::StringMap<bool> FeatureMap; |
1051 | auto OptionalGpuArch = parseTargetID(T: TC.getTriple(), OffloadArch: TargetID, FeatureMap: &FeatureMap); |
1052 | |
1053 | assert(OptionalGpuArch && "Invalid Target ID" ); |
1054 | (void)OptionalGpuArch; |
1055 | auto Loc = FeatureMap.find(Key: "xnack" ); |
1056 | if (Loc == FeatureMap.end() || !Loc->second) { |
1057 | Diags.Report( |
1058 | DiagID: clang::diag::warn_drv_unsupported_option_for_offload_arch_req_feature) |
1059 | << A->getAsString(Args: DriverArgs) << TargetID << "xnack+" ; |
1060 | return true; |
1061 | } |
1062 | return false; |
1063 | } |
1064 | |