| 1 | //===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "AMDGPU.h" |
| 10 | #include "clang/Basic/TargetID.h" |
| 11 | #include "clang/Config/config.h" |
| 12 | #include "clang/Driver/CommonArgs.h" |
| 13 | #include "clang/Driver/Compilation.h" |
| 14 | #include "clang/Driver/InputInfo.h" |
| 15 | #include "clang/Driver/Options.h" |
| 16 | #include "clang/Driver/SanitizerArgs.h" |
| 17 | #include "llvm/ADT/StringExtras.h" |
| 18 | #include "llvm/Option/ArgList.h" |
| 19 | #include "llvm/Support/Error.h" |
| 20 | #include "llvm/Support/LineIterator.h" |
| 21 | #include "llvm/Support/Path.h" |
| 22 | #include "llvm/Support/Process.h" |
| 23 | #include "llvm/Support/VirtualFileSystem.h" |
| 24 | #include "llvm/TargetParser/Host.h" |
| 25 | #include <optional> |
| 26 | #include <system_error> |
| 27 | |
| 28 | using namespace clang::driver; |
| 29 | using namespace clang::driver::tools; |
| 30 | using namespace clang::driver::toolchains; |
| 31 | using namespace clang; |
| 32 | using namespace llvm::opt; |
| 33 | |
| 34 | void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) { |
| 35 | assert(!Path.empty()); |
| 36 | |
| 37 | const StringRef Suffix(".bc" ); |
| 38 | const StringRef Suffix2(".amdgcn.bc" ); |
| 39 | |
| 40 | std::error_code EC; |
| 41 | for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(Dir: Path, EC), LE; |
| 42 | !EC && LI != LE; LI = LI.increment(EC)) { |
| 43 | StringRef FilePath = LI->path(); |
| 44 | StringRef FileName = llvm::sys::path::filename(path: FilePath); |
| 45 | if (!FileName.ends_with(Suffix)) |
| 46 | continue; |
| 47 | |
| 48 | StringRef BaseName; |
| 49 | if (FileName.ends_with(Suffix: Suffix2)) |
| 50 | BaseName = FileName.drop_back(N: Suffix2.size()); |
| 51 | else if (FileName.ends_with(Suffix)) |
| 52 | BaseName = FileName.drop_back(N: Suffix.size()); |
| 53 | |
| 54 | const StringRef ABIVersionPrefix = "oclc_abi_version_" ; |
| 55 | if (BaseName == "ocml" ) { |
| 56 | OCML = FilePath; |
| 57 | } else if (BaseName == "ockl" ) { |
| 58 | OCKL = FilePath; |
| 59 | } else if (BaseName == "opencl" ) { |
| 60 | OpenCL = FilePath; |
| 61 | } else if (BaseName == "asanrtl" ) { |
| 62 | AsanRTL = FilePath; |
| 63 | } else if (BaseName == "oclc_finite_only_off" ) { |
| 64 | FiniteOnly.Off = FilePath; |
| 65 | } else if (BaseName == "oclc_finite_only_on" ) { |
| 66 | FiniteOnly.On = FilePath; |
| 67 | } else if (BaseName == "oclc_daz_opt_on" ) { |
| 68 | DenormalsAreZero.On = FilePath; |
| 69 | } else if (BaseName == "oclc_daz_opt_off" ) { |
| 70 | DenormalsAreZero.Off = FilePath; |
| 71 | } else if (BaseName == "oclc_correctly_rounded_sqrt_on" ) { |
| 72 | CorrectlyRoundedSqrt.On = FilePath; |
| 73 | } else if (BaseName == "oclc_correctly_rounded_sqrt_off" ) { |
| 74 | CorrectlyRoundedSqrt.Off = FilePath; |
| 75 | } else if (BaseName == "oclc_unsafe_math_on" ) { |
| 76 | UnsafeMath.On = FilePath; |
| 77 | } else if (BaseName == "oclc_unsafe_math_off" ) { |
| 78 | UnsafeMath.Off = FilePath; |
| 79 | } else if (BaseName == "oclc_wavefrontsize64_on" ) { |
| 80 | WavefrontSize64.On = FilePath; |
| 81 | } else if (BaseName == "oclc_wavefrontsize64_off" ) { |
| 82 | WavefrontSize64.Off = FilePath; |
| 83 | } else if (BaseName.starts_with(Prefix: ABIVersionPrefix)) { |
| 84 | unsigned ABIVersionNumber; |
| 85 | if (BaseName.drop_front(N: ABIVersionPrefix.size()) |
| 86 | .getAsInteger(/*Redex=*/Radix: 0, Result&: ABIVersionNumber)) |
| 87 | continue; |
| 88 | ABIVersionMap[ABIVersionNumber] = FilePath.str(); |
| 89 | } else { |
| 90 | // Process all bitcode filenames that look like |
| 91 | // ocl_isa_version_XXX.amdgcn.bc |
| 92 | const StringRef DeviceLibPrefix = "oclc_isa_version_" ; |
| 93 | if (!BaseName.starts_with(Prefix: DeviceLibPrefix)) |
| 94 | continue; |
| 95 | |
| 96 | StringRef IsaVersionNumber = |
| 97 | BaseName.drop_front(N: DeviceLibPrefix.size()); |
| 98 | |
| 99 | llvm::Twine GfxName = Twine("gfx" ) + IsaVersionNumber; |
| 100 | SmallString<8> Tmp; |
| 101 | LibDeviceMap.insert( |
| 102 | KV: std::make_pair(x: GfxName.toStringRef(Out&: Tmp), y: FilePath.str())); |
| 103 | } |
| 104 | } |
| 105 | } |
| 106 | |
| 107 | // Parse and extract version numbers from `.hipVersion`. Return `true` if |
| 108 | // the parsing fails. |
| 109 | bool RocmInstallationDetector::parseHIPVersionFile(llvm::StringRef V) { |
| 110 | SmallVector<StringRef, 4> VersionParts; |
| 111 | V.split(A&: VersionParts, Separator: '\n'); |
| 112 | unsigned Major = ~0U; |
| 113 | unsigned Minor = ~0U; |
| 114 | for (auto Part : VersionParts) { |
| 115 | auto Splits = Part.rtrim().split(Separator: '='); |
| 116 | if (Splits.first == "HIP_VERSION_MAJOR" ) { |
| 117 | if (Splits.second.getAsInteger(Radix: 0, Result&: Major)) |
| 118 | return true; |
| 119 | } else if (Splits.first == "HIP_VERSION_MINOR" ) { |
| 120 | if (Splits.second.getAsInteger(Radix: 0, Result&: Minor)) |
| 121 | return true; |
| 122 | } else if (Splits.first == "HIP_VERSION_PATCH" ) |
| 123 | VersionPatch = Splits.second.str(); |
| 124 | } |
| 125 | if (Major == ~0U || Minor == ~0U) |
| 126 | return true; |
| 127 | VersionMajorMinor = llvm::VersionTuple(Major, Minor); |
| 128 | DetectedVersion = |
| 129 | (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str(); |
| 130 | return false; |
| 131 | } |
| 132 | |
| 133 | /// \returns a list of candidate directories for ROCm installation, which is |
| 134 | /// cached and populated only once. |
| 135 | const SmallVectorImpl<RocmInstallationDetector::Candidate> & |
| 136 | RocmInstallationDetector::getInstallationPathCandidates() { |
| 137 | |
| 138 | // Return the cached candidate list if it has already been populated. |
| 139 | if (!ROCmSearchDirs.empty()) |
| 140 | return ROCmSearchDirs; |
| 141 | |
| 142 | auto DoPrintROCmSearchDirs = [&]() { |
| 143 | if (PrintROCmSearchDirs) |
| 144 | for (auto Cand : ROCmSearchDirs) { |
| 145 | llvm::errs() << "ROCm installation search path: " << Cand.Path << '\n'; |
| 146 | } |
| 147 | }; |
| 148 | |
| 149 | // For candidate specified by --rocm-path we do not do strict check, i.e., |
| 150 | // checking existence of HIP version file and device library files. |
| 151 | if (!RocmPathArg.empty()) { |
| 152 | ROCmSearchDirs.emplace_back(Args: RocmPathArg.str()); |
| 153 | DoPrintROCmSearchDirs(); |
| 154 | return ROCmSearchDirs; |
| 155 | } else if (std::optional<std::string> RocmPathEnv = |
| 156 | llvm::sys::Process::GetEnv(name: "ROCM_PATH" )) { |
| 157 | if (!RocmPathEnv->empty()) { |
| 158 | ROCmSearchDirs.emplace_back(Args: std::move(*RocmPathEnv)); |
| 159 | DoPrintROCmSearchDirs(); |
| 160 | return ROCmSearchDirs; |
| 161 | } |
| 162 | } |
| 163 | |
| 164 | // Try to find relative to the compiler binary. |
| 165 | StringRef InstallDir = D.Dir; |
| 166 | |
| 167 | // Check both a normal Unix prefix position of the clang binary, as well as |
| 168 | // the Windows-esque layout the ROCm packages use with the host architecture |
| 169 | // subdirectory of bin. |
| 170 | auto DeduceROCmPath = [](StringRef ClangPath) { |
| 171 | // Strip off directory (usually bin) |
| 172 | StringRef ParentDir = llvm::sys::path::parent_path(path: ClangPath); |
| 173 | StringRef ParentName = llvm::sys::path::filename(path: ParentDir); |
| 174 | |
| 175 | // Some builds use bin/{host arch}, so go up again. |
| 176 | if (ParentName == "bin" ) { |
| 177 | ParentDir = llvm::sys::path::parent_path(path: ParentDir); |
| 178 | ParentName = llvm::sys::path::filename(path: ParentDir); |
| 179 | } |
| 180 | |
| 181 | // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin |
| 182 | // Some versions of the aomp package install to /opt/rocm/aomp/bin |
| 183 | if (ParentName == "llvm" || ParentName.starts_with(Prefix: "aomp" )) |
| 184 | ParentDir = llvm::sys::path::parent_path(path: ParentDir); |
| 185 | |
| 186 | return Candidate(ParentDir.str(), /*StrictChecking=*/true); |
| 187 | }; |
| 188 | |
| 189 | // Deduce ROCm path by the path used to invoke clang. Do not resolve symbolic |
| 190 | // link of clang itself. |
| 191 | ROCmSearchDirs.emplace_back(Args: DeduceROCmPath(InstallDir)); |
| 192 | |
| 193 | // Deduce ROCm path by the real path of the invoked clang, resolving symbolic |
| 194 | // link of clang itself. |
| 195 | llvm::SmallString<256> RealClangPath; |
| 196 | llvm::sys::fs::real_path(path: D.getClangProgramPath(), output&: RealClangPath); |
| 197 | auto ParentPath = llvm::sys::path::parent_path(path: RealClangPath); |
| 198 | if (ParentPath != InstallDir) |
| 199 | ROCmSearchDirs.emplace_back(Args: DeduceROCmPath(ParentPath)); |
| 200 | |
| 201 | // Device library may be installed in clang or resource directory. |
| 202 | auto ClangRoot = llvm::sys::path::parent_path(path: InstallDir); |
| 203 | auto RealClangRoot = llvm::sys::path::parent_path(path: ParentPath); |
| 204 | ROCmSearchDirs.emplace_back(Args: ClangRoot.str(), /*StrictChecking=*/Args: true); |
| 205 | if (RealClangRoot != ClangRoot) |
| 206 | ROCmSearchDirs.emplace_back(Args: RealClangRoot.str(), /*StrictChecking=*/Args: true); |
| 207 | ROCmSearchDirs.emplace_back(Args: D.ResourceDir, |
| 208 | /*StrictChecking=*/Args: true); |
| 209 | |
| 210 | ROCmSearchDirs.emplace_back(Args: D.SysRoot + "/opt/rocm" , |
| 211 | /*StrictChecking=*/Args: true); |
| 212 | |
| 213 | // Find the latest /opt/rocm-{release} directory. |
| 214 | std::error_code EC; |
| 215 | std::string LatestROCm; |
| 216 | llvm::VersionTuple LatestVer; |
| 217 | // Get ROCm version from ROCm directory name. |
| 218 | auto GetROCmVersion = [](StringRef DirName) { |
| 219 | llvm::VersionTuple V; |
| 220 | std::string VerStr = DirName.drop_front(N: strlen(s: "rocm-" )).str(); |
| 221 | // The ROCm directory name follows the format of |
| 222 | // rocm-{major}.{minor}.{subMinor}[-{build}] |
| 223 | llvm::replace(Range&: VerStr, OldValue: '-', NewValue: '.'); |
| 224 | V.tryParse(string: VerStr); |
| 225 | return V; |
| 226 | }; |
| 227 | for (llvm::vfs::directory_iterator |
| 228 | File = D.getVFS().dir_begin(Dir: D.SysRoot + "/opt" , EC), |
| 229 | FileEnd; |
| 230 | File != FileEnd && !EC; File.increment(EC)) { |
| 231 | llvm::StringRef FileName = llvm::sys::path::filename(path: File->path()); |
| 232 | if (!FileName.starts_with(Prefix: "rocm-" )) |
| 233 | continue; |
| 234 | if (LatestROCm.empty()) { |
| 235 | LatestROCm = FileName.str(); |
| 236 | LatestVer = GetROCmVersion(LatestROCm); |
| 237 | continue; |
| 238 | } |
| 239 | auto Ver = GetROCmVersion(FileName); |
| 240 | if (LatestVer < Ver) { |
| 241 | LatestROCm = FileName.str(); |
| 242 | LatestVer = Ver; |
| 243 | } |
| 244 | } |
| 245 | if (!LatestROCm.empty()) |
| 246 | ROCmSearchDirs.emplace_back(Args: D.SysRoot + "/opt/" + LatestROCm, |
| 247 | /*StrictChecking=*/Args: true); |
| 248 | |
| 249 | ROCmSearchDirs.emplace_back(Args: D.SysRoot + "/usr/local" , |
| 250 | /*StrictChecking=*/Args: true); |
| 251 | ROCmSearchDirs.emplace_back(Args: D.SysRoot + "/usr" , |
| 252 | /*StrictChecking=*/Args: true); |
| 253 | |
| 254 | DoPrintROCmSearchDirs(); |
| 255 | return ROCmSearchDirs; |
| 256 | } |
| 257 | |
| 258 | RocmInstallationDetector::RocmInstallationDetector( |
| 259 | const Driver &D, const llvm::Triple &HostTriple, |
| 260 | const llvm::opt::ArgList &Args, bool DetectHIPRuntime, bool DetectDeviceLib) |
| 261 | : D(D) { |
| 262 | Verbose = Args.hasArg(Ids: options::OPT_v); |
| 263 | RocmPathArg = Args.getLastArgValue(Id: clang::driver::options::OPT_rocm_path_EQ); |
| 264 | PrintROCmSearchDirs = |
| 265 | Args.hasArg(Ids: clang::driver::options::OPT_print_rocm_search_dirs); |
| 266 | RocmDeviceLibPathArg = |
| 267 | Args.getAllArgValues(Id: clang::driver::options::OPT_rocm_device_lib_path_EQ); |
| 268 | HIPPathArg = Args.getLastArgValue(Id: clang::driver::options::OPT_hip_path_EQ); |
| 269 | HIPStdParPathArg = |
| 270 | Args.getLastArgValue(Id: clang::driver::options::OPT_hipstdpar_path_EQ); |
| 271 | HasHIPStdParLibrary = |
| 272 | !HIPStdParPathArg.empty() && D.getVFS().exists(Path: HIPStdParPathArg + |
| 273 | "/hipstdpar_lib.hpp" ); |
| 274 | HIPRocThrustPathArg = |
| 275 | Args.getLastArgValue(Id: clang::driver::options::OPT_hipstdpar_thrust_path_EQ); |
| 276 | HasRocThrustLibrary = !HIPRocThrustPathArg.empty() && |
| 277 | D.getVFS().exists(Path: HIPRocThrustPathArg + "/thrust" ); |
| 278 | HIPRocPrimPathArg = |
| 279 | Args.getLastArgValue(Id: clang::driver::options::OPT_hipstdpar_prim_path_EQ); |
| 280 | HasRocPrimLibrary = !HIPRocPrimPathArg.empty() && |
| 281 | D.getVFS().exists(Path: HIPRocPrimPathArg + "/rocprim" ); |
| 282 | |
| 283 | if (auto *A = Args.getLastArg(Ids: clang::driver::options::OPT_hip_version_EQ)) { |
| 284 | HIPVersionArg = A->getValue(); |
| 285 | unsigned Major = ~0U; |
| 286 | unsigned Minor = ~0U; |
| 287 | SmallVector<StringRef, 3> Parts; |
| 288 | HIPVersionArg.split(A&: Parts, Separator: '.'); |
| 289 | if (Parts.size()) |
| 290 | Parts[0].getAsInteger(Radix: 0, Result&: Major); |
| 291 | if (Parts.size() > 1) |
| 292 | Parts[1].getAsInteger(Radix: 0, Result&: Minor); |
| 293 | if (Parts.size() > 2) |
| 294 | VersionPatch = Parts[2].str(); |
| 295 | if (VersionPatch.empty()) |
| 296 | VersionPatch = "0" ; |
| 297 | if (Major != ~0U && Minor == ~0U) |
| 298 | Minor = 0; |
| 299 | if (Major == ~0U || Minor == ~0U) |
| 300 | D.Diag(DiagID: diag::err_drv_invalid_value) |
| 301 | << A->getAsString(Args) << HIPVersionArg; |
| 302 | |
| 303 | VersionMajorMinor = llvm::VersionTuple(Major, Minor); |
| 304 | DetectedVersion = |
| 305 | (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str(); |
| 306 | } else { |
| 307 | VersionPatch = DefaultVersionPatch; |
| 308 | VersionMajorMinor = |
| 309 | llvm::VersionTuple(DefaultVersionMajor, DefaultVersionMinor); |
| 310 | DetectedVersion = (Twine(DefaultVersionMajor) + "." + |
| 311 | Twine(DefaultVersionMinor) + "." + VersionPatch) |
| 312 | .str(); |
| 313 | } |
| 314 | |
| 315 | if (DetectHIPRuntime) |
| 316 | detectHIPRuntime(); |
| 317 | if (DetectDeviceLib) |
| 318 | detectDeviceLibrary(); |
| 319 | } |
| 320 | |
| 321 | void RocmInstallationDetector::detectDeviceLibrary() { |
| 322 | assert(LibDevicePath.empty()); |
| 323 | |
| 324 | if (!RocmDeviceLibPathArg.empty()) |
| 325 | LibDevicePath = RocmDeviceLibPathArg[RocmDeviceLibPathArg.size() - 1]; |
| 326 | else if (std::optional<std::string> LibPathEnv = |
| 327 | llvm::sys::Process::GetEnv(name: "HIP_DEVICE_LIB_PATH" )) |
| 328 | LibDevicePath = std::move(*LibPathEnv); |
| 329 | |
| 330 | auto &FS = D.getVFS(); |
| 331 | if (!LibDevicePath.empty()) { |
| 332 | // Maintain compatability with HIP flag/envvar pointing directly at the |
| 333 | // bitcode library directory. This points directly at the library path instead |
| 334 | // of the rocm root installation. |
| 335 | if (!FS.exists(Path: LibDevicePath)) |
| 336 | return; |
| 337 | |
| 338 | scanLibDevicePath(Path: LibDevicePath); |
| 339 | HasDeviceLibrary = allGenericLibsValid() && !LibDeviceMap.empty(); |
| 340 | return; |
| 341 | } |
| 342 | |
| 343 | // Check device library exists at the given path. |
| 344 | auto CheckDeviceLib = [&](StringRef Path, bool StrictChecking) { |
| 345 | bool CheckLibDevice = (!NoBuiltinLibs || StrictChecking); |
| 346 | if (CheckLibDevice && !FS.exists(Path)) |
| 347 | return false; |
| 348 | |
| 349 | scanLibDevicePath(Path); |
| 350 | |
| 351 | if (!NoBuiltinLibs) { |
| 352 | // Check that the required non-target libraries are all available. |
| 353 | if (!allGenericLibsValid()) |
| 354 | return false; |
| 355 | |
| 356 | // Check that we have found at least one libdevice that we can link in |
| 357 | // if -nobuiltinlib hasn't been specified. |
| 358 | if (LibDeviceMap.empty()) |
| 359 | return false; |
| 360 | } |
| 361 | return true; |
| 362 | }; |
| 363 | |
| 364 | // Find device libraries in <LLVM_DIR>/lib/clang/<ver>/lib/amdgcn/bitcode |
| 365 | LibDevicePath = D.ResourceDir; |
| 366 | llvm::sys::path::append(path&: LibDevicePath, CLANG_INSTALL_LIBDIR_BASENAME, |
| 367 | b: "amdgcn" , c: "bitcode" ); |
| 368 | HasDeviceLibrary = CheckDeviceLib(LibDevicePath, true); |
| 369 | if (HasDeviceLibrary) |
| 370 | return; |
| 371 | |
| 372 | // Find device libraries in a legacy ROCm directory structure |
| 373 | // ${ROCM_ROOT}/amdgcn/bitcode/* |
| 374 | auto &ROCmDirs = getInstallationPathCandidates(); |
| 375 | for (const auto &Candidate : ROCmDirs) { |
| 376 | LibDevicePath = Candidate.Path; |
| 377 | llvm::sys::path::append(path&: LibDevicePath, a: "amdgcn" , b: "bitcode" ); |
| 378 | HasDeviceLibrary = CheckDeviceLib(LibDevicePath, Candidate.StrictChecking); |
| 379 | if (HasDeviceLibrary) |
| 380 | return; |
| 381 | } |
| 382 | } |
| 383 | |
| 384 | void RocmInstallationDetector::detectHIPRuntime() { |
| 385 | SmallVector<Candidate, 4> HIPSearchDirs; |
| 386 | if (!HIPPathArg.empty()) |
| 387 | HIPSearchDirs.emplace_back(Args: HIPPathArg.str()); |
| 388 | else if (std::optional<std::string> HIPPathEnv = |
| 389 | llvm::sys::Process::GetEnv(name: "HIP_PATH" )) { |
| 390 | if (!HIPPathEnv->empty()) |
| 391 | HIPSearchDirs.emplace_back(Args: std::move(*HIPPathEnv)); |
| 392 | } |
| 393 | if (HIPSearchDirs.empty()) |
| 394 | HIPSearchDirs.append(RHS: getInstallationPathCandidates()); |
| 395 | auto &FS = D.getVFS(); |
| 396 | |
| 397 | for (const auto &Candidate : HIPSearchDirs) { |
| 398 | InstallPath = Candidate.Path; |
| 399 | if (InstallPath.empty() || !FS.exists(Path: InstallPath)) |
| 400 | continue; |
| 401 | |
| 402 | BinPath = InstallPath; |
| 403 | llvm::sys::path::append(path&: BinPath, a: "bin" ); |
| 404 | IncludePath = InstallPath; |
| 405 | llvm::sys::path::append(path&: IncludePath, a: "include" ); |
| 406 | LibPath = InstallPath; |
| 407 | llvm::sys::path::append(path&: LibPath, a: "lib" ); |
| 408 | SharePath = InstallPath; |
| 409 | llvm::sys::path::append(path&: SharePath, a: "share" ); |
| 410 | |
| 411 | // Get parent of InstallPath and append "share" |
| 412 | SmallString<0> ParentSharePath = llvm::sys::path::parent_path(path: InstallPath); |
| 413 | llvm::sys::path::append(path&: ParentSharePath, a: "share" ); |
| 414 | |
| 415 | auto Append = [](SmallString<0> &path, const Twine &a, const Twine &b = "" , |
| 416 | const Twine &c = "" , const Twine &d = "" ) { |
| 417 | SmallString<0> newpath = path; |
| 418 | llvm::sys::path::append(path&: newpath, a, b, c, d); |
| 419 | return newpath; |
| 420 | }; |
| 421 | // If HIP version file can be found and parsed, use HIP version from there. |
| 422 | std::vector<SmallString<0>> VersionFilePaths = { |
| 423 | Append(SharePath, "hip" , "version" ), |
| 424 | InstallPath != D.SysRoot + "/usr/local" |
| 425 | ? Append(ParentSharePath, "hip" , "version" ) |
| 426 | : SmallString<0>(), |
| 427 | Append(BinPath, ".hipVersion" )}; |
| 428 | |
| 429 | for (const auto &VersionFilePath : VersionFilePaths) { |
| 430 | if (VersionFilePath.empty()) |
| 431 | continue; |
| 432 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile = |
| 433 | FS.getBufferForFile(Name: VersionFilePath); |
| 434 | if (!VersionFile) |
| 435 | continue; |
| 436 | if (HIPVersionArg.empty() && VersionFile) |
| 437 | if (parseHIPVersionFile(V: (*VersionFile)->getBuffer())) |
| 438 | continue; |
| 439 | |
| 440 | HasHIPRuntime = true; |
| 441 | return; |
| 442 | } |
| 443 | // Otherwise, if -rocm-path is specified (no strict checking), use the |
| 444 | // default HIP version or specified by --hip-version. |
| 445 | if (!Candidate.StrictChecking) { |
| 446 | HasHIPRuntime = true; |
| 447 | return; |
| 448 | } |
| 449 | } |
| 450 | HasHIPRuntime = false; |
| 451 | } |
| 452 | |
| 453 | void RocmInstallationDetector::print(raw_ostream &OS) const { |
| 454 | if (hasHIPRuntime()) |
| 455 | OS << "Found HIP installation: " << InstallPath << ", version " |
| 456 | << DetectedVersion << '\n'; |
| 457 | } |
| 458 | |
| 459 | void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs, |
| 460 | ArgStringList &CC1Args) const { |
| 461 | bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5) && |
| 462 | !DriverArgs.hasArg(Ids: options::OPT_nohipwrapperinc); |
| 463 | bool HasHipStdPar = DriverArgs.hasArg(Ids: options::OPT_hipstdpar); |
| 464 | |
| 465 | if (!DriverArgs.hasArg(Ids: options::OPT_nobuiltininc)) { |
| 466 | // HIP header includes standard library wrapper headers under clang |
| 467 | // cuda_wrappers directory. Since these wrapper headers include_next |
| 468 | // standard C++ headers, whereas libc++ headers include_next other clang |
| 469 | // headers. The include paths have to follow this order: |
| 470 | // - wrapper include path |
| 471 | // - standard C++ include path |
| 472 | // - other clang include path |
| 473 | // Since standard C++ and other clang include paths are added in other |
| 474 | // places after this function, here we only need to make sure wrapper |
| 475 | // include path is added. |
| 476 | // |
| 477 | // ROCm 3.5 does not fully support the wrapper headers. Therefore it needs |
| 478 | // a workaround. |
| 479 | SmallString<128> P(D.ResourceDir); |
| 480 | if (UsesRuntimeWrapper) |
| 481 | llvm::sys::path::append(path&: P, a: "include" , b: "cuda_wrappers" ); |
| 482 | CC1Args.push_back(Elt: "-internal-isystem" ); |
| 483 | CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: P)); |
| 484 | } |
| 485 | |
| 486 | const auto HandleHipStdPar = [=, &DriverArgs, &CC1Args]() { |
| 487 | StringRef Inc = getIncludePath(); |
| 488 | auto &FS = D.getVFS(); |
| 489 | |
| 490 | if (!hasHIPStdParLibrary()) |
| 491 | if (!HIPStdParPathArg.empty() || |
| 492 | !FS.exists(Path: Inc + "/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp" )) { |
| 493 | D.Diag(DiagID: diag::err_drv_no_hipstdpar_lib); |
| 494 | return; |
| 495 | } |
| 496 | if (!HasRocThrustLibrary && !FS.exists(Path: Inc + "/thrust" )) { |
| 497 | D.Diag(DiagID: diag::err_drv_no_hipstdpar_thrust_lib); |
| 498 | return; |
| 499 | } |
| 500 | if (!HasRocPrimLibrary && !FS.exists(Path: Inc + "/rocprim" )) { |
| 501 | D.Diag(DiagID: diag::err_drv_no_hipstdpar_prim_lib); |
| 502 | return; |
| 503 | } |
| 504 | const char *ThrustPath; |
| 505 | if (HasRocThrustLibrary) |
| 506 | ThrustPath = DriverArgs.MakeArgString(Str: HIPRocThrustPathArg); |
| 507 | else |
| 508 | ThrustPath = DriverArgs.MakeArgString(Str: Inc + "/thrust" ); |
| 509 | |
| 510 | const char *HIPStdParPath; |
| 511 | if (hasHIPStdParLibrary()) |
| 512 | HIPStdParPath = DriverArgs.MakeArgString(Str: HIPStdParPathArg); |
| 513 | else |
| 514 | HIPStdParPath = DriverArgs.MakeArgString(Str: StringRef(ThrustPath) + |
| 515 | "/system/hip/hipstdpar" ); |
| 516 | |
| 517 | const char *PrimPath; |
| 518 | if (HasRocPrimLibrary) |
| 519 | PrimPath = DriverArgs.MakeArgString(Str: HIPRocPrimPathArg); |
| 520 | else |
| 521 | PrimPath = DriverArgs.MakeArgString(Str: getIncludePath() + "/rocprim" ); |
| 522 | |
| 523 | CC1Args.append(IL: {"-idirafter" , ThrustPath, "-idirafter" , PrimPath, |
| 524 | "-idirafter" , HIPStdParPath, "-include" , |
| 525 | "hipstdpar_lib.hpp" }); |
| 526 | }; |
| 527 | |
| 528 | if (!DriverArgs.hasFlag(Pos: options::OPT_offload_inc, Neg: options::OPT_no_offload_inc, |
| 529 | Default: true)) { |
| 530 | if (HasHipStdPar) |
| 531 | HandleHipStdPar(); |
| 532 | |
| 533 | return; |
| 534 | } |
| 535 | |
| 536 | if (!hasHIPRuntime()) { |
| 537 | D.Diag(DiagID: diag::err_drv_no_hip_runtime); |
| 538 | return; |
| 539 | } |
| 540 | |
| 541 | CC1Args.push_back(Elt: "-idirafter" ); |
| 542 | CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: getIncludePath())); |
| 543 | if (UsesRuntimeWrapper) |
| 544 | CC1Args.append(IL: {"-include" , "__clang_hip_runtime_wrapper.h" }); |
| 545 | if (HasHipStdPar) |
| 546 | HandleHipStdPar(); |
| 547 | } |
| 548 | |
| 549 | void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, |
| 550 | const InputInfo &Output, |
| 551 | const InputInfoList &Inputs, |
| 552 | const ArgList &Args, |
| 553 | const char *LinkingOutput) const { |
| 554 | std::string Linker = getToolChain().GetLinkerPath(); |
| 555 | ArgStringList CmdArgs; |
| 556 | if (!Args.hasArg(Ids: options::OPT_r)) { |
| 557 | CmdArgs.push_back(Elt: "--no-undefined" ); |
| 558 | CmdArgs.push_back(Elt: "-shared" ); |
| 559 | } |
| 560 | |
| 561 | if (C.getDriver().isUsingLTO()) { |
| 562 | const bool ThinLTO = (C.getDriver().getLTOMode() == LTOK_Thin); |
| 563 | addLTOOptions(ToolChain: getToolChain(), Args, CmdArgs, Output, Inputs, IsThinLTO: ThinLTO); |
| 564 | } else if (Args.hasArg(Ids: options::OPT_mcpu_EQ)) { |
| 565 | CmdArgs.push_back(Elt: Args.MakeArgString( |
| 566 | Str: "-plugin-opt=mcpu=" + |
| 567 | getProcessorFromTargetID(T: getToolChain().getTriple(), |
| 568 | OffloadArch: Args.getLastArgValue(Id: options::OPT_mcpu_EQ)))); |
| 569 | } |
| 570 | addLinkerCompressDebugSectionsOption(TC: getToolChain(), Args, CmdArgs); |
| 571 | getToolChain().AddFilePathLibArgs(Args, CmdArgs); |
| 572 | Args.AddAllArgs(Output&: CmdArgs, Id0: options::OPT_L); |
| 573 | AddLinkerInputs(TC: getToolChain(), Inputs, Args, CmdArgs, JA); |
| 574 | |
| 575 | // Always pass the target-id features to the LTO job. |
| 576 | std::vector<StringRef> Features; |
| 577 | getAMDGPUTargetFeatures(D: C.getDriver(), Triple: getToolChain().getTriple(), Args, |
| 578 | Features); |
| 579 | if (!Features.empty()) { |
| 580 | CmdArgs.push_back( |
| 581 | Elt: Args.MakeArgString(Str: "-plugin-opt=-mattr=" + llvm::join(R&: Features, Separator: "," ))); |
| 582 | } |
| 583 | |
| 584 | if (Args.hasArg(Ids: options::OPT_stdlib)) |
| 585 | CmdArgs.append(IL: {"-lc" , "-lm" }); |
| 586 | if (Args.hasArg(Ids: options::OPT_startfiles)) { |
| 587 | std::optional<std::string> IncludePath = getToolChain().getStdlibPath(); |
| 588 | if (!IncludePath) |
| 589 | IncludePath = "/lib" ; |
| 590 | SmallString<128> P(*IncludePath); |
| 591 | llvm::sys::path::append(path&: P, a: "crt1.o" ); |
| 592 | CmdArgs.push_back(Elt: Args.MakeArgString(Str: P)); |
| 593 | } |
| 594 | |
| 595 | CmdArgs.push_back(Elt: "-o" ); |
| 596 | CmdArgs.push_back(Elt: Output.getFilename()); |
| 597 | C.addCommand(C: std::make_unique<Command>( |
| 598 | args: JA, args: *this, args: ResponseFileSupport::AtFileCurCP(), args: Args.MakeArgString(Str: Linker), |
| 599 | args&: CmdArgs, args: Inputs, args: Output)); |
| 600 | } |
| 601 | |
| 602 | void amdgpu::getAMDGPUTargetFeatures(const Driver &D, |
| 603 | const llvm::Triple &Triple, |
| 604 | const llvm::opt::ArgList &Args, |
| 605 | std::vector<StringRef> &Features) { |
| 606 | // Add target ID features to -target-feature options. No diagnostics should |
| 607 | // be emitted here since invalid target ID is diagnosed at other places. |
| 608 | StringRef TargetID; |
| 609 | if (Args.hasArg(Ids: options::OPT_mcpu_EQ)) |
| 610 | TargetID = Args.getLastArgValue(Id: options::OPT_mcpu_EQ); |
| 611 | else if (Args.hasArg(Ids: options::OPT_march_EQ)) |
| 612 | TargetID = Args.getLastArgValue(Id: options::OPT_march_EQ); |
| 613 | if (!TargetID.empty()) { |
| 614 | llvm::StringMap<bool> FeatureMap; |
| 615 | auto OptionalGpuArch = parseTargetID(T: Triple, OffloadArch: TargetID, FeatureMap: &FeatureMap); |
| 616 | if (OptionalGpuArch) { |
| 617 | StringRef GpuArch = *OptionalGpuArch; |
| 618 | // Iterate through all possible target ID features for the given GPU. |
| 619 | // If it is mapped to true, add +feature. |
| 620 | // If it is mapped to false, add -feature. |
| 621 | // If it is not in the map (default), do not add it |
| 622 | for (auto &&Feature : getAllPossibleTargetIDFeatures(T: Triple, Processor: GpuArch)) { |
| 623 | auto Pos = FeatureMap.find(Key: Feature); |
| 624 | if (Pos == FeatureMap.end()) |
| 625 | continue; |
| 626 | Features.push_back(x: Args.MakeArgStringRef( |
| 627 | Str: (Twine(Pos->second ? "+" : "-" ) + Feature).str())); |
| 628 | } |
| 629 | } |
| 630 | } |
| 631 | |
| 632 | if (Args.hasFlag(Pos: options::OPT_mwavefrontsize64, |
| 633 | Neg: options::OPT_mno_wavefrontsize64, Default: false)) |
| 634 | Features.push_back(x: "+wavefrontsize64" ); |
| 635 | |
| 636 | if (Args.hasFlag(Pos: options::OPT_mamdgpu_precise_memory_op, |
| 637 | Neg: options::OPT_mno_amdgpu_precise_memory_op, Default: false)) |
| 638 | Features.push_back(x: "+precise-memory" ); |
| 639 | |
| 640 | handleTargetFeaturesGroup(D, Triple, Args, Features, |
| 641 | Group: options::OPT_m_amdgpu_Features_Group); |
| 642 | } |
| 643 | |
| 644 | /// AMDGPU Toolchain |
| 645 | AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple, |
| 646 | const ArgList &Args) |
| 647 | : Generic_ELF(D, Triple, Args), |
| 648 | OptionsDefault( |
| 649 | {{options::OPT_O, "3" }, {options::OPT_cl_std_EQ, "CL1.2" }}) { |
| 650 | // Check code object version options. Emit warnings for legacy options |
| 651 | // and errors for the last invalid code object version options. |
| 652 | // It is done here to avoid repeated warning or error messages for |
| 653 | // each tool invocation. |
| 654 | checkAMDGPUCodeObjectVersion(D, Args); |
| 655 | } |
| 656 | |
| 657 | Tool *AMDGPUToolChain::buildLinker() const { |
| 658 | return new tools::amdgpu::Linker(*this); |
| 659 | } |
| 660 | |
| 661 | DerivedArgList * |
| 662 | AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, |
| 663 | Action::OffloadKind DeviceOffloadKind) const { |
| 664 | |
| 665 | DerivedArgList *DAL = |
| 666 | Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind); |
| 667 | |
| 668 | const OptTable &Opts = getDriver().getOpts(); |
| 669 | |
| 670 | if (!DAL) |
| 671 | DAL = new DerivedArgList(Args.getBaseArgs()); |
| 672 | |
| 673 | for (Arg *A : Args) |
| 674 | DAL->append(A); |
| 675 | |
| 676 | // Replace -mcpu=native with detected GPU. |
| 677 | Arg *LastMCPUArg = DAL->getLastArg(Ids: options::OPT_mcpu_EQ); |
| 678 | if (LastMCPUArg && StringRef(LastMCPUArg->getValue()) == "native" ) { |
| 679 | DAL->eraseArg(Id: options::OPT_mcpu_EQ); |
| 680 | auto GPUsOrErr = getSystemGPUArchs(Args); |
| 681 | if (!GPUsOrErr) { |
| 682 | getDriver().Diag(DiagID: diag::err_drv_undetermined_gpu_arch) |
| 683 | << llvm::Triple::getArchTypeName(Kind: getArch()) |
| 684 | << llvm::toString(E: GPUsOrErr.takeError()) << "-mcpu" ; |
| 685 | } else { |
| 686 | auto &GPUs = *GPUsOrErr; |
| 687 | if (GPUs.size() > 1) { |
| 688 | getDriver().Diag(DiagID: diag::warn_drv_multi_gpu_arch) |
| 689 | << llvm::Triple::getArchTypeName(Kind: getArch()) |
| 690 | << llvm::join(R&: GPUs, Separator: ", " ) << "-mcpu" ; |
| 691 | } |
| 692 | DAL->AddJoinedArg(BaseArg: nullptr, Opt: Opts.getOption(Opt: options::OPT_mcpu_EQ), |
| 693 | Value: Args.MakeArgString(Str: GPUs.front())); |
| 694 | } |
| 695 | } |
| 696 | |
| 697 | checkTargetID(DriverArgs: *DAL); |
| 698 | |
| 699 | if (Args.getLastArgValue(Id: options::OPT_x) != "cl" ) |
| 700 | return DAL; |
| 701 | |
| 702 | // Phase 1 (.cl -> .bc) |
| 703 | if (Args.hasArg(Ids: options::OPT_c) && Args.hasArg(Ids: options::OPT_emit_llvm)) { |
| 704 | DAL->AddFlagArg(BaseArg: nullptr, Opt: Opts.getOption(Opt: getTriple().isArch64Bit() |
| 705 | ? options::OPT_m64 |
| 706 | : options::OPT_m32)); |
| 707 | |
| 708 | // Have to check OPT_O4, OPT_O0 & OPT_Ofast separately |
| 709 | // as they defined that way in Options.td |
| 710 | if (!Args.hasArg(Ids: options::OPT_O, Ids: options::OPT_O0, Ids: options::OPT_O4, |
| 711 | Ids: options::OPT_Ofast)) |
| 712 | DAL->AddJoinedArg(BaseArg: nullptr, Opt: Opts.getOption(Opt: options::OPT_O), |
| 713 | Value: getOptionDefault(OptID: options::OPT_O)); |
| 714 | } |
| 715 | |
| 716 | return DAL; |
| 717 | } |
| 718 | |
| 719 | bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget( |
| 720 | llvm::AMDGPU::GPUKind Kind) { |
| 721 | |
| 722 | // Assume nothing without a specific target. |
| 723 | if (Kind == llvm::AMDGPU::GK_NONE) |
| 724 | return false; |
| 725 | |
| 726 | const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(AK: Kind); |
| 727 | |
| 728 | // Default to enabling f32 denormals by default on subtargets where fma is |
| 729 | // fast with denormals |
| 730 | const bool BothDenormAndFMAFast = |
| 731 | (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) && |
| 732 | (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32); |
| 733 | return !BothDenormAndFMAFast; |
| 734 | } |
| 735 | |
| 736 | llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType( |
| 737 | const llvm::opt::ArgList &DriverArgs, const JobAction &JA, |
| 738 | const llvm::fltSemantics *FPType) const { |
| 739 | // Denormals should always be enabled for f16 and f64. |
| 740 | if (!FPType || FPType != &llvm::APFloat::IEEEsingle()) |
| 741 | return llvm::DenormalMode::getIEEE(); |
| 742 | |
| 743 | if (JA.getOffloadingDeviceKind() == Action::OFK_HIP || |
| 744 | JA.getOffloadingDeviceKind() == Action::OFK_Cuda) { |
| 745 | auto Arch = getProcessorFromTargetID(T: getTriple(), OffloadArch: JA.getOffloadingArch()); |
| 746 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(CPU: Arch); |
| 747 | if (FPType && FPType == &llvm::APFloat::IEEEsingle() && |
| 748 | DriverArgs.hasFlag(Pos: options::OPT_fgpu_flush_denormals_to_zero, |
| 749 | Neg: options::OPT_fno_gpu_flush_denormals_to_zero, |
| 750 | Default: getDefaultDenormsAreZeroForTarget(Kind))) |
| 751 | return llvm::DenormalMode::getPreserveSign(); |
| 752 | |
| 753 | return llvm::DenormalMode::getIEEE(); |
| 754 | } |
| 755 | |
| 756 | const StringRef GpuArch = getGPUArch(DriverArgs); |
| 757 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(CPU: GpuArch); |
| 758 | |
| 759 | // TODO: There are way too many flags that change this. Do we need to check |
| 760 | // them all? |
| 761 | bool DAZ = DriverArgs.hasArg(Ids: options::OPT_cl_denorms_are_zero) || |
| 762 | getDefaultDenormsAreZeroForTarget(Kind); |
| 763 | |
| 764 | // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are |
| 765 | // also implicit treated as zero (DAZ). |
| 766 | return DAZ ? llvm::DenormalMode::getPreserveSign() : |
| 767 | llvm::DenormalMode::getIEEE(); |
| 768 | } |
| 769 | |
| 770 | bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs, |
| 771 | llvm::AMDGPU::GPUKind Kind) { |
| 772 | const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(AK: Kind); |
| 773 | bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32); |
| 774 | |
| 775 | return !HasWave32 || DriverArgs.hasFlag( |
| 776 | Pos: options::OPT_mwavefrontsize64, Neg: options::OPT_mno_wavefrontsize64, Default: false); |
| 777 | } |
| 778 | |
| 779 | |
| 780 | /// ROCM Toolchain |
| 781 | ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple, |
| 782 | const ArgList &Args) |
| 783 | : AMDGPUToolChain(D, Triple, Args) { |
| 784 | RocmInstallation->detectDeviceLibrary(); |
| 785 | } |
| 786 | |
| 787 | void AMDGPUToolChain::addClangTargetOptions( |
| 788 | const llvm::opt::ArgList &DriverArgs, |
| 789 | llvm::opt::ArgStringList &CC1Args, |
| 790 | Action::OffloadKind DeviceOffloadingKind) const { |
| 791 | // Default to "hidden" visibility, as object level linking will not be |
| 792 | // supported for the foreseeable future. |
| 793 | if (!DriverArgs.hasArg(Ids: options::OPT_fvisibility_EQ, |
| 794 | Ids: options::OPT_fvisibility_ms_compat)) { |
| 795 | CC1Args.push_back(Elt: "-fvisibility=hidden" ); |
| 796 | CC1Args.push_back(Elt: "-fapply-global-visibility-to-externs" ); |
| 797 | } |
| 798 | } |
| 799 | |
| 800 | void AMDGPUToolChain::addClangWarningOptions(ArgStringList &CC1Args) const { |
| 801 | // AMDGPU does not support atomic lib call. Treat atomic alignment |
| 802 | // warnings as errors. |
| 803 | CC1Args.push_back(Elt: "-Werror=atomic-alignment" ); |
| 804 | } |
| 805 | |
| 806 | StringRef |
| 807 | AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const { |
| 808 | return getProcessorFromTargetID( |
| 809 | T: getTriple(), OffloadArch: DriverArgs.getLastArgValue(Id: options::OPT_mcpu_EQ)); |
| 810 | } |
| 811 | |
| 812 | AMDGPUToolChain::ParsedTargetIDType |
| 813 | AMDGPUToolChain::getParsedTargetID(const llvm::opt::ArgList &DriverArgs) const { |
| 814 | StringRef TargetID = DriverArgs.getLastArgValue(Id: options::OPT_mcpu_EQ); |
| 815 | if (TargetID.empty()) |
| 816 | return {.OptionalTargetID: std::nullopt, .OptionalGPUArch: std::nullopt, .OptionalFeatures: std::nullopt}; |
| 817 | |
| 818 | llvm::StringMap<bool> FeatureMap; |
| 819 | auto OptionalGpuArch = parseTargetID(T: getTriple(), OffloadArch: TargetID, FeatureMap: &FeatureMap); |
| 820 | if (!OptionalGpuArch) |
| 821 | return {.OptionalTargetID: TargetID.str(), .OptionalGPUArch: std::nullopt, .OptionalFeatures: std::nullopt}; |
| 822 | |
| 823 | return {.OptionalTargetID: TargetID.str(), .OptionalGPUArch: OptionalGpuArch->str(), .OptionalFeatures: FeatureMap}; |
| 824 | } |
| 825 | |
| 826 | void AMDGPUToolChain::checkTargetID( |
| 827 | const llvm::opt::ArgList &DriverArgs) const { |
| 828 | auto PTID = getParsedTargetID(DriverArgs); |
| 829 | if (PTID.OptionalTargetID && !PTID.OptionalGPUArch) { |
| 830 | getDriver().Diag(DiagID: clang::diag::err_drv_bad_target_id) |
| 831 | << *PTID.OptionalTargetID; |
| 832 | } |
| 833 | } |
| 834 | |
| 835 | Expected<SmallVector<std::string>> |
| 836 | AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const { |
| 837 | // Detect AMD GPUs availible on the system. |
| 838 | std::string Program; |
| 839 | if (Arg *A = Args.getLastArg(Ids: options::OPT_amdgpu_arch_tool_EQ)) |
| 840 | Program = A->getValue(); |
| 841 | else |
| 842 | Program = GetProgramPath(Name: "amdgpu-arch" ); |
| 843 | |
| 844 | auto StdoutOrErr = executeToolChainProgram(Executable: Program); |
| 845 | if (!StdoutOrErr) |
| 846 | return StdoutOrErr.takeError(); |
| 847 | |
| 848 | SmallVector<std::string, 1> GPUArchs; |
| 849 | for (StringRef Arch : llvm::split(Str: (*StdoutOrErr)->getBuffer(), Separator: "\n" )) |
| 850 | if (!Arch.empty()) |
| 851 | GPUArchs.push_back(Elt: Arch.str()); |
| 852 | |
| 853 | if (GPUArchs.empty()) |
| 854 | return llvm::createStringError(EC: std::error_code(), |
| 855 | S: "No AMD GPU detected in the system" ); |
| 856 | |
| 857 | return std::move(GPUArchs); |
| 858 | } |
| 859 | |
| 860 | void ROCMToolChain::addClangTargetOptions( |
| 861 | const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, |
| 862 | Action::OffloadKind DeviceOffloadingKind) const { |
| 863 | AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args, |
| 864 | DeviceOffloadingKind); |
| 865 | |
| 866 | // For the OpenCL case where there is no offload target, accept -nostdlib to |
| 867 | // disable bitcode linking. |
| 868 | if (DeviceOffloadingKind == Action::OFK_None && |
| 869 | DriverArgs.hasArg(Ids: options::OPT_nostdlib)) |
| 870 | return; |
| 871 | |
| 872 | if (!DriverArgs.hasFlag(Pos: options::OPT_offloadlib, Neg: options::OPT_no_offloadlib, |
| 873 | Default: true)) |
| 874 | return; |
| 875 | |
| 876 | // Get the device name and canonicalize it |
| 877 | const StringRef GpuArch = getGPUArch(DriverArgs); |
| 878 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(CPU: GpuArch); |
| 879 | const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(AK: Kind); |
| 880 | StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile(Gpu: CanonArch); |
| 881 | auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion( |
| 882 | CodeObjectVersion: getAMDGPUCodeObjectVersion(D: getDriver(), Args: DriverArgs)); |
| 883 | if (!RocmInstallation->checkCommonBitcodeLibs(GPUArch: CanonArch, LibDeviceFile, |
| 884 | ABIVer)) |
| 885 | return; |
| 886 | |
| 887 | bool Wave64 = isWave64(DriverArgs, Kind); |
| 888 | // TODO: There are way too many flags that change this. Do we need to check |
| 889 | // them all? |
| 890 | bool DAZ = DriverArgs.hasArg(Ids: options::OPT_cl_denorms_are_zero) || |
| 891 | getDefaultDenormsAreZeroForTarget(Kind); |
| 892 | bool FiniteOnly = DriverArgs.hasArg(Ids: options::OPT_cl_finite_math_only); |
| 893 | |
| 894 | bool UnsafeMathOpt = |
| 895 | DriverArgs.hasArg(Ids: options::OPT_cl_unsafe_math_optimizations); |
| 896 | bool FastRelaxedMath = DriverArgs.hasArg(Ids: options::OPT_cl_fast_relaxed_math); |
| 897 | bool CorrectSqrt = |
| 898 | DriverArgs.hasArg(Ids: options::OPT_cl_fp32_correctly_rounded_divide_sqrt); |
| 899 | |
| 900 | // GPU Sanitizer currently only supports ASan and is enabled through host |
| 901 | // ASan. |
| 902 | bool GPUSan = DriverArgs.hasFlag(Pos: options::OPT_fgpu_sanitize, |
| 903 | Neg: options::OPT_fno_gpu_sanitize, Default: true) && |
| 904 | getSanitizerArgs(JobArgs: DriverArgs).needsAsanRt(); |
| 905 | |
| 906 | // Add the OpenCL specific bitcode library. |
| 907 | llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs; |
| 908 | BCLibs.emplace_back(Args: RocmInstallation->getOpenCLPath().str()); |
| 909 | |
| 910 | // Add the generic set of libraries. |
| 911 | BCLibs.append(RHS: RocmInstallation->getCommonBitcodeLibs( |
| 912 | DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, |
| 913 | FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, isOpenMP: false)); |
| 914 | |
| 915 | for (auto [BCFile, Internalize] : BCLibs) { |
| 916 | if (Internalize) |
| 917 | CC1Args.push_back(Elt: "-mlink-builtin-bitcode" ); |
| 918 | else |
| 919 | CC1Args.push_back(Elt: "-mlink-bitcode-file" ); |
| 920 | CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: BCFile)); |
| 921 | } |
| 922 | } |
| 923 | |
| 924 | bool RocmInstallationDetector::checkCommonBitcodeLibs( |
| 925 | StringRef GPUArch, StringRef LibDeviceFile, |
| 926 | DeviceLibABIVersion ABIVer) const { |
| 927 | if (!hasDeviceLibrary()) { |
| 928 | D.Diag(DiagID: diag::err_drv_no_rocm_device_lib) << 0; |
| 929 | return false; |
| 930 | } |
| 931 | if (LibDeviceFile.empty()) { |
| 932 | D.Diag(DiagID: diag::err_drv_no_rocm_device_lib) << 1 << GPUArch; |
| 933 | return false; |
| 934 | } |
| 935 | if (ABIVer.requiresLibrary() && getABIVersionPath(ABIVer).empty()) { |
| 936 | // Starting from COV6, we will report minimum ROCm version requirement in |
| 937 | // the error message. |
| 938 | if (ABIVer.getAsCodeObjectVersion() < 6) |
| 939 | D.Diag(DiagID: diag::err_drv_no_rocm_device_lib) << 2 << ABIVer.toString() << 0; |
| 940 | else |
| 941 | D.Diag(DiagID: diag::err_drv_no_rocm_device_lib) |
| 942 | << 2 << ABIVer.toString() << 1 << "6.3" ; |
| 943 | return false; |
| 944 | } |
| 945 | return true; |
| 946 | } |
| 947 | |
| 948 | llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> |
| 949 | RocmInstallationDetector::getCommonBitcodeLibs( |
| 950 | const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64, |
| 951 | bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath, |
| 952 | bool CorrectSqrt, DeviceLibABIVersion ABIVer, bool GPUSan, |
| 953 | bool isOpenMP) const { |
| 954 | llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> BCLibs; |
| 955 | |
| 956 | auto AddBCLib = [&](ToolChain::BitCodeLibraryInfo BCLib, |
| 957 | bool Internalize = true) { |
| 958 | BCLib.ShouldInternalize = Internalize; |
| 959 | BCLibs.emplace_back(Args&: BCLib); |
| 960 | }; |
| 961 | auto AddSanBCLibs = [&]() { |
| 962 | if (GPUSan) |
| 963 | AddBCLib(getAsanRTLPath(), false); |
| 964 | }; |
| 965 | |
| 966 | AddSanBCLibs(); |
| 967 | AddBCLib(getOCMLPath()); |
| 968 | if (!isOpenMP) |
| 969 | AddBCLib(getOCKLPath()); |
| 970 | else if (GPUSan && isOpenMP) |
| 971 | AddBCLib(getOCKLPath(), false); |
| 972 | AddBCLib(getDenormalsAreZeroPath(Enabled: DAZ)); |
| 973 | AddBCLib(getUnsafeMathPath(Enabled: UnsafeMathOpt || FastRelaxedMath)); |
| 974 | AddBCLib(getFiniteOnlyPath(Enabled: FiniteOnly || FastRelaxedMath)); |
| 975 | AddBCLib(getCorrectlyRoundedSqrtPath(Enabled: CorrectSqrt)); |
| 976 | AddBCLib(getWavefrontSize64Path(Enabled: Wave64)); |
| 977 | AddBCLib(LibDeviceFile); |
| 978 | auto ABIVerPath = getABIVersionPath(ABIVer); |
| 979 | if (!ABIVerPath.empty()) |
| 980 | AddBCLib(ABIVerPath); |
| 981 | |
| 982 | return BCLibs; |
| 983 | } |
| 984 | |
| 985 | llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> |
| 986 | ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs, |
| 987 | const std::string &GPUArch, |
| 988 | bool isOpenMP) const { |
| 989 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(CPU: GPUArch); |
| 990 | const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(AK: Kind); |
| 991 | |
| 992 | StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile(Gpu: CanonArch); |
| 993 | auto ABIVer = DeviceLibABIVersion::fromCodeObjectVersion( |
| 994 | CodeObjectVersion: getAMDGPUCodeObjectVersion(D: getDriver(), Args: DriverArgs)); |
| 995 | if (!RocmInstallation->checkCommonBitcodeLibs(GPUArch: CanonArch, LibDeviceFile, |
| 996 | ABIVer)) |
| 997 | return {}; |
| 998 | |
| 999 | // If --hip-device-lib is not set, add the default bitcode libraries. |
| 1000 | // TODO: There are way too many flags that change this. Do we need to check |
| 1001 | // them all? |
| 1002 | bool DAZ = DriverArgs.hasFlag(Pos: options::OPT_fgpu_flush_denormals_to_zero, |
| 1003 | Neg: options::OPT_fno_gpu_flush_denormals_to_zero, |
| 1004 | Default: getDefaultDenormsAreZeroForTarget(Kind)); |
| 1005 | bool FiniteOnly = DriverArgs.hasFlag( |
| 1006 | Pos: options::OPT_ffinite_math_only, Neg: options::OPT_fno_finite_math_only, Default: false); |
| 1007 | bool UnsafeMathOpt = |
| 1008 | DriverArgs.hasFlag(Pos: options::OPT_funsafe_math_optimizations, |
| 1009 | Neg: options::OPT_fno_unsafe_math_optimizations, Default: false); |
| 1010 | bool FastRelaxedMath = DriverArgs.hasFlag(Pos: options::OPT_ffast_math, |
| 1011 | Neg: options::OPT_fno_fast_math, Default: false); |
| 1012 | bool CorrectSqrt = DriverArgs.hasFlag( |
| 1013 | Pos: options::OPT_fhip_fp32_correctly_rounded_divide_sqrt, |
| 1014 | Neg: options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, Default: true); |
| 1015 | bool Wave64 = isWave64(DriverArgs, Kind); |
| 1016 | |
| 1017 | // GPU Sanitizer currently only supports ASan and is enabled through host |
| 1018 | // ASan. |
| 1019 | bool GPUSan = DriverArgs.hasFlag(Pos: options::OPT_fgpu_sanitize, |
| 1020 | Neg: options::OPT_fno_gpu_sanitize, Default: true) && |
| 1021 | getSanitizerArgs(JobArgs: DriverArgs).needsAsanRt(); |
| 1022 | |
| 1023 | return RocmInstallation->getCommonBitcodeLibs( |
| 1024 | DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt, |
| 1025 | FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, isOpenMP); |
| 1026 | } |
| 1027 | |
| 1028 | bool AMDGPUToolChain::shouldSkipSanitizeOption( |
| 1029 | const ToolChain &TC, const llvm::opt::ArgList &DriverArgs, |
| 1030 | StringRef TargetID, const llvm::opt::Arg *A) const { |
| 1031 | // For actions without targetID, do nothing. |
| 1032 | if (TargetID.empty()) |
| 1033 | return false; |
| 1034 | Option O = A->getOption(); |
| 1035 | |
| 1036 | if (!O.matches(ID: options::OPT_fsanitize_EQ)) |
| 1037 | return false; |
| 1038 | |
| 1039 | if (!DriverArgs.hasFlag(Pos: options::OPT_fgpu_sanitize, |
| 1040 | Neg: options::OPT_fno_gpu_sanitize, Default: true)) |
| 1041 | return true; |
| 1042 | |
| 1043 | auto &Diags = TC.getDriver().getDiags(); |
| 1044 | |
| 1045 | // For simplicity, we only allow -fsanitize=address |
| 1046 | SanitizerMask K = parseSanitizerValue(Value: A->getValue(), /*AllowGroups=*/false); |
| 1047 | if (K != SanitizerKind::Address) |
| 1048 | return true; |
| 1049 | |
| 1050 | llvm::StringMap<bool> FeatureMap; |
| 1051 | auto OptionalGpuArch = parseTargetID(T: TC.getTriple(), OffloadArch: TargetID, FeatureMap: &FeatureMap); |
| 1052 | |
| 1053 | assert(OptionalGpuArch && "Invalid Target ID" ); |
| 1054 | (void)OptionalGpuArch; |
| 1055 | auto Loc = FeatureMap.find(Key: "xnack" ); |
| 1056 | if (Loc == FeatureMap.end() || !Loc->second) { |
| 1057 | Diags.Report( |
| 1058 | DiagID: clang::diag::warn_drv_unsupported_option_for_offload_arch_req_feature) |
| 1059 | << A->getAsString(Args: DriverArgs) << TargetID << "xnack+" ; |
| 1060 | return true; |
| 1061 | } |
| 1062 | return false; |
| 1063 | } |
| 1064 | |