1//===--- Cuda.cpp - Cuda Tool and ToolChain Implementations -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "Cuda.h"
10#include "clang/Basic/Cuda.h"
11#include "clang/Config/config.h"
12#include "clang/Driver/CommonArgs.h"
13#include "clang/Driver/Compilation.h"
14#include "clang/Driver/Distro.h"
15#include "clang/Driver/Driver.h"
16#include "clang/Driver/InputInfo.h"
17#include "clang/Options/Options.h"
18#include "llvm/ADT/SmallSet.h"
19#include "llvm/ADT/StringExtras.h"
20#include "llvm/Config/llvm-config.h" // for LLVM_HOST_TRIPLE
21#include "llvm/Option/ArgList.h"
22#include "llvm/Support/FileSystem.h"
23#include "llvm/Support/Path.h"
24#include "llvm/Support/Process.h"
25#include "llvm/Support/Program.h"
26#include "llvm/Support/VirtualFileSystem.h"
27#include "llvm/TargetParser/Host.h"
28#include "llvm/TargetParser/TargetParser.h"
29#include <system_error>
30
31using namespace clang::driver;
32using namespace clang::driver::toolchains;
33using namespace clang::driver::tools;
34using namespace clang;
35using namespace llvm::opt;
36
37namespace {
38
39CudaVersion getCudaVersion(uint32_t raw_version) {
40 if (raw_version < 7050)
41 return CudaVersion::CUDA_70;
42 if (raw_version < 8000)
43 return CudaVersion::CUDA_75;
44 if (raw_version < 9000)
45 return CudaVersion::CUDA_80;
46 if (raw_version < 9010)
47 return CudaVersion::CUDA_90;
48 if (raw_version < 9020)
49 return CudaVersion::CUDA_91;
50 if (raw_version < 10000)
51 return CudaVersion::CUDA_92;
52 if (raw_version < 10010)
53 return CudaVersion::CUDA_100;
54 if (raw_version < 10020)
55 return CudaVersion::CUDA_101;
56 if (raw_version < 11000)
57 return CudaVersion::CUDA_102;
58 if (raw_version < 11010)
59 return CudaVersion::CUDA_110;
60 if (raw_version < 11020)
61 return CudaVersion::CUDA_111;
62 if (raw_version < 11030)
63 return CudaVersion::CUDA_112;
64 if (raw_version < 11040)
65 return CudaVersion::CUDA_113;
66 if (raw_version < 11050)
67 return CudaVersion::CUDA_114;
68 if (raw_version < 11060)
69 return CudaVersion::CUDA_115;
70 if (raw_version < 11070)
71 return CudaVersion::CUDA_116;
72 if (raw_version < 11080)
73 return CudaVersion::CUDA_117;
74 if (raw_version < 11090)
75 return CudaVersion::CUDA_118;
76 if (raw_version < 12010)
77 return CudaVersion::CUDA_120;
78 if (raw_version < 12020)
79 return CudaVersion::CUDA_121;
80 if (raw_version < 12030)
81 return CudaVersion::CUDA_122;
82 if (raw_version < 12040)
83 return CudaVersion::CUDA_123;
84 if (raw_version < 12050)
85 return CudaVersion::CUDA_124;
86 if (raw_version < 12060)
87 return CudaVersion::CUDA_125;
88 if (raw_version < 12070)
89 return CudaVersion::CUDA_126;
90 if (raw_version < 12090)
91 return CudaVersion::CUDA_128;
92 if (raw_version < 13000)
93 return CudaVersion::CUDA_129;
94 if (raw_version < 13010)
95 return CudaVersion::CUDA_130;
96 if (raw_version < 13020)
97 return CudaVersion::CUDA_131;
98 if (raw_version < 13030)
99 return CudaVersion::CUDA_132;
100 return CudaVersion::NEW;
101}
102
103CudaVersion parseCudaHFile(llvm::StringRef Input) {
104 // Helper lambda which skips the words if the line starts with them or returns
105 // std::nullopt otherwise.
106 auto StartsWithWords =
107 [](llvm::StringRef Line,
108 const SmallVector<StringRef, 3> words) -> std::optional<StringRef> {
109 for (StringRef word : words) {
110 if (!Line.consume_front(Prefix: word))
111 return {};
112 Line = Line.ltrim();
113 }
114 return Line;
115 };
116
117 Input = Input.ltrim();
118 while (!Input.empty()) {
119 if (auto Line =
120 StartsWithWords(Input.ltrim(), {"#", "define", "CUDA_VERSION"})) {
121 uint32_t RawVersion;
122 Line->consumeInteger(Radix: 10, Result&: RawVersion);
123 return getCudaVersion(raw_version: RawVersion);
124 }
125 // Find next non-empty line.
126 Input = Input.drop_front(N: Input.find_first_of(Chars: "\n\r")).ltrim();
127 }
128 return CudaVersion::UNKNOWN;
129}
130} // namespace
131
132void CudaInstallationDetector::WarnIfUnsupportedVersion() const {
133 if (Version > CudaVersion::PARTIALLY_SUPPORTED) {
134 std::string VersionString = CudaVersionToString(V: Version);
135 if (!VersionString.empty())
136 VersionString.insert(pos: 0, s: " ");
137 D.Diag(DiagID: diag::warn_drv_new_cuda_version)
138 << VersionString
139 << (CudaVersion::PARTIALLY_SUPPORTED != CudaVersion::FULLY_SUPPORTED)
140 << CudaVersionToString(V: CudaVersion::PARTIALLY_SUPPORTED);
141 } else if (Version > CudaVersion::FULLY_SUPPORTED)
142 D.Diag(DiagID: diag::warn_drv_partially_supported_cuda_version)
143 << CudaVersionToString(V: Version);
144}
145
146CudaInstallationDetector::CudaInstallationDetector(
147 const Driver &D, const llvm::Triple &HostTriple,
148 const llvm::opt::ArgList &Args)
149 : D(D) {
150 struct Candidate {
151 std::string Path;
152 bool StrictChecking;
153
154 Candidate(std::string Path, bool StrictChecking = false)
155 : Path(Path), StrictChecking(StrictChecking) {}
156 };
157 SmallVector<Candidate, 4> Candidates;
158
159 // In decreasing order so we prefer newer versions to older versions.
160 std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
161 auto &FS = D.getVFS();
162
163 if (Args.hasArg(Ids: options::OPT_cuda_path_EQ)) {
164 Candidates.emplace_back(
165 Args: Args.getLastArgValue(Id: options::OPT_cuda_path_EQ).str());
166 } else if (HostTriple.isOSWindows()) {
167 for (const char *Ver : Versions)
168 Candidates.emplace_back(
169 Args: D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
170 Ver);
171 } else {
172 if (!Args.hasArg(Ids: options::OPT_cuda_path_ignore_env)) {
173 // Try to find ptxas binary. If the executable is located in a directory
174 // called 'bin/', its parent directory might be a good guess for a valid
175 // CUDA installation.
176 // However, some distributions might installs 'ptxas' to /usr/bin. In that
177 // case the candidate would be '/usr' which passes the following checks
178 // because '/usr/include' exists as well. To avoid this case, we always
179 // check for the directory potentially containing files for libdevice,
180 // even if the user passes -nocudalib.
181 if (llvm::ErrorOr<std::string> ptxas =
182 llvm::sys::findProgramByName(Name: "ptxas")) {
183 SmallString<256> ptxasAbsolutePath;
184 llvm::sys::fs::real_path(path: *ptxas, output&: ptxasAbsolutePath);
185
186 StringRef ptxasDir = llvm::sys::path::parent_path(path: ptxasAbsolutePath);
187 if (llvm::sys::path::filename(path: ptxasDir) == "bin")
188 Candidates.emplace_back(
189 Args: std::string(llvm::sys::path::parent_path(path: ptxasDir)),
190 /*StrictChecking=*/Args: true);
191 }
192 }
193
194 Candidates.emplace_back(Args: D.SysRoot + "/usr/local/cuda");
195 for (const char *Ver : Versions)
196 Candidates.emplace_back(Args: D.SysRoot + "/usr/local/cuda-" + Ver);
197
198 Distro Dist(FS, llvm::Triple(llvm::sys::getProcessTriple()));
199 if (Dist.IsDebian() || Dist.IsUbuntu())
200 // Special case for Debian to have nvidia-cuda-toolkit work
201 // out of the box. More info on http://bugs.debian.org/882505
202 Candidates.emplace_back(Args: D.SysRoot + "/usr/lib/cuda");
203 }
204
205 bool NoCudaLib =
206 !Args.hasFlag(Pos: options::OPT_offloadlib, Neg: options::OPT_no_offloadlib, Default: true);
207
208 for (const auto &Candidate : Candidates) {
209 InstallPath = Candidate.Path;
210 if (InstallPath.empty() || !FS.exists(Path: InstallPath))
211 continue;
212
213 BinPath = InstallPath + "/bin";
214 IncludePath = InstallPath + "/include";
215 LibDevicePath = InstallPath + "/nvvm/libdevice";
216
217 if (!(FS.exists(Path: IncludePath) && FS.exists(Path: BinPath)))
218 continue;
219 bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking);
220 if (CheckLibDevice && !FS.exists(Path: LibDevicePath))
221 continue;
222
223 Version = CudaVersion::UNKNOWN;
224 if (auto CudaHFile = FS.getBufferForFile(Name: InstallPath + "/include/cuda.h"))
225 Version = parseCudaHFile(Input: (*CudaHFile)->getBuffer());
226 // As the last resort, make an educated guess between CUDA-7.0, which had
227 // old-style libdevice bitcode, and an unknown recent CUDA version.
228 if (Version == CudaVersion::UNKNOWN) {
229 Version = FS.exists(Path: LibDevicePath + "/libdevice.10.bc")
230 ? CudaVersion::NEW
231 : CudaVersion::CUDA_70;
232 }
233
234 if (Version >= CudaVersion::CUDA_90) {
235 // CUDA-9+ uses single libdevice file for all GPU variants.
236 std::string FilePath = LibDevicePath + "/libdevice.10.bc";
237 if (FS.exists(Path: FilePath)) {
238 for (int Arch = (int)OffloadArch::SM_30, E = (int)OffloadArch::LAST;
239 Arch < E; ++Arch) {
240 OffloadArch OA = static_cast<OffloadArch>(Arch);
241 if (!IsNVIDIAOffloadArch(A: OA))
242 continue;
243 std::string OffloadArchName(OffloadArchToString(A: OA));
244 LibDeviceMap[OffloadArchName] = FilePath;
245 }
246 }
247 } else {
248 std::error_code EC;
249 for (llvm::vfs::directory_iterator LI = FS.dir_begin(Dir: LibDevicePath, EC),
250 LE;
251 !EC && LI != LE; LI = LI.increment(EC)) {
252 StringRef FilePath = LI->path();
253 StringRef FileName = llvm::sys::path::filename(path: FilePath);
254 // Process all bitcode filenames that look like
255 // libdevice.compute_XX.YY.bc
256 const StringRef LibDeviceName = "libdevice.";
257 if (!(FileName.starts_with(Prefix: LibDeviceName) && FileName.ends_with(Suffix: ".bc")))
258 continue;
259 StringRef GpuArch = FileName.slice(
260 Start: LibDeviceName.size(), End: FileName.find(C: '.', From: LibDeviceName.size()));
261 LibDeviceMap[GpuArch] = FilePath.str();
262 // Insert map entries for specific devices with this compute
263 // capability. NVCC's choice of the libdevice library version is
264 // rather peculiar and depends on the CUDA version.
265 if (GpuArch == "compute_20") {
266 LibDeviceMap["sm_20"] = std::string(FilePath);
267 LibDeviceMap["sm_21"] = std::string(FilePath);
268 LibDeviceMap["sm_32"] = std::string(FilePath);
269 } else if (GpuArch == "compute_30") {
270 LibDeviceMap["sm_30"] = std::string(FilePath);
271 if (Version < CudaVersion::CUDA_80) {
272 LibDeviceMap["sm_50"] = std::string(FilePath);
273 LibDeviceMap["sm_52"] = std::string(FilePath);
274 LibDeviceMap["sm_53"] = std::string(FilePath);
275 }
276 LibDeviceMap["sm_60"] = std::string(FilePath);
277 LibDeviceMap["sm_61"] = std::string(FilePath);
278 LibDeviceMap["sm_62"] = std::string(FilePath);
279 } else if (GpuArch == "compute_35") {
280 LibDeviceMap["sm_35"] = std::string(FilePath);
281 LibDeviceMap["sm_37"] = std::string(FilePath);
282 } else if (GpuArch == "compute_50") {
283 if (Version >= CudaVersion::CUDA_80) {
284 LibDeviceMap["sm_50"] = std::string(FilePath);
285 LibDeviceMap["sm_52"] = std::string(FilePath);
286 LibDeviceMap["sm_53"] = std::string(FilePath);
287 }
288 }
289 }
290 }
291
292 // Check that we have found at least one libdevice that we can link in if
293 // -nocudalib hasn't been specified.
294 if (LibDeviceMap.empty() && !NoCudaLib)
295 continue;
296
297 IsValid = true;
298 break;
299 }
300}
301
302void CudaInstallationDetector::AddCudaIncludeArgs(
303 const ArgList &DriverArgs, ArgStringList &CC1Args) const {
304 if (!DriverArgs.hasArg(Ids: options::OPT_nobuiltininc)) {
305 // Add cuda_wrappers/* to our system include path. This lets us wrap
306 // standard library headers.
307 SmallString<128> P(D.ResourceDir);
308 llvm::sys::path::append(path&: P, a: "include");
309 llvm::sys::path::append(path&: P, a: "cuda_wrappers");
310 CC1Args.push_back(Elt: "-internal-isystem");
311 CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: P));
312 }
313
314 if (!DriverArgs.hasFlag(Pos: options::OPT_offload_inc, Neg: options::OPT_no_offload_inc,
315 Default: true))
316 return;
317
318 if (!isValid()) {
319 D.Diag(DiagID: diag::err_drv_no_cuda_installation);
320 return;
321 }
322
323 CC1Args.push_back(Elt: "-include");
324 CC1Args.push_back(Elt: "__clang_cuda_runtime_wrapper.h");
325}
326
327void CudaInstallationDetector::CheckCudaVersionSupportsArch(
328 OffloadArch Arch) const {
329 if (Arch == OffloadArch::Unknown || Version == CudaVersion::UNKNOWN ||
330 ArchsWithBadVersion[(int)Arch])
331 return;
332
333 auto MinVersion = MinVersionForOffloadArch(A: Arch);
334 auto MaxVersion = MaxVersionForOffloadArch(A: Arch);
335 if (Version < MinVersion || Version > MaxVersion) {
336 ArchsWithBadVersion[(int)Arch] = true;
337 D.Diag(DiagID: diag::err_drv_cuda_version_unsupported)
338 << OffloadArchToString(A: Arch) << CudaVersionToString(V: MinVersion)
339 << CudaVersionToString(V: MaxVersion) << InstallPath
340 << CudaVersionToString(V: Version);
341 }
342}
343
344void CudaInstallationDetector::print(raw_ostream &OS) const {
345 if (isValid())
346 OS << "Found CUDA installation: " << InstallPath << ", version "
347 << CudaVersionToString(V: Version) << "\n";
348}
349
350namespace {
/// Debug info level for the NVPTX devices. The host and the device may need
/// different debug info levels; this type controls what is emitted for the
/// device. It either disables debug info emission completely, emits debug
/// directives only, or emits the same debug info as for the host.
enum DeviceDebugInfoLevel {
  /// Do not emit debug info for the devices.
  DisableDebugInfo,
  /// Emit only debug directives.
  DebugDirectivesOnly,
  /// Use the same debug info level as the host.
  EmitSameDebugInfoAsHost,
};
362} // anonymous namespace
363
364/// Define debug info level for the NVPTX devices. If the debug info for both
365/// the host and device are disabled (-g0/-ggdb0 or no debug options at all). If
366/// only debug directives are requested for the both host and device
367/// (-gline-directvies-only), or the debug info only for the device is disabled
368/// (optimization is on and --cuda-noopt-device-debug was not specified), the
369/// debug directves only must be emitted for the device. Otherwise, use the same
370/// debug info level just like for the host (with the limitations of only
371/// supported DWARF2 standard).
372static DeviceDebugInfoLevel mustEmitDebugInfo(const ArgList &Args) {
373 const Arg *A = Args.getLastArg(Ids: options::OPT_O_Group);
374 bool IsDebugEnabled = !A || A->getOption().matches(ID: options::OPT_O0) ||
375 Args.hasFlag(Pos: options::OPT_cuda_noopt_device_debug,
376 Neg: options::OPT_no_cuda_noopt_device_debug,
377 /*Default=*/false);
378 if (const Arg *A = Args.getLastArg(Ids: options::OPT_g_Group)) {
379 const Option &Opt = A->getOption();
380 if (Opt.matches(ID: options::OPT_gN_Group)) {
381 if (Opt.matches(ID: options::OPT_g0) || Opt.matches(ID: options::OPT_ggdb0))
382 return DisableDebugInfo;
383 if (Opt.matches(ID: options::OPT_gline_directives_only))
384 return DebugDirectivesOnly;
385 }
386 return IsDebugEnabled ? EmitSameDebugInfoAsHost : DebugDirectivesOnly;
387 }
388 return willEmitRemarks(Args) ? DebugDirectivesOnly : DisableDebugInfo;
389}
390
391void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
392 const InputInfo &Output,
393 const InputInfoList &Inputs,
394 const ArgList &Args,
395 const char *LinkingOutput) const {
396 const auto &TC =
397 static_cast<const toolchains::NVPTXToolChain &>(getToolChain());
398 assert(TC.getTriple().isNVPTX() && "Wrong platform");
399
400 BoundArch GPUArch;
401 // If this is a CUDA action we need to extract the device architecture
402 // from the Job's associated architecture, otherwise use the -march=arch
403 // option. This option may come from -Xopenmp-target flag or the default
404 // value.
405 if (JA.isDeviceOffloading(OKind: Action::OFK_Cuda)) {
406 GPUArch = JA.getOffloadingArch();
407 } else {
408 GPUArch = BoundArch(Args.getLastArgValue(Id: options::OPT_march_EQ));
409 if (GPUArch.empty()) {
410 C.getDriver().Diag(DiagID: diag::err_drv_offload_missing_gpu_arch)
411 << getToolChain().getArchName() << getShortName();
412 return;
413 }
414 }
415
416 // Obtain architecture from the action.
417 assert(GPUArch.Arch != OffloadArch::Unknown &&
418 "Device action expected to have an architecture.");
419
420 // Check that our installation's ptxas supports gpu_arch.
421 if (!Args.hasArg(Ids: options::OPT_no_cuda_version_check)) {
422 TC.CudaInstallation.CheckCudaVersionSupportsArch(Arch: GPUArch.Arch);
423 }
424
425 ArgStringList CmdArgs;
426 CmdArgs.push_back(Elt: TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
427 DeviceDebugInfoLevel DIKind = mustEmitDebugInfo(Args);
428 if (DIKind == EmitSameDebugInfoAsHost) {
429 // ptxas does not accept -g option if optimization is enabled, so
430 // we ignore the compiler's -O* options if we want debug info.
431 CmdArgs.push_back(Elt: "-g");
432 CmdArgs.push_back(Elt: "--dont-merge-basicblocks");
433 CmdArgs.push_back(Elt: "--return-at-end");
434 } else if (Arg *A = Args.getLastArg(Ids: options::OPT_O_Group)) {
435 // Map the -O we received to -O{0,1,2,3}.
436 //
437 // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
438 // default, so it may correspond more closely to the spirit of clang -O2.
439
440 // -O3 seems like the least-bad option when -Osomething is specified to
441 // clang but it isn't handled below.
442 StringRef OOpt = "3";
443 if (A->getOption().matches(ID: options::OPT_O4) ||
444 A->getOption().matches(ID: options::OPT_Ofast))
445 OOpt = "3";
446 else if (A->getOption().matches(ID: options::OPT_O0))
447 OOpt = "0";
448 else if (A->getOption().matches(ID: options::OPT_O)) {
449 // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
450 OOpt = llvm::StringSwitch<const char *>(A->getValue())
451 .Case(S: "1", Value: "1")
452 .Case(S: "2", Value: "2")
453 .Case(S: "3", Value: "3")
454 .Case(S: "s", Value: "2")
455 .Case(S: "z", Value: "2")
456 .Default(Value: "2");
457 }
458 CmdArgs.push_back(Elt: Args.MakeArgString(Str: llvm::Twine("-O") + OOpt));
459 } else {
460 // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
461 // to no optimizations, but ptxas's default is -O3.
462 CmdArgs.push_back(Elt: "-O0");
463 }
464 if (DIKind == DebugDirectivesOnly)
465 CmdArgs.push_back(Elt: "-lineinfo");
466
467 // Pass -v to ptxas if it was passed to the driver.
468 if (Args.hasArg(Ids: options::OPT_v))
469 CmdArgs.push_back(Elt: "-v");
470
471 CmdArgs.push_back(Elt: "--gpu-name");
472 CmdArgs.push_back(Elt: Args.MakeArgString(Str: GPUArch.ArchName));
473 CmdArgs.push_back(Elt: "--output-file");
474 std::string OutputFileName = TC.getInputFilename(Input: Output);
475
476 if (Output.isFilename() && OutputFileName != Output.getFilename())
477 C.addTempFile(Name: Args.MakeArgString(Str: OutputFileName));
478
479 CmdArgs.push_back(Elt: Args.MakeArgString(Str: OutputFileName));
480 for (const auto &II : Inputs)
481 CmdArgs.push_back(Elt: Args.MakeArgString(Str: II.getFilename()));
482
483 for (const auto &A : Args.getAllArgValues(Id: options::OPT_Xcuda_ptxas))
484 CmdArgs.push_back(Elt: Args.MakeArgString(Str: A));
485
486 bool Relocatable;
487 if (JA.isOffloading(OKind: Action::OFK_OpenMP))
488 // In OpenMP we need to generate relocatable code.
489 Relocatable = Args.hasFlag(Pos: options::OPT_fopenmp_relocatable_target,
490 Neg: options::OPT_fnoopenmp_relocatable_target,
491 /*Default=*/true);
492 else if (JA.isOffloading(OKind: Action::OFK_Cuda))
493 // In CUDA we generate relocatable code by default.
494 Relocatable = Args.hasFlag(Pos: options::OPT_fgpu_rdc, Neg: options::OPT_fno_gpu_rdc,
495 /*Default=*/false);
496 else
497 // Otherwise, we are compiling directly and should create linkable output.
498 Relocatable = true;
499
500 if (Relocatable)
501 CmdArgs.push_back(Elt: "-c");
502
503 const char *Exec;
504 if (Arg *A = Args.getLastArg(Ids: options::OPT_ptxas_path_EQ))
505 Exec = A->getValue();
506 else
507 Exec = Args.MakeArgString(Str: TC.GetProgramPath(Name: "ptxas"));
508 C.addCommand(Cmd: std::make_unique<Command>(
509 args: JA, args: *this,
510 args: ResponseFileSupport{.ResponseKind: ResponseFileSupport::RF_Full, .ResponseEncoding: llvm::sys::WEM_UTF8,
511 .ResponseFlag: "--options-file"},
512 args&: Exec, args&: CmdArgs, args: Inputs, args: Output));
513}
514
515static bool shouldIncludePTX(const ArgList &Args, StringRef InputArch) {
516 // The new driver does not include PTX by default to avoid overhead.
517 bool includePTX = !Args.hasFlag(Pos: options::OPT_offload_new_driver,
518 Neg: options::OPT_no_offload_new_driver, Default: true);
519 for (Arg *A : Args.filtered(Ids: options::OPT_cuda_include_ptx_EQ,
520 Ids: options::OPT_no_cuda_include_ptx_EQ)) {
521 A->claim();
522 const StringRef ArchStr = A->getValue();
523 if (A->getOption().matches(ID: options::OPT_cuda_include_ptx_EQ) &&
524 (ArchStr == "all" || ArchStr == InputArch))
525 includePTX = true;
526 else if (A->getOption().matches(ID: options::OPT_no_cuda_include_ptx_EQ) &&
527 (ArchStr == "all" || ArchStr == InputArch))
528 includePTX = false;
529 }
530 return includePTX;
531}
532
533// All inputs to this linker must be from CudaDeviceActions, as we need to look
534// at the Inputs' Actions in order to figure out which GPU architecture they
535// correspond to.
536void NVPTX::FatBinary::ConstructJob(Compilation &C, const JobAction &JA,
537 const InputInfo &Output,
538 const InputInfoList &Inputs,
539 const ArgList &Args,
540 const char *LinkingOutput) const {
541 const auto &TC =
542 static_cast<const toolchains::CudaToolChain &>(getToolChain());
543 assert(TC.getTriple().isNVPTX() && "Wrong platform");
544
545 ArgStringList CmdArgs;
546 if (TC.CudaInstallation.version() <= CudaVersion::CUDA_100)
547 CmdArgs.push_back(Elt: "--cuda");
548 CmdArgs.push_back(Elt: TC.getTriple().isArch64Bit() ? "-64" : "-32");
549 CmdArgs.push_back(Elt: Args.MakeArgString(Str: "--create"));
550 CmdArgs.push_back(Elt: Args.MakeArgString(Str: Output.getFilename()));
551 if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
552 CmdArgs.push_back(Elt: "-g");
553
554 for (const auto &II : Inputs) {
555 auto *A = II.getAction();
556 assert(A->getInputs().size() == 1 &&
557 "Device offload action is expected to have a single input");
558 BoundArch GpuArch = A->getOffloadingArch();
559 assert(!GpuArch.empty() &&
560 "Device action expected to have associated a GPU architecture!");
561
562 if (II.getType() == types::TY_PP_Asm &&
563 !shouldIncludePTX(Args, InputArch: GpuArch.ArchName))
564 continue;
565 StringRef Kind = (II.getType() == types::TY_PP_Asm) ? "ptx" : "elf";
566 CmdArgs.push_back(Elt: Args.MakeArgString(
567 Str: "--image3=kind=" + Kind + ",sm=" + GpuArch.ArchName.drop_front(N: 3) +
568 ",file=" + getToolChain().getInputFilename(Input: II)));
569 }
570
571 for (const auto &A : Args.getAllArgValues(Id: options::OPT_Xcuda_fatbinary))
572 CmdArgs.push_back(Elt: Args.MakeArgString(Str: A));
573
574 const char *Exec = Args.MakeArgString(Str: TC.GetProgramPath(Name: "fatbinary"));
575 C.addCommand(Cmd: std::make_unique<Command>(
576 args: JA, args: *this,
577 args: ResponseFileSupport{.ResponseKind: ResponseFileSupport::RF_Full, .ResponseEncoding: llvm::sys::WEM_UTF8,
578 .ResponseFlag: "--options-file"},
579 args&: Exec, args&: CmdArgs, args: Inputs, args: Output));
580}
581
582void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
583 const InputInfo &Output,
584 const InputInfoList &Inputs,
585 const ArgList &Args,
586 const char *LinkingOutput) const {
587 const auto &TC =
588 static_cast<const toolchains::NVPTXToolChain &>(getToolChain());
589 ArgStringList CmdArgs;
590
591 assert(TC.getTriple().isNVPTX() && "Wrong platform");
592
593 assert((Output.isFilename() || Output.isNothing()) && "Invalid output.");
594 if (Output.isFilename()) {
595 CmdArgs.push_back(Elt: "-o");
596 CmdArgs.push_back(Elt: Output.getFilename());
597 }
598
599 if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
600 CmdArgs.push_back(Elt: "-g");
601
602 if (Args.hasArg(Ids: options::OPT_v))
603 CmdArgs.push_back(Elt: "-v");
604
605 StringRef GPUArch = Args.getLastArgValue(Id: options::OPT_march_EQ);
606 if (GPUArch.empty() && !getToolChain().isUsingLTO(Args)) {
607 C.getDriver().Diag(DiagID: diag::err_drv_offload_missing_gpu_arch)
608 << getToolChain().getArchName() << getShortName();
609 return;
610 }
611
612 if (!GPUArch.empty()) {
613 CmdArgs.push_back(Elt: "-arch");
614 CmdArgs.push_back(Elt: Args.MakeArgString(Str: GPUArch));
615 }
616
617 if (Args.hasArg(Ids: options::OPT_ptxas_path_EQ))
618 CmdArgs.push_back(Elt: Args.MakeArgString(
619 Str: "--pxtas-path=" + Args.getLastArgValue(Id: options::OPT_ptxas_path_EQ)));
620
621 if (Args.hasArg(Ids: options::OPT_cuda_path_EQ) || TC.CudaInstallation.isValid()) {
622 StringRef CudaPath = Args.getLastArgValue(
623 Id: options::OPT_cuda_path_EQ,
624 Default: llvm::sys::path::parent_path(path: TC.CudaInstallation.getBinPath()));
625 CmdArgs.push_back(Elt: Args.MakeArgString(Str: "--cuda-path=" + CudaPath));
626 }
627
628 // Add paths specified in LIBRARY_PATH environment variable as -L options.
629 addDirectoryList(Args, CmdArgs, ArgName: "-L", EnvVar: "LIBRARY_PATH");
630
631 // Add standard library search paths passed on the command line.
632 Args.AddAllArgs(Output&: CmdArgs, Id0: options::OPT_L);
633 getToolChain().AddFilePathLibArgs(Args, CmdArgs);
634 AddLinkerInputs(TC: getToolChain(), Inputs, Args, CmdArgs, JA);
635
636 if (auto LTO = getToolChain().getLTOMode(Args); LTO != LTOK_None)
637 addLTOOptions(ToolChain: getToolChain(), Args, CmdArgs, Output, Inputs,
638 IsThinLTO: LTO == LTOK_Thin);
639
640 // Forward the PTX features if the nvlink-wrapper needs it.
641 std::vector<StringRef> Features;
642 getNVPTXTargetFeatures(D: C.getDriver(), Triple: getToolChain().getTriple(), Args,
643 Features);
644 CmdArgs.push_back(
645 Elt: Args.MakeArgString(Str: "--plugin-opt=-mattr=" + llvm::join(R&: Features, Separator: ",")));
646
647 // Add paths for the default clang library path.
648 SmallString<256> DefaultLibPath =
649 llvm::sys::path::parent_path(path: TC.getDriver().Dir);
650 llvm::sys::path::append(path&: DefaultLibPath, CLANG_INSTALL_LIBDIR_BASENAME);
651 CmdArgs.push_back(Elt: Args.MakeArgString(Str: Twine("-L") + DefaultLibPath));
652
653 getToolChain().addProfileRTLibs(Args, CmdArgs);
654 addSanitizerRuntimes(TC: getToolChain(), Args, CmdArgs);
655
656 if (Args.hasArg(Ids: options::OPT_stdlib))
657 CmdArgs.append(IL: {"-lc", "-lm"});
658 if (Args.hasArg(Ids: options::OPT_startfiles)) {
659 std::optional<std::string> IncludePath = getToolChain().getStdlibPath();
660 if (!IncludePath)
661 IncludePath = "/lib";
662 SmallString<128> P(*IncludePath);
663 llvm::sys::path::append(path&: P, a: "crt1.o");
664 CmdArgs.push_back(Elt: Args.MakeArgString(Str: P));
665 }
666
667 C.addCommand(Cmd: std::make_unique<Command>(
668 args: JA, args: *this,
669 args: ResponseFileSupport{.ResponseKind: ResponseFileSupport::RF_Full, .ResponseEncoding: llvm::sys::WEM_UTF8,
670 .ResponseFlag: "--options-file"},
671 args: Args.MakeArgString(Str: getToolChain().GetProgramPath(Name: "clang-nvlink-wrapper")),
672 args&: CmdArgs, args: Inputs, args: Output));
673}
674
675void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
676 const llvm::opt::ArgList &Args,
677 std::vector<StringRef> &Features) {
678 if (Args.hasArg(Ids: options::OPT_cuda_feature_EQ)) {
679 StringRef PtxFeature = Args.getLastArgValue(Id: options::OPT_cuda_feature_EQ);
680 Features.push_back(x: Args.MakeArgString(Str: PtxFeature));
681 return;
682 }
683 CudaInstallationDetector CudaInstallation(D, Triple, Args);
684
685 // New CUDA versions often introduce new instructions that are only supported
686 // by new PTX version, so we need to raise PTX level to enable them in NVPTX
687 // back-end.
688 const char *PtxFeature = nullptr;
689 switch (CudaInstallation.version()) {
690#define CASE_CUDA_VERSION(CUDA_VER, PTX_VER) \
691 case CudaVersion::CUDA_##CUDA_VER: \
692 PtxFeature = "+ptx" #PTX_VER; \
693 break;
694 CASE_CUDA_VERSION(132, 92);
695 CASE_CUDA_VERSION(131, 91);
696 CASE_CUDA_VERSION(130, 90);
697 CASE_CUDA_VERSION(129, 88);
698 CASE_CUDA_VERSION(128, 87);
699 CASE_CUDA_VERSION(126, 85);
700 CASE_CUDA_VERSION(125, 85);
701 CASE_CUDA_VERSION(124, 84);
702 CASE_CUDA_VERSION(123, 83);
703 CASE_CUDA_VERSION(122, 82);
704 CASE_CUDA_VERSION(121, 81);
705 CASE_CUDA_VERSION(120, 80);
706 CASE_CUDA_VERSION(118, 78);
707 CASE_CUDA_VERSION(117, 77);
708 CASE_CUDA_VERSION(116, 76);
709 CASE_CUDA_VERSION(115, 75);
710 CASE_CUDA_VERSION(114, 74);
711 CASE_CUDA_VERSION(113, 73);
712 CASE_CUDA_VERSION(112, 72);
713 CASE_CUDA_VERSION(111, 71);
714 CASE_CUDA_VERSION(110, 70);
715 CASE_CUDA_VERSION(102, 65);
716 CASE_CUDA_VERSION(101, 64);
717 CASE_CUDA_VERSION(100, 63);
718 CASE_CUDA_VERSION(92, 61);
719 CASE_CUDA_VERSION(91, 61);
720 CASE_CUDA_VERSION(90, 60);
721 CASE_CUDA_VERSION(80, 50);
722 CASE_CUDA_VERSION(75, 43);
723 CASE_CUDA_VERSION(70, 42);
724#undef CASE_CUDA_VERSION
725 // TODO: Use specific CUDA version once it's public.
726 case clang::CudaVersion::NEW:
727 PtxFeature = "+ptx86";
728 break;
729 default:
730 // No PTX feature specified; let the backend choose based on the target SM.
731 break;
732 }
733 if (PtxFeature)
734 Features.push_back(x: PtxFeature);
735}
736
737/// NVPTX toolchain. Our assembler is ptxas, and our linker is nvlink. This
738/// operates as a stand-alone version of the NVPTX tools without the host
739/// toolchain.
740NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
741 const llvm::Triple &HostTriple,
742 const ArgList &Args)
743 : ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args) {
744 if (CudaInstallation.isValid())
745 getProgramPaths().push_back(Elt: std::string(CudaInstallation.getBinPath()));
746 // Lookup binaries into the driver directory, this is used to
747 // discover the 'nvptx-arch' executable.
748 getProgramPaths().push_back(Elt: getDriver().Dir);
749}
750
751/// We only need the host triple to locate the CUDA binary utilities, use the
752/// system's default triple if not provided.
753NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
754 const ArgList &Args)
755 : NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args) {
756 loadMultilibsFromYAML(Args, D);
757}
758
759llvm::opt::DerivedArgList *
760NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
761 BoundArch BA,
762 Action::OffloadKind OffloadKind) const {
763 DerivedArgList *DAL = ToolChain::TranslateArgs(Args, BA, DeviceOffloadKind: OffloadKind);
764 if (!DAL)
765 DAL = new DerivedArgList(Args.getBaseArgs());
766
767 const OptTable &Opts = getDriver().getOpts();
768
769 for (Arg *A : Args)
770 if (!llvm::is_contained(Range&: *DAL, Element: A))
771 DAL->append(A);
772
773 if (!DAL->hasArg(Ids: options::OPT_march_EQ) && OffloadKind != Action::OFK_None) {
774 DAL->AddJoinedArg(BaseArg: nullptr, Opt: Opts.getOption(Opt: options::OPT_march_EQ),
775 Value: OffloadArchToString(A: OffloadArch::CudaDefault));
776 } else if (DAL->getLastArgValue(Id: options::OPT_march_EQ) == "generic" &&
777 OffloadKind == Action::OFK_None) {
778 DAL->eraseArg(Id: options::OPT_march_EQ);
779 } else if (DAL->getLastArgValue(Id: options::OPT_march_EQ) == "native") {
780 auto GPUsOrErr = getSystemGPUArchs(Args);
781 if (!GPUsOrErr) {
782 getDriver().Diag(DiagID: diag::err_drv_undetermined_gpu_arch)
783 << getArchName() << llvm::toString(E: GPUsOrErr.takeError()) << "-march";
784 } else {
785 auto &GPUs = *GPUsOrErr;
786 if (llvm::SmallSet<std::string, 1>(GPUs.begin(), GPUs.end()).size() > 1)
787 getDriver().Diag(DiagID: diag::warn_drv_multi_gpu_arch)
788 << getArchName() << llvm::join(R&: GPUs, Separator: ", ") << "-march";
789 DAL->AddJoinedArg(BaseArg: nullptr, Opt: Opts.getOption(Opt: options::OPT_march_EQ),
790 Value: Args.MakeArgString(Str: GPUs.front()));
791 }
792 }
793
794 return DAL;
795}
796
797void NVPTXToolChain::addClangTargetOptions(
798 const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
799 BoundArch BA, Action::OffloadKind DeviceOffloadingKind) const {}
800
801void NVPTXToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
802 ArgStringList &CC1Args) const {
803 if (DriverArgs.hasArg(Ids: options::OPT_nostdinc) ||
804 DriverArgs.hasArg(Ids: options::OPT_nostdlibinc))
805 return;
806
807 // Add multilib variant include paths in priority order.
808 for (const Multilib &M : getOrderedMultilibs()) {
809 if (M.isDefault())
810 continue;
811 if (std::optional<std::string> StdlibIncDir = getStdlibIncludePath()) {
812 SmallString<128> Dir(*StdlibIncDir);
813 llvm::sys::path::append(path&: Dir, a: M.includeSuffix());
814 if (getDriver().getVFS().exists(Path: Dir))
815 addSystemInclude(DriverArgs, CC1Args, Path: Dir);
816 }
817 }
818
819 if (std::optional<std::string> Path = getStdlibIncludePath())
820 addSystemInclude(DriverArgs, CC1Args, Path: *Path);
821}
822
823bool NVPTXToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
824 const Option &O = A->getOption();
825 return (O.matches(ID: options::OPT_gN_Group) &&
826 !O.matches(ID: options::OPT_gmodules)) ||
827 O.matches(ID: options::OPT_g_Flag) ||
828 O.matches(ID: options::OPT_ggdbN_Group) || O.matches(ID: options::OPT_ggdb) ||
829 O.matches(ID: options::OPT_gdwarf) || O.matches(ID: options::OPT_gdwarf_2) ||
830 O.matches(ID: options::OPT_gdwarf_3) || O.matches(ID: options::OPT_gdwarf_4) ||
831 O.matches(ID: options::OPT_gdwarf_5) ||
832 O.matches(ID: options::OPT_gcolumn_info);
833}
834
835void NVPTXToolChain::adjustDebugInfoKind(
836 llvm::codegenoptions::DebugInfoKind &DebugInfoKind,
837 const ArgList &Args) const {
838 switch (mustEmitDebugInfo(Args)) {
839 case DisableDebugInfo:
840 DebugInfoKind = llvm::codegenoptions::NoDebugInfo;
841 break;
842 case DebugDirectivesOnly:
843 DebugInfoKind = llvm::codegenoptions::DebugDirectivesOnly;
844 break;
845 case EmitSameDebugInfoAsHost:
846 // Use same debug info level as the host.
847 break;
848 }
849}
850
851Expected<SmallVector<std::string>>
852NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const {
853 // Detect NVIDIA GPUs availible on the system.
854 std::string Program;
855 if (Arg *A = Args.getLastArg(Ids: options::OPT_offload_arch_tool_EQ))
856 Program = A->getValue();
857 else
858 Program = GetProgramPath(Name: "nvptx-arch");
859
860 auto StdoutOrErr = getDriver().executeProgram(Args: {Program});
861 if (!StdoutOrErr)
862 return StdoutOrErr.takeError();
863
864 SmallVector<std::string, 1> GPUArchs;
865 for (StringRef Arch : llvm::split(Str: (*StdoutOrErr)->getBuffer(), Separator: "\n"))
866 if (!Arch.empty())
867 GPUArchs.push_back(Elt: Arch.str());
868
869 if (GPUArchs.empty())
870 return llvm::createStringError(EC: std::error_code(),
871 S: "No NVIDIA GPU detected in the system");
872
873 return std::move(GPUArchs);
874}
875
876/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
877/// which isn't properly a linker but nonetheless performs the step of stitching
878/// together object files from the assembler into a single blob.
879
880CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
881 const ToolChain &HostTC, const ArgList &Args)
882 : NVPTXToolChain(D, Triple, HostTC.getTriple(), Args), HostTC(HostTC) {}
883
884void CudaToolChain::addClangTargetOptions(
885 const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
886 BoundArch BA, Action::OffloadKind DeviceOffloadingKind) const {
887 HostTC.addClangTargetOptions(DriverArgs, CC1Args, BA, DeviceOffloadKind: DeviceOffloadingKind);
888
889 StringRef GpuArch = DriverArgs.getLastArgValue(Id: options::OPT_march_EQ);
890 assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
891 DeviceOffloadingKind == Action::OFK_Cuda) &&
892 "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
893
894 CC1Args.append(IL: {"-fcuda-is-device", "-mllvm",
895 "-enable-memcpyopt-without-libcalls",
896 "-fno-threadsafe-statics"});
897
898 if (DriverArgs.hasFlag(Pos: options::OPT_fcuda_short_ptr,
899 Neg: options::OPT_fno_cuda_short_ptr, Default: false))
900 CC1Args.append(IL: {"-mllvm", "--nvptx-short-ptr"});
901
902 if (!DriverArgs.hasFlag(Pos: options::OPT_offloadlib, Neg: options::OPT_no_offloadlib,
903 Default: true))
904 return;
905
906 if (DeviceOffloadingKind == Action::OFK_OpenMP &&
907 DriverArgs.hasArg(Ids: options::OPT_S))
908 return;
909
910 std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(Gpu: GpuArch);
911 if (LibDeviceFile.empty()) {
912 getDriver().Diag(DiagID: diag::err_drv_no_cuda_libdevice) << GpuArch;
913 return;
914 }
915
916 CC1Args.push_back(Elt: "-mlink-builtin-bitcode");
917 CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: LibDeviceFile));
918
919 // For now, we don't use any Offload/OpenMP device runtime when we offload
920 // CUDA via LLVM/Offload. We should split the Offload/OpenMP device runtime
921 // and include the "generic" (or CUDA-specific) parts.
922 if (DriverArgs.hasFlag(Pos: options::OPT_foffload_via_llvm,
923 Neg: options::OPT_fno_offload_via_llvm, Default: false))
924 return;
925
926 clang::CudaVersion CudaInstallationVersion = CudaInstallation.version();
927
928 if (CudaInstallationVersion >= CudaVersion::UNKNOWN)
929 CC1Args.push_back(
930 Elt: DriverArgs.MakeArgString(Str: Twine("-target-sdk-version=") +
931 CudaVersionToString(V: CudaInstallationVersion)));
932
933 if (DeviceOffloadingKind == Action::OFK_OpenMP) {
934 if (CudaInstallationVersion < CudaVersion::CUDA_92) {
935 getDriver().Diag(
936 DiagID: diag::err_drv_omp_offload_target_cuda_version_not_support)
937 << CudaVersionToString(V: CudaInstallationVersion);
938 return;
939 }
940
941 // Link the bitcode library late if we're using device LTO.
942 if (isUsingLTO(Args: DriverArgs, Kind: DeviceOffloadingKind))
943 return;
944
945 addOpenMPDeviceRTL(D: getDriver(), DriverArgs, CC1Args, BitcodeSuffix: GpuArch.str(),
946 Triple: getTriple(), HostTC);
947 }
948}
949
950llvm::DenormalMode CudaToolChain::getDefaultDenormalModeForType(
951 const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
952 const llvm::fltSemantics *FPType) const {
953 if (JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
954 if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
955 DriverArgs.hasFlag(Pos: options::OPT_fgpu_flush_denormals_to_zero,
956 Neg: options::OPT_fno_gpu_flush_denormals_to_zero, Default: false))
957 return llvm::DenormalMode::getPreserveSign();
958 }
959
960 assert(JA.getOffloadingDeviceKind() != Action::OFK_Host);
961 return llvm::DenormalMode::getIEEE();
962}
963
964void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
965 ArgStringList &CC1Args) const {
966 // Check our CUDA version if we're going to include the CUDA headers.
967 if (DriverArgs.hasFlag(Pos: options::OPT_offload_inc, Neg: options::OPT_no_offload_inc,
968 Default: true) &&
969 !DriverArgs.hasArg(Ids: options::OPT_no_cuda_version_check)) {
970 StringRef Arch = DriverArgs.getLastArgValue(Id: options::OPT_march_EQ);
971 assert(!Arch.empty() && "Must have an explicit GPU arch.");
972 CudaInstallation.CheckCudaVersionSupportsArch(Arch: StringToOffloadArch(S: Arch));
973 }
974 CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
975}
976
977std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
978 // Only object files are changed, for example assembly files keep their .s
979 // extensions. If the user requested device-only compilation don't change it.
980 if (Input.getType() != types::TY_Object || getDriver().offloadDeviceOnly())
981 return ToolChain::getInputFilename(Input);
982
983 return ToolChain::getInputFilename(Input);
984}
985
986llvm::opt::DerivedArgList *
987CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
988 BoundArch BA,
989 Action::OffloadKind DeviceOffloadKind) const {
990 DerivedArgList *DAL = HostTC.TranslateArgs(Args, BA, DeviceOffloadKind);
991 if (!DAL)
992 DAL = new DerivedArgList(Args.getBaseArgs());
993
994 const OptTable &Opts = getDriver().getOpts();
995
996 for (Arg *A : Args) {
997 // Make sure flags are not duplicated.
998 if (!llvm::is_contained(Range&: *DAL, Element: A)) {
999 DAL->append(A);
1000 }
1001 }
1002
1003 if (BA) {
1004 DAL->eraseArg(Id: options::OPT_march_EQ);
1005 DAL->AddJoinedArg(BaseArg: nullptr, Opt: Opts.getOption(Opt: options::OPT_march_EQ),
1006 Value: BA.ArchName);
1007 }
1008 return DAL;
1009}
1010
1011Tool *NVPTXToolChain::buildAssembler() const {
1012 return new tools::NVPTX::Assembler(*this);
1013}
1014
1015Tool *NVPTXToolChain::buildLinker() const {
1016 return new tools::NVPTX::Linker(*this);
1017}
1018
1019Tool *CudaToolChain::buildAssembler() const {
1020 return new tools::NVPTX::Assembler(*this);
1021}
1022
1023Tool *CudaToolChain::buildLinker() const {
1024 return new tools::NVPTX::FatBinary(*this);
1025}
1026
1027void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
1028 HostTC.addClangWarningOptions(CC1Args);
1029}
1030
1031ToolChain::CXXStdlibType
1032CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
1033 return HostTC.GetCXXStdlibType(Args);
1034}
1035
1036void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
1037 ArgStringList &CC1Args) const {
1038 HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
1039
1040 if (DriverArgs.hasFlag(Pos: options::OPT_offload_inc, Neg: options::OPT_no_offload_inc,
1041 Default: true) &&
1042 CudaInstallation.isValid())
1043 CC1Args.append(
1044 IL: {"-internal-isystem",
1045 DriverArgs.MakeArgString(Str: CudaInstallation.getIncludePath())});
1046}
1047
1048void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
1049 ArgStringList &CC1Args) const {
1050 HostTC.AddClangCXXStdlibIncludeArgs(DriverArgs: Args, CC1Args);
1051}
1052
1053void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
1054 ArgStringList &CC1Args) const {
1055 HostTC.AddIAMCUIncludeArgs(DriverArgs: Args, CC1Args);
1056}
1057
1058SanitizerMask CudaToolChain::getSupportedSanitizers(
1059 BoundArch BA, Action::OffloadKind DeviceOffloadKind) const {
1060 // The CudaToolChain only supports sanitizers in the sense that it allows
1061 // sanitizer arguments on the command line if they are supported by the host
1062 // toolchain. The CudaToolChain will actually ignore any command line
1063 // arguments for any of these "supported" sanitizers. That means that no
1064 // sanitization of device code is actually supported at this time.
1065 //
1066 // This behavior is necessary because the host and device toolchains
1067 // invocations often share the command line, so the device toolchain must
1068 // tolerate flags meant only for the host toolchain.
1069
1070 // FIXME: Be accurate and use DeviceOffloadKind.
1071 return HostTC.getSupportedSanitizers(BA, DeviceOffloadKind);
1072}
1073
1074VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
1075 const ArgList &Args) const {
1076 return HostTC.computeMSVCVersion(D, Args);
1077}
1078