//===--- Cuda.cpp - Cuda Tool and ToolChain Implementations -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "Cuda.h"
#include "clang/Basic/Cuda.h"
#include "clang/Config/config.h"
#include "clang/Driver/CommonArgs.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Distro.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Options.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/llvm-config.h" // for LLVM_HOST_TRIPLE
#include "llvm/Option/ArgList.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/TargetParser/Host.h"
#include "llvm/TargetParser/TargetParser.h"
#include <system_error>

using namespace clang::driver;
using namespace clang::driver::toolchains;
using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;

namespace {

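// cuda.h encodes its release as CUDA_VERSION = major * 1000 + minor * 10; for
// example, CUDA 12.4 defines CUDA_VERSION as 12040. The thresholds below are
// upper bounds on that raw value.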
CudaVersion getCudaVersion(uint32_t raw_version) {
  if (raw_version < 7050)
    return CudaVersion::CUDA_70;
  if (raw_version < 8000)
    return CudaVersion::CUDA_75;
  if (raw_version < 9000)
    return CudaVersion::CUDA_80;
  if (raw_version < 9010)
    return CudaVersion::CUDA_90;
  if (raw_version < 9020)
    return CudaVersion::CUDA_91;
  if (raw_version < 10000)
    return CudaVersion::CUDA_92;
  if (raw_version < 10010)
    return CudaVersion::CUDA_100;
  if (raw_version < 10020)
    return CudaVersion::CUDA_101;
  if (raw_version < 11000)
    return CudaVersion::CUDA_102;
  if (raw_version < 11010)
    return CudaVersion::CUDA_110;
  if (raw_version < 11020)
    return CudaVersion::CUDA_111;
  if (raw_version < 11030)
    return CudaVersion::CUDA_112;
  if (raw_version < 11040)
    return CudaVersion::CUDA_113;
  if (raw_version < 11050)
    return CudaVersion::CUDA_114;
  if (raw_version < 11060)
    return CudaVersion::CUDA_115;
  if (raw_version < 11070)
    return CudaVersion::CUDA_116;
  if (raw_version < 11080)
    return CudaVersion::CUDA_117;
  if (raw_version < 11090)
    return CudaVersion::CUDA_118;
  if (raw_version < 12010)
    return CudaVersion::CUDA_120;
  if (raw_version < 12020)
    return CudaVersion::CUDA_121;
  if (raw_version < 12030)
    return CudaVersion::CUDA_122;
  if (raw_version < 12040)
    return CudaVersion::CUDA_123;
  if (raw_version < 12050)
    return CudaVersion::CUDA_124;
  if (raw_version < 12060)
    return CudaVersion::CUDA_125;
  if (raw_version < 12070)
    return CudaVersion::CUDA_126;
  if (raw_version < 12090)
    return CudaVersion::CUDA_128;
  return CudaVersion::NEW;
}

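// Parse a cuda.h buffer and return the CUDA version it declares, by scanning
// for a line of the form "#define CUDA_VERSION 12040" (value illustrative).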
CudaVersion parseCudaHFile(llvm::StringRef Input) {
  // Helper lambda which skips the words if the line starts with them or returns
  // std::nullopt otherwise.
  auto StartsWithWords =
      [](llvm::StringRef Line,
         const SmallVector<StringRef, 3> words) -> std::optional<StringRef> {
    for (StringRef word : words) {
      if (!Line.consume_front(word))
        return {};
      Line = Line.ltrim();
    }
    return Line;
  };

  Input = Input.ltrim();
  while (!Input.empty()) {
    if (auto Line =
            StartsWithWords(Input.ltrim(), {"#", "define", "CUDA_VERSION"})) {
      uint32_t RawVersion;
      Line->consumeInteger(10, RawVersion);
      return getCudaVersion(RawVersion);
    }
    // Find next non-empty line.
    Input = Input.drop_front(Input.find_first_of("\n\r")).ltrim();
  }
  return CudaVersion::UNKNOWN;
}
} // namespace

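// Diagnose CUDA versions that are newer than the newest release this clang
// fully or partially supports.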
void CudaInstallationDetector::WarnIfUnsupportedVersion() const {
  if (Version > CudaVersion::PARTIALLY_SUPPORTED) {
    std::string VersionString = CudaVersionToString(Version);
    if (!VersionString.empty())
      VersionString.insert(0, " ");
    D.Diag(diag::warn_drv_new_cuda_version)
        << VersionString
        << (CudaVersion::PARTIALLY_SUPPORTED != CudaVersion::FULLY_SUPPORTED)
        << CudaVersionToString(CudaVersion::PARTIALLY_SUPPORTED);
  } else if (Version > CudaVersion::FULLY_SUPPORTED)
    D.Diag(diag::warn_drv_partially_supported_cuda_version)
        << CudaVersionToString(Version);
}

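// Look for a CUDA installation. Candidates are, in order of preference: an
// explicit --cuda-path; on Windows, the default 'NVIDIA GPU Computing
// Toolkit' locations; elsewhere, the parent of the 'bin' directory containing
// ptxas (unless --cuda-path-ignore-env is given), /usr/local/cuda and
// /usr/local/cuda-<version>, and the Debian/Ubuntu /usr/lib/cuda layout.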
CudaInstallationDetector::CudaInstallationDetector(
    const Driver &D, const llvm::Triple &HostTriple,
    const llvm::opt::ArgList &Args)
    : D(D) {
  struct Candidate {
    std::string Path;
    bool StrictChecking;

    Candidate(std::string Path, bool StrictChecking = false)
        : Path(Path), StrictChecking(StrictChecking) {}
  };
  SmallVector<Candidate, 4> Candidates;

  // In decreasing order so we prefer newer versions to older versions.
  std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
  auto &FS = D.getVFS();

  if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
    Candidates.emplace_back(
        Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
  } else if (HostTriple.isOSWindows()) {
    for (const char *Ver : Versions)
      Candidates.emplace_back(
          D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
          Ver);
  } else {
    if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
      // Try to find ptxas binary. If the executable is located in a directory
      // called 'bin/', its parent directory might be a good guess for a valid
      // CUDA installation.
      // However, some distributions might install 'ptxas' to /usr/bin. In that
      // case the candidate would be '/usr' which passes the following checks
      // because '/usr/include' exists as well. To avoid this case, we always
      // check for the directory potentially containing files for libdevice,
      // even if the user passes -nocudalib.
      if (llvm::ErrorOr<std::string> ptxas =
              llvm::sys::findProgramByName("ptxas")) {
        SmallString<256> ptxasAbsolutePath;
        llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);

        StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
        if (llvm::sys::path::filename(ptxasDir) == "bin")
          Candidates.emplace_back(
              std::string(llvm::sys::path::parent_path(ptxasDir)),
              /*StrictChecking=*/true);
      }
    }

    Candidates.emplace_back(D.SysRoot + "/usr/local/cuda");
    for (const char *Ver : Versions)
      Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);

    Distro Dist(FS, llvm::Triple(llvm::sys::getProcessTriple()));
    if (Dist.IsDebian() || Dist.IsUbuntu())
      // Special case for Debian to have nvidia-cuda-toolkit work
      // out of the box. More info on http://bugs.debian.org/882505
      Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
  }

  bool NoCudaLib =
      !Args.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib, true);

  for (const auto &Candidate : Candidates) {
    InstallPath = Candidate.Path;
    if (InstallPath.empty() || !FS.exists(InstallPath))
      continue;

    BinPath = InstallPath + "/bin";
    IncludePath = InstallPath + "/include";
    LibDevicePath = InstallPath + "/nvvm/libdevice";

    if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
      continue;
    bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking);
    if (CheckLibDevice && !FS.exists(LibDevicePath))
      continue;

    Version = CudaVersion::UNKNOWN;
    if (auto CudaHFile = FS.getBufferForFile(InstallPath + "/include/cuda.h"))
      Version = parseCudaHFile((*CudaHFile)->getBuffer());
    // As a last resort, make an educated guess between CUDA-7.0, which had
    // old-style libdevice bitcode, and an unknown recent CUDA version.
    if (Version == CudaVersion::UNKNOWN) {
      Version = FS.exists(LibDevicePath + "/libdevice.10.bc")
                    ? CudaVersion::NEW
                    : CudaVersion::CUDA_70;
    }

    if (Version >= CudaVersion::CUDA_90) {
      // CUDA-9+ uses single libdevice file for all GPU variants.
      std::string FilePath = LibDevicePath + "/libdevice.10.bc";
      if (FS.exists(FilePath)) {
        for (int Arch = (int)OffloadArch::SM_30, E = (int)OffloadArch::LAST;
             Arch < E; ++Arch) {
          OffloadArch OA = static_cast<OffloadArch>(Arch);
          if (!IsNVIDIAOffloadArch(OA))
            continue;
          std::string OffloadArchName(OffloadArchToString(OA));
          LibDeviceMap[OffloadArchName] = FilePath;
        }
      }
    } else {
      std::error_code EC;
      for (llvm::vfs::directory_iterator LI = FS.dir_begin(LibDevicePath, EC),
                                         LE;
           !EC && LI != LE; LI = LI.increment(EC)) {
        StringRef FilePath = LI->path();
        StringRef FileName = llvm::sys::path::filename(FilePath);
        // Process all bitcode filenames that look like
        // libdevice.compute_XX.YY.bc
        const StringRef LibDeviceName = "libdevice.";
        if (!(FileName.starts_with(LibDeviceName) && FileName.ends_with(".bc")))
          continue;
        StringRef GpuArch = FileName.slice(
            LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
        LibDeviceMap[GpuArch] = FilePath.str();
        // Insert map entries for specific devices with this compute
        // capability. NVCC's choice of the libdevice library version is
        // rather peculiar and depends on the CUDA version.
        if (GpuArch == "compute_20") {
          LibDeviceMap["sm_20"] = std::string(FilePath);
          LibDeviceMap["sm_21"] = std::string(FilePath);
          LibDeviceMap["sm_32"] = std::string(FilePath);
        } else if (GpuArch == "compute_30") {
          LibDeviceMap["sm_30"] = std::string(FilePath);
          if (Version < CudaVersion::CUDA_80) {
            LibDeviceMap["sm_50"] = std::string(FilePath);
            LibDeviceMap["sm_52"] = std::string(FilePath);
            LibDeviceMap["sm_53"] = std::string(FilePath);
          }
          LibDeviceMap["sm_60"] = std::string(FilePath);
          LibDeviceMap["sm_61"] = std::string(FilePath);
          LibDeviceMap["sm_62"] = std::string(FilePath);
        } else if (GpuArch == "compute_35") {
          LibDeviceMap["sm_35"] = std::string(FilePath);
          LibDeviceMap["sm_37"] = std::string(FilePath);
        } else if (GpuArch == "compute_50") {
          if (Version >= CudaVersion::CUDA_80) {
            LibDeviceMap["sm_50"] = std::string(FilePath);
            LibDeviceMap["sm_52"] = std::string(FilePath);
            LibDeviceMap["sm_53"] = std::string(FilePath);
          }
        }
      }
    }

    // Check that we have found at least one libdevice that we can link in if
    // -nocudalib hasn't been specified.
    if (LibDeviceMap.empty() && !NoCudaLib)
      continue;

    IsValid = true;
    break;
  }
}

void CudaInstallationDetector::AddCudaIncludeArgs(
    const ArgList &DriverArgs, ArgStringList &CC1Args) const {
  if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
    // Add cuda_wrappers/* to our system include path. This lets us wrap
    // standard library headers.
    SmallString<128> P(D.ResourceDir);
    llvm::sys::path::append(P, "include");
    llvm::sys::path::append(P, "cuda_wrappers");
    CC1Args.push_back("-internal-isystem");
    CC1Args.push_back(DriverArgs.MakeArgString(P));
  }

  if (!DriverArgs.hasFlag(options::OPT_offload_inc, options::OPT_no_offload_inc,
                          true))
    return;

  if (!isValid()) {
    D.Diag(diag::err_drv_no_cuda_installation);
    return;
  }

  CC1Args.push_back("-include");
  CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
}

void CudaInstallationDetector::CheckCudaVersionSupportsArch(
    OffloadArch Arch) const {
  if (Arch == OffloadArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
      ArchsWithBadVersion[(int)Arch])
    return;

  auto MinVersion = MinVersionForOffloadArch(Arch);
  auto MaxVersion = MaxVersionForOffloadArch(Arch);
  if (Version < MinVersion || Version > MaxVersion) {
    ArchsWithBadVersion[(int)Arch] = true;
    D.Diag(diag::err_drv_cuda_version_unsupported)
        << OffloadArchToString(Arch) << CudaVersionToString(MinVersion)
        << CudaVersionToString(MaxVersion) << InstallPath
        << CudaVersionToString(Version);
  }
}

void CudaInstallationDetector::print(raw_ostream &OS) const {
  if (isValid())
    OS << "Found CUDA installation: " << InstallPath << ", version "
       << CudaVersionToString(Version) << "\n";
}

namespace {
/// Debug info level for the NVPTX devices. We may need to emit a different
/// debug info level for the host and for the device itself. This type controls
/// emission of the debug info for the devices. It either disables debug info
/// emission completely, emits debug directives only, or emits the same debug
/// info as for the host.
enum DeviceDebugInfoLevel {
  DisableDebugInfo,        /// Do not emit debug info for the devices.
  DebugDirectivesOnly,     /// Emit only debug directives.
  EmitSameDebugInfoAsHost, /// Use the same debug info level just like for the
                           /// host.
};
} // anonymous namespace

/// Determine the debug info level for the NVPTX devices. If debug info is
/// disabled for both the host and the device (-g0/-ggdb0 or no debug options
/// at all), nothing is emitted. If only debug directives are requested for
/// both the host and the device (-gline-directives-only), or if debug info
/// for the device alone is disabled (optimization is on and
/// --cuda-noopt-device-debug was not specified), only debug directives are
/// emitted for the device. Otherwise, use the same debug info level as for
/// the host (with the limitation that only the DWARF2 standard is supported).
static DeviceDebugInfoLevel mustEmitDebugInfo(const ArgList &Args) {
  const Arg *A = Args.getLastArg(options::OPT_O_Group);
  bool IsDebugEnabled = !A || A->getOption().matches(options::OPT_O0) ||
                        Args.hasFlag(options::OPT_cuda_noopt_device_debug,
                                     options::OPT_no_cuda_noopt_device_debug,
                                     /*Default=*/false);
  if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
    const Option &Opt = A->getOption();
    if (Opt.matches(options::OPT_gN_Group)) {
      if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0))
        return DisableDebugInfo;
      if (Opt.matches(options::OPT_gline_directives_only))
        return DebugDirectivesOnly;
    }
    return IsDebugEnabled ? EmitSameDebugInfoAsHost : DebugDirectivesOnly;
  }
  return willEmitRemarks(Args) ? DebugDirectivesOnly : DisableDebugInfo;
}

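// Build the ptxas job for a single GPU architecture. The resulting command
// looks roughly like (illustrative):
//   ptxas -m64 -O3 --gpu-name sm_70 --output-file foo.cubin foo.s -c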
void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs,
                                    const ArgList &Args,
                                    const char *LinkingOutput) const {
  const auto &TC =
      static_cast<const toolchains::NVPTXToolChain &>(getToolChain());
  assert(TC.getTriple().isNVPTX() && "Wrong platform");

  StringRef GPUArchName;
  // If this is a CUDA action, we need to extract the device architecture from
  // the Job's associated architecture; otherwise use the -march=arch option.
  // This option may come from the -Xopenmp-target flag or the default value.
  if (JA.isDeviceOffloading(Action::OFK_Cuda)) {
    GPUArchName = JA.getOffloadingArch();
  } else {
    GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
    if (GPUArchName.empty()) {
      C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch)
          << getToolChain().getArchName() << getShortName();
      return;
    }
  }

  // Obtain architecture from the action.
  OffloadArch gpu_arch = StringToOffloadArch(GPUArchName);
  assert(gpu_arch != OffloadArch::UNKNOWN &&
         "Device action expected to have an architecture.");

  // Check that our installation's ptxas supports gpu_arch.
  if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
    TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
  }

  ArgStringList CmdArgs;
  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
  DeviceDebugInfoLevel DIKind = mustEmitDebugInfo(Args);
  if (DIKind == EmitSameDebugInfoAsHost) {
    // ptxas does not accept -g option if optimization is enabled, so
    // we ignore the compiler's -O* options if we want debug info.
    CmdArgs.push_back("-g");
    CmdArgs.push_back("--dont-merge-basicblocks");
    CmdArgs.push_back("--return-at-end");
  } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
    // Map the -O we received to -O{0,1,2,3}.
    //
    // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
    // default, so it may correspond more closely to the spirit of clang -O2.

    // -O3 seems like the least-bad option when -Osomething is specified to
    // clang but it isn't handled below.
    StringRef OOpt = "3";
    if (A->getOption().matches(options::OPT_O4) ||
        A->getOption().matches(options::OPT_Ofast))
      OOpt = "3";
    else if (A->getOption().matches(options::OPT_O0))
      OOpt = "0";
    else if (A->getOption().matches(options::OPT_O)) {
      // -Os, -Oz, and -O(anything else) map to -O2, for lack of better
      // options.
      OOpt = llvm::StringSwitch<const char *>(A->getValue())
                 .Case("1", "1")
                 .Case("2", "2")
                 .Case("3", "3")
                 .Case("s", "2")
                 .Case("z", "2")
                 .Default("2");
    }
    CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
  } else {
    // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
    // to no optimizations, but ptxas's default is -O3.
    CmdArgs.push_back("-O0");
  }
  if (DIKind == DebugDirectivesOnly)
    CmdArgs.push_back("-lineinfo");

  // Pass -v to ptxas if it was passed to the driver.
  if (Args.hasArg(options::OPT_v))
    CmdArgs.push_back("-v");

  CmdArgs.push_back("--gpu-name");
  CmdArgs.push_back(Args.MakeArgString(OffloadArchToString(gpu_arch)));
  CmdArgs.push_back("--output-file");
  std::string OutputFileName = TC.getInputFilename(Output);

  if (Output.isFilename() && OutputFileName != Output.getFilename())
    C.addTempFile(Args.MakeArgString(OutputFileName));

  CmdArgs.push_back(Args.MakeArgString(OutputFileName));
  for (const auto &II : Inputs)
    CmdArgs.push_back(Args.MakeArgString(II.getFilename()));

  for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
    CmdArgs.push_back(Args.MakeArgString(A));

  bool Relocatable;
  if (JA.isOffloading(Action::OFK_OpenMP))
    // In OpenMP we need to generate relocatable code.
    Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
                               options::OPT_fnoopenmp_relocatable_target,
                               /*Default=*/true);
  else if (JA.isOffloading(Action::OFK_Cuda))
    // In CUDA we generate relocatable code by default.
    Relocatable = Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
                               /*Default=*/false);
  else
    // Otherwise, we are compiling directly and should create linkable output.
    Relocatable = true;

  if (Relocatable)
    CmdArgs.push_back("-c");

  const char *Exec;
  if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
    Exec = A->getValue();
  else
    Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
  C.addCommand(std::make_unique<Command>(
      JA, *this,
      ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
                          "--options-file"},
      Exec, CmdArgs, Inputs, Output));
}

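// Decide whether the PTX for the given architecture should be bundled into
// the fat binary. For example, --cuda-include-ptx=sm_70 re-enables PTX for
// sm_70 when the new offloading driver is used, and --no-cuda-include-ptx=all
// drops PTX for every architecture.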
static bool shouldIncludePTX(const ArgList &Args, StringRef InputArch) {
  // The new driver does not include PTX by default to avoid overhead.
  bool includePTX = !Args.hasFlag(options::OPT_offload_new_driver,
                                  options::OPT_no_offload_new_driver, true);
  for (Arg *A : Args.filtered(options::OPT_cuda_include_ptx_EQ,
                              options::OPT_no_cuda_include_ptx_EQ)) {
    A->claim();
    const StringRef ArchStr = A->getValue();
    if (A->getOption().matches(options::OPT_cuda_include_ptx_EQ) &&
        (ArchStr == "all" || ArchStr == InputArch))
      includePTX = true;
    else if (A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ) &&
             (ArchStr == "all" || ArchStr == InputArch))
      includePTX = false;
  }
  return includePTX;
}

// All inputs to this linker must be from CudaDeviceActions, as we need to look
// at the Inputs' Actions in order to figure out which GPU architecture they
// correspond to.
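// An illustrative resulting command:
//   fatbinary -64 --create a.fatbin --image=profile=sm_70,file=a.cubin
//             --image=profile=compute_70,file=a.s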
void NVPTX::FatBinary::ConstructJob(Compilation &C, const JobAction &JA,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs,
                                    const ArgList &Args,
                                    const char *LinkingOutput) const {
  const auto &TC =
      static_cast<const toolchains::CudaToolChain &>(getToolChain());
  assert(TC.getTriple().isNVPTX() && "Wrong platform");

  ArgStringList CmdArgs;
  if (TC.CudaInstallation.version() <= CudaVersion::CUDA_100)
    CmdArgs.push_back("--cuda");
  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
  CmdArgs.push_back(Args.MakeArgString("--create"));
  CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
    CmdArgs.push_back("-g");

  for (const auto &II : Inputs) {
    auto *A = II.getAction();
    assert(A->getInputs().size() == 1 &&
           "Device offload action is expected to have a single input");
    const char *gpu_arch_str = A->getOffloadingArch();
    assert(gpu_arch_str &&
           "Device action expected to have an associated GPU architecture!");
    OffloadArch gpu_arch = StringToOffloadArch(gpu_arch_str);

    if (II.getType() == types::TY_PP_Asm &&
        !shouldIncludePTX(Args, gpu_arch_str))
      continue;
    // We need to pass an Arch of the form "sm_XX" for cubin files and
    // "compute_XX" for ptx.
    const char *Arch = (II.getType() == types::TY_PP_Asm)
                           ? OffloadArchToVirtualArchString(gpu_arch)
                           : gpu_arch_str;
    CmdArgs.push_back(
        Args.MakeArgString(llvm::Twine("--image=profile=") + Arch +
                           ",file=" + getToolChain().getInputFilename(II)));
  }

  for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
    CmdArgs.push_back(Args.MakeArgString(A));

  const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
  C.addCommand(std::make_unique<Command>(
      JA, *this,
      ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
                          "--options-file"},
      Exec, CmdArgs, Inputs, Output));
}

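// Construct the device link job. Linking is handed off to the
// 'clang-nvlink-wrapper' tool (a wrapper around nvlink); LTO options, library
// search paths, and the PTX target features are forwarded to it below.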
void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                 const InputInfo &Output,
                                 const InputInfoList &Inputs,
                                 const ArgList &Args,
                                 const char *LinkingOutput) const {
  const auto &TC =
      static_cast<const toolchains::NVPTXToolChain &>(getToolChain());
  ArgStringList CmdArgs;

  assert(TC.getTriple().isNVPTX() && "Wrong platform");

  assert((Output.isFilename() || Output.isNothing()) && "Invalid output.");
  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  }

  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
    CmdArgs.push_back("-g");

  if (Args.hasArg(options::OPT_v))
    CmdArgs.push_back("-v");

  StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ);
  if (GPUArch.empty() && !C.getDriver().isUsingLTO()) {
    C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch)
        << getToolChain().getArchName() << getShortName();
    return;
  }

  if (!GPUArch.empty()) {
    CmdArgs.push_back("-arch");
    CmdArgs.push_back(Args.MakeArgString(GPUArch));
  }

  if (Args.hasArg(options::OPT_ptxas_path_EQ))
    CmdArgs.push_back(Args.MakeArgString(
        "--pxtas-path=" + Args.getLastArgValue(options::OPT_ptxas_path_EQ)));

  if (Args.hasArg(options::OPT_cuda_path_EQ))
    CmdArgs.push_back(Args.MakeArgString(
        "--cuda-path=" + Args.getLastArgValue(options::OPT_cuda_path_EQ)));

  // Add paths specified in LIBRARY_PATH environment variable as -L options.
  addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");

  // Add standard library search paths passed on the command line.
  Args.AddAllArgs(CmdArgs, options::OPT_L);
  getToolChain().AddFilePathLibArgs(Args, CmdArgs);
  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);

  if (C.getDriver().isUsingLTO())
    addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs,
                  C.getDriver().getLTOMode() == LTOK_Thin);

  // Forward the PTX features if the nvlink-wrapper needs them.
  std::vector<StringRef> Features;
  getNVPTXTargetFeatures(C.getDriver(), getToolChain().getTriple(), Args,
                         Features);
  CmdArgs.push_back(
      Args.MakeArgString("--plugin-opt=-mattr=" + llvm::join(Features, ",")));

  // Add paths for the default clang library path.
  SmallString<256> DefaultLibPath =
      llvm::sys::path::parent_path(TC.getDriver().Dir);
  llvm::sys::path::append(DefaultLibPath, CLANG_INSTALL_LIBDIR_BASENAME);
  CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));

  if (Args.hasArg(options::OPT_stdlib))
    CmdArgs.append({"-lc", "-lm"});
  if (Args.hasArg(options::OPT_startfiles)) {
    std::optional<std::string> IncludePath = getToolChain().getStdlibPath();
    if (!IncludePath)
      IncludePath = "/lib";
    SmallString<128> P(*IncludePath);
    llvm::sys::path::append(P, "crt1.o");
    CmdArgs.push_back(Args.MakeArgString(P));
  }

  C.addCommand(std::make_unique<Command>(
      JA, *this,
      ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
                          "--options-file"},
      Args.MakeArgString(getToolChain().GetProgramPath("clang-nvlink-wrapper")),
      CmdArgs, Inputs, Output));
}

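// Compute the '+ptxNN' target feature for the NVPTX backend. An explicit
// --cuda-feature=+ptxNN takes precedence; otherwise the feature is derived
// from the version of the detected CUDA installation.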
void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
                                   const llvm::opt::ArgList &Args,
                                   std::vector<StringRef> &Features) {
  if (Args.hasArg(options::OPT_cuda_feature_EQ)) {
    StringRef PtxFeature =
        Args.getLastArgValue(options::OPT_cuda_feature_EQ, "+ptx42");
    Features.push_back(Args.MakeArgString(PtxFeature));
    return;
  }
  CudaInstallationDetector CudaInstallation(D, Triple, Args);

  // New CUDA versions often introduce new instructions that are only supported
  // by a new PTX version, so we need to raise the PTX level to enable them in
  // the NVPTX back-end.
  const char *PtxFeature = nullptr;
  switch (CudaInstallation.version()) {
#define CASE_CUDA_VERSION(CUDA_VER, PTX_VER)                                   \
  case CudaVersion::CUDA_##CUDA_VER:                                           \
    PtxFeature = "+ptx" #PTX_VER;                                              \
    break;
    CASE_CUDA_VERSION(128, 87);
    CASE_CUDA_VERSION(126, 85);
    CASE_CUDA_VERSION(125, 85);
    CASE_CUDA_VERSION(124, 84);
    CASE_CUDA_VERSION(123, 83);
    CASE_CUDA_VERSION(122, 82);
    CASE_CUDA_VERSION(121, 81);
    CASE_CUDA_VERSION(120, 80);
    CASE_CUDA_VERSION(118, 78);
    CASE_CUDA_VERSION(117, 77);
    CASE_CUDA_VERSION(116, 76);
    CASE_CUDA_VERSION(115, 75);
    CASE_CUDA_VERSION(114, 74);
    CASE_CUDA_VERSION(113, 73);
    CASE_CUDA_VERSION(112, 72);
    CASE_CUDA_VERSION(111, 71);
    CASE_CUDA_VERSION(110, 70);
    CASE_CUDA_VERSION(102, 65);
    CASE_CUDA_VERSION(101, 64);
    CASE_CUDA_VERSION(100, 63);
    CASE_CUDA_VERSION(92, 61);
    CASE_CUDA_VERSION(91, 61);
    CASE_CUDA_VERSION(90, 60);
#undef CASE_CUDA_VERSION
  // TODO: Use specific CUDA version once it's public.
  case clang::CudaVersion::NEW:
    PtxFeature = "+ptx86";
    break;
  default:
    PtxFeature = "+ptx42";
  }
  Features.push_back(PtxFeature);
}

/// NVPTX toolchain. Our assembler is ptxas, and our linker is nvlink. This
/// operates as a stand-alone version of the NVPTX tools without the host
/// toolchain.
NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
                               const llvm::Triple &HostTriple,
                               const ArgList &Args)
    : ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args) {
  if (CudaInstallation.isValid())
    getProgramPaths().push_back(std::string(CudaInstallation.getBinPath()));
  // Look up binaries in the driver directory; this is used to
  // discover the 'nvptx-arch' executable.
  getProgramPaths().push_back(getDriver().Dir);
}

/// We only need the host triple to locate the CUDA binary utilities; use the
/// system's default triple if not provided.
NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
                               const ArgList &Args)
    : NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args) {}

llvm::opt::DerivedArgList *
NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
                              StringRef BoundArch,
                              Action::OffloadKind OffloadKind) const {
  DerivedArgList *DAL = ToolChain::TranslateArgs(Args, BoundArch, OffloadKind);
  if (!DAL)
    DAL = new DerivedArgList(Args.getBaseArgs());

  const OptTable &Opts = getDriver().getOpts();

  for (Arg *A : Args)
    if (!llvm::is_contained(*DAL, A))
      DAL->append(A);

  if (!DAL->hasArg(options::OPT_march_EQ) && OffloadKind != Action::OFK_None) {
    DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
                      OffloadArchToString(OffloadArch::CudaDefault));
  } else if (DAL->getLastArgValue(options::OPT_march_EQ) == "generic" &&
             OffloadKind == Action::OFK_None) {
    DAL->eraseArg(options::OPT_march_EQ);
  } else if (DAL->getLastArgValue(options::OPT_march_EQ) == "native") {
    auto GPUsOrErr = getSystemGPUArchs(Args);
    if (!GPUsOrErr) {
      getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
          << getArchName() << llvm::toString(GPUsOrErr.takeError()) << "-march";
    } else {
      if (GPUsOrErr->size() > 1)
        getDriver().Diag(diag::warn_drv_multi_gpu_arch)
            << getArchName() << llvm::join(*GPUsOrErr, ", ") << "-march";
      DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
                        Args.MakeArgString(GPUsOrErr->front()));
    }
  }

  return DAL;
}

void NVPTXToolChain::addClangTargetOptions(
    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
    Action::OffloadKind DeviceOffloadingKind) const {}

bool NVPTXToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
  const Option &O = A->getOption();
  return (O.matches(options::OPT_gN_Group) &&
          !O.matches(options::OPT_gmodules)) ||
         O.matches(options::OPT_g_Flag) ||
         O.matches(options::OPT_ggdbN_Group) || O.matches(options::OPT_ggdb) ||
         O.matches(options::OPT_gdwarf) || O.matches(options::OPT_gdwarf_2) ||
         O.matches(options::OPT_gdwarf_3) || O.matches(options::OPT_gdwarf_4) ||
         O.matches(options::OPT_gdwarf_5) ||
         O.matches(options::OPT_gcolumn_info);
}

void NVPTXToolChain::adjustDebugInfoKind(
    llvm::codegenoptions::DebugInfoKind &DebugInfoKind,
    const ArgList &Args) const {
  switch (mustEmitDebugInfo(Args)) {
  case DisableDebugInfo:
    DebugInfoKind = llvm::codegenoptions::NoDebugInfo;
    break;
  case DebugDirectivesOnly:
    DebugInfoKind = llvm::codegenoptions::DebugDirectivesOnly;
    break;
  case EmitSameDebugInfoAsHost:
    // Use same debug info level as the host.
    break;
  }
}

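// The 'nvptx-arch' tool (or the tool named by --nvptx-arch-tool=) prints one
// offloading architecture per line, e.g. 'sm_89'; each non-empty line of its
// output is treated as a GPU present on the system.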
Expected<SmallVector<std::string>>
NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const {
  // Detect NVIDIA GPUs available on the system.
  std::string Program;
  if (Arg *A = Args.getLastArg(options::OPT_nvptx_arch_tool_EQ))
    Program = A->getValue();
  else
    Program = GetProgramPath("nvptx-arch");

  auto StdoutOrErr = executeToolChainProgram(Program);
  if (!StdoutOrErr)
    return StdoutOrErr.takeError();

  SmallVector<std::string, 1> GPUArchs;
  for (StringRef Arch : llvm::split((*StdoutOrErr)->getBuffer(), "\n"))
    if (!Arch.empty())
      GPUArchs.push_back(Arch.str());

  if (GPUArchs.empty())
    return llvm::createStringError(std::error_code(),
                                   "No NVIDIA GPU detected in the system");

  return std::move(GPUArchs);
}

/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
/// which isn't properly a linker but nonetheless performs the step of stitching
/// together object files from the assembler into a single blob.

CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
                             const ToolChain &HostTC, const ArgList &Args)
    : NVPTXToolChain(D, Triple, HostTC.getTriple(), Args), HostTC(HostTC) {}

void CudaToolChain::addClangTargetOptions(
    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
    Action::OffloadKind DeviceOffloadingKind) const {
  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);

  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
  assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
          DeviceOffloadingKind == Action::OFK_Cuda) &&
         "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");

  CC1Args.append({"-fcuda-is-device", "-mllvm",
                  "-enable-memcpyopt-without-libcalls",
                  "-fno-threadsafe-statics"});

  // Unsized function arguments used for variadics were introduced in CUDA-9.0.
  // We still do not support generating code that actually uses variadic
  // arguments yet, but we do need to allow parsing them as recent CUDA
  // headers rely on that. https://github.com/llvm/llvm-project/issues/58410
  if (CudaInstallation.version() >= CudaVersion::CUDA_90)
    CC1Args.push_back("-fcuda-allow-variadic-functions");

  if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
                         options::OPT_fno_cuda_short_ptr, false))
    CC1Args.append({"-mllvm", "--nvptx-short-ptr"});

  if (!DriverArgs.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib,
                          true))
    return;

  if (DeviceOffloadingKind == Action::OFK_OpenMP &&
      DriverArgs.hasArg(options::OPT_S))
    return;

  std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
  if (LibDeviceFile.empty()) {
    getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
    return;
  }

  CC1Args.push_back("-mlink-builtin-bitcode");
  CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));

  // For now, we don't use any Offload/OpenMP device runtime when we offload
  // CUDA via LLVM/Offload. We should split the Offload/OpenMP device runtime
  // and include the "generic" (or CUDA-specific) parts.
  if (DriverArgs.hasFlag(options::OPT_foffload_via_llvm,
                         options::OPT_fno_offload_via_llvm, false))
    return;

  clang::CudaVersion CudaInstallationVersion = CudaInstallation.version();

  if (CudaInstallationVersion >= CudaVersion::UNKNOWN)
    CC1Args.push_back(
        DriverArgs.MakeArgString(Twine("-target-sdk-version=") +
                                 CudaVersionToString(CudaInstallationVersion)));

  if (DeviceOffloadingKind == Action::OFK_OpenMP) {
    if (CudaInstallationVersion < CudaVersion::CUDA_92) {
      getDriver().Diag(
          diag::err_drv_omp_offload_target_cuda_version_not_support)
          << CudaVersionToString(CudaInstallationVersion);
      return;
    }

    // Link the bitcode library late if we're using device LTO.
    if (getDriver().isUsingOffloadLTO())
      return;

    addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, GpuArch.str(),
                       getTriple(), HostTC);
  }
}

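// For CUDA device code, single-precision denormals are flushed to zero
// (preserve-sign mode) only when -fgpu-flush-denormals-to-zero is given;
// otherwise IEEE denormal handling is kept.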
llvm::DenormalMode CudaToolChain::getDefaultDenormalModeForType(
    const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
    const llvm::fltSemantics *FPType) const {
  if (JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
    if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
        DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
                           options::OPT_fno_gpu_flush_denormals_to_zero, false))
      return llvm::DenormalMode::getPreserveSign();
  }

  assert(JA.getOffloadingDeviceKind() != Action::OFK_Host);
  return llvm::DenormalMode::getIEEE();
}

void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
                                       ArgStringList &CC1Args) const {
  // Check our CUDA version if we're going to include the CUDA headers.
  if (DriverArgs.hasFlag(options::OPT_offload_inc, options::OPT_no_offload_inc,
                         true) &&
      !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
    StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
    assert(!Arch.empty() && "Must have an explicit GPU arch.");
    CudaInstallation.CheckCudaVersionSupportsArch(StringToOffloadArch(Arch));
  }
  CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
}

std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
  // Only object files are changed, for example assembly files keep their .s
  // extensions. If the user requested device-only compilation don't change it.
  if (Input.getType() != types::TY_Object || getDriver().offloadDeviceOnly())
    return ToolChain::getInputFilename(Input);

  return ToolChain::getInputFilename(Input);
}

llvm::opt::DerivedArgList *
CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
                             StringRef BoundArch,
                             Action::OffloadKind DeviceOffloadKind) const {
  DerivedArgList *DAL =
      HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
  if (!DAL)
    DAL = new DerivedArgList(Args.getBaseArgs());

  const OptTable &Opts = getDriver().getOpts();

  for (Arg *A : Args) {
    // Make sure flags are not duplicated.
    if (!llvm::is_contained(*DAL, A)) {
      DAL->append(A);
    }
  }

  if (!BoundArch.empty()) {
    DAL->eraseArg(options::OPT_march_EQ);
    DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
                      BoundArch);
  }
  return DAL;
}

Tool *NVPTXToolChain::buildAssembler() const {
  return new tools::NVPTX::Assembler(*this);
}

Tool *NVPTXToolChain::buildLinker() const {
  return new tools::NVPTX::Linker(*this);
}

Tool *CudaToolChain::buildAssembler() const {
  return new tools::NVPTX::Assembler(*this);
}

Tool *CudaToolChain::buildLinker() const {
  return new tools::NVPTX::FatBinary(*this);
}

void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
  HostTC.addClangWarningOptions(CC1Args);
}

ToolChain::CXXStdlibType
CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
  return HostTC.GetCXXStdlibType(Args);
}

void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
                                              ArgStringList &CC1Args) const {
  HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);

  if (DriverArgs.hasFlag(options::OPT_offload_inc, options::OPT_no_offload_inc,
                         true) &&
      CudaInstallation.isValid())
    CC1Args.append(
        {"-internal-isystem",
         DriverArgs.MakeArgString(CudaInstallation.getIncludePath())});
}

void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
                                                 ArgStringList &CC1Args) const {
  HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
}

void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
                                        ArgStringList &CC1Args) const {
  HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
}

SanitizerMask CudaToolChain::getSupportedSanitizers() const {
  // The CudaToolChain only supports sanitizers in the sense that it allows
  // sanitizer arguments on the command line if they are supported by the host
  // toolchain. The CudaToolChain will actually ignore any command line
  // arguments for any of these "supported" sanitizers. That means that no
  // sanitization of device code is actually supported at this time.
  //
  // This behavior is necessary because the host and device toolchain
  // invocations often share the command line, so the device toolchain must
  // tolerate flags meant only for the host toolchain.
  return HostTC.getSupportedSanitizers();
}

VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
                                               const ArgList &Args) const {
  return HostTC.computeMSVCVersion(D, Args);
}