//===--- Cuda.cpp - Cuda Tool and ToolChain Implementations -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "Cuda.h"
#include "CommonArgs.h"
#include "clang/Basic/Cuda.h"
#include "clang/Config/config.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Distro.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Options.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatAdapters.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/TargetParser/Host.h"
#include "llvm/TargetParser/TargetParser.h"
#include <system_error>

using namespace clang::driver;
using namespace clang::driver::toolchains;
using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;

namespace {

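// cuda.h encodes CUDA_VERSION as major * 1000 + minor * 10 (e.g. 11040 for
// CUDA 11.4); the thresholds below bucket that raw value into the CudaVersion
// enum.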
CudaVersion getCudaVersion(uint32_t raw_version) {
  if (raw_version < 7050)
    return CudaVersion::CUDA_70;
  if (raw_version < 8000)
    return CudaVersion::CUDA_75;
  if (raw_version < 9000)
    return CudaVersion::CUDA_80;
  if (raw_version < 9010)
    return CudaVersion::CUDA_90;
  if (raw_version < 9020)
    return CudaVersion::CUDA_91;
  if (raw_version < 10000)
    return CudaVersion::CUDA_92;
  if (raw_version < 10010)
    return CudaVersion::CUDA_100;
  if (raw_version < 10020)
    return CudaVersion::CUDA_101;
  if (raw_version < 11000)
    return CudaVersion::CUDA_102;
  if (raw_version < 11010)
    return CudaVersion::CUDA_110;
  if (raw_version < 11020)
    return CudaVersion::CUDA_111;
  if (raw_version < 11030)
    return CudaVersion::CUDA_112;
  if (raw_version < 11040)
    return CudaVersion::CUDA_113;
  if (raw_version < 11050)
    return CudaVersion::CUDA_114;
  if (raw_version < 11060)
    return CudaVersion::CUDA_115;
  if (raw_version < 11070)
    return CudaVersion::CUDA_116;
  if (raw_version < 11080)
    return CudaVersion::CUDA_117;
  if (raw_version < 11090)
    return CudaVersion::CUDA_118;
  if (raw_version < 12010)
    return CudaVersion::CUDA_120;
  if (raw_version < 12020)
    return CudaVersion::CUDA_121;
  if (raw_version < 12030)
    return CudaVersion::CUDA_122;
  if (raw_version < 12040)
    return CudaVersion::CUDA_123;
  if (raw_version < 12050)
    return CudaVersion::CUDA_124;
  if (raw_version < 12060)
    return CudaVersion::CUDA_125;
  return CudaVersion::NEW;
}

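// Scan the contents of cuda.h and return the CUDA version it declares, i.e.
// the value taken from a line of the form:
//   #define CUDA_VERSION 12040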
CudaVersion parseCudaHFile(llvm::StringRef Input) {
  // Helper lambda which skips the words if the line starts with them or returns
  // std::nullopt otherwise.
  auto StartsWithWords =
      [](llvm::StringRef Line,
         const SmallVector<StringRef, 3> words) -> std::optional<StringRef> {
    for (StringRef word : words) {
      if (!Line.consume_front(word))
        return {};
      Line = Line.ltrim();
    }
    return Line;
  };

  Input = Input.ltrim();
  while (!Input.empty()) {
    if (auto Line =
            StartsWithWords(Input.ltrim(), {"#", "define", "CUDA_VERSION"})) {
      uint32_t RawVersion;
      Line->consumeInteger(10, RawVersion);
      return getCudaVersion(RawVersion);
    }
    // Find next non-empty line.
    Input = Input.drop_front(Input.find_first_of("\n\r")).ltrim();
  }
  return CudaVersion::UNKNOWN;
}
} // namespace

void CudaInstallationDetector::WarnIfUnsupportedVersion() {
  if (Version > CudaVersion::PARTIALLY_SUPPORTED) {
    std::string VersionString = CudaVersionToString(Version);
    if (!VersionString.empty())
      VersionString.insert(0, " ");
    D.Diag(diag::warn_drv_new_cuda_version)
        << VersionString
        << (CudaVersion::PARTIALLY_SUPPORTED != CudaVersion::FULLY_SUPPORTED)
        << CudaVersionToString(CudaVersion::PARTIALLY_SUPPORTED);
  } else if (Version > CudaVersion::FULLY_SUPPORTED)
    D.Diag(diag::warn_drv_partially_supported_cuda_version)
        << CudaVersionToString(Version);
}

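// Collect candidate CUDA installation directories -- from --cuda-path, the
// parent of the directory containing ptxas, and standard locations such as
// /usr/local/cuda -- and accept the first candidate that looks complete.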
CudaInstallationDetector::CudaInstallationDetector(
    const Driver &D, const llvm::Triple &HostTriple,
    const llvm::opt::ArgList &Args)
    : D(D) {
  struct Candidate {
    std::string Path;
    bool StrictChecking;

    Candidate(std::string Path, bool StrictChecking = false)
        : Path(Path), StrictChecking(StrictChecking) {}
  };
  SmallVector<Candidate, 4> Candidates;

  // In decreasing order so we prefer newer versions to older versions.
  std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
  auto &FS = D.getVFS();

  if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
    Candidates.emplace_back(
        Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
  } else if (HostTriple.isOSWindows()) {
    for (const char *Ver : Versions)
      Candidates.emplace_back(
          D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
          Ver);
  } else {
    if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
      // Try to find ptxas binary. If the executable is located in a directory
      // called 'bin/', its parent directory might be a good guess for a valid
      // CUDA installation.
      // However, some distributions might install 'ptxas' to /usr/bin. In that
      // case the candidate would be '/usr', which passes the following checks
      // because '/usr/include' exists as well. To avoid this case, we always
      // check for the directory potentially containing files for libdevice,
      // even if the user passes -nocudalib.
      if (llvm::ErrorOr<std::string> ptxas =
              llvm::sys::findProgramByName("ptxas")) {
        SmallString<256> ptxasAbsolutePath;
        llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);

        StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
        if (llvm::sys::path::filename(ptxasDir) == "bin")
          Candidates.emplace_back(
              std::string(llvm::sys::path::parent_path(ptxasDir)),
              /*StrictChecking=*/true);
      }
    }

    Candidates.emplace_back(D.SysRoot + "/usr/local/cuda");
    for (const char *Ver : Versions)
      Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);

    Distro Dist(FS, llvm::Triple(llvm::sys::getProcessTriple()));
    if (Dist.IsDebian() || Dist.IsUbuntu())
      // Special case for Debian to have nvidia-cuda-toolkit work
      // out of the box. More info on http://bugs.debian.org/882505
      Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
  }

  bool NoCudaLib = Args.hasArg(options::OPT_nogpulib);

  for (const auto &Candidate : Candidates) {
    InstallPath = Candidate.Path;
    if (InstallPath.empty() || !FS.exists(InstallPath))
      continue;

    BinPath = InstallPath + "/bin";
    IncludePath = InstallPath + "/include";
    LibDevicePath = InstallPath + "/nvvm/libdevice";

    if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
      continue;
    bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking);
    if (CheckLibDevice && !FS.exists(LibDevicePath))
      continue;

    Version = CudaVersion::UNKNOWN;
    if (auto CudaHFile = FS.getBufferForFile(InstallPath + "/include/cuda.h"))
      Version = parseCudaHFile((*CudaHFile)->getBuffer());
    // As the last resort, make an educated guess between CUDA-7.0, which had
    // old-style libdevice bitcode, and an unknown recent CUDA version.
    if (Version == CudaVersion::UNKNOWN) {
      Version = FS.exists(LibDevicePath + "/libdevice.10.bc")
                    ? CudaVersion::NEW
                    : CudaVersion::CUDA_70;
    }

    if (Version >= CudaVersion::CUDA_90) {
      // CUDA-9+ uses single libdevice file for all GPU variants.
      std::string FilePath = LibDevicePath + "/libdevice.10.bc";
      if (FS.exists(FilePath)) {
        for (int Arch = (int)OffloadArch::SM_30, E = (int)OffloadArch::LAST;
             Arch < E; ++Arch) {
          OffloadArch OA = static_cast<OffloadArch>(Arch);
          if (!IsNVIDIAOffloadArch(OA))
            continue;
          std::string OffloadArchName(OffloadArchToString(OA));
          LibDeviceMap[OffloadArchName] = FilePath;
        }
      }
    } else {
      std::error_code EC;
      for (llvm::vfs::directory_iterator LI = FS.dir_begin(LibDevicePath, EC),
                                         LE;
           !EC && LI != LE; LI = LI.increment(EC)) {
        StringRef FilePath = LI->path();
        StringRef FileName = llvm::sys::path::filename(FilePath);
        // Process all bitcode filenames that look like
        // libdevice.compute_XX.YY.bc
        const StringRef LibDeviceName = "libdevice.";
        if (!(FileName.starts_with(LibDeviceName) && FileName.ends_with(".bc")))
          continue;
        StringRef GpuArch = FileName.slice(
            LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
        LibDeviceMap[GpuArch] = FilePath.str();
        // Insert map entries for specific devices with this compute
        // capability. NVCC's choice of the libdevice library version is
        // rather peculiar and depends on the CUDA version.
        if (GpuArch == "compute_20") {
          LibDeviceMap["sm_20"] = std::string(FilePath);
          LibDeviceMap["sm_21"] = std::string(FilePath);
          LibDeviceMap["sm_32"] = std::string(FilePath);
        } else if (GpuArch == "compute_30") {
          LibDeviceMap["sm_30"] = std::string(FilePath);
          if (Version < CudaVersion::CUDA_80) {
            LibDeviceMap["sm_50"] = std::string(FilePath);
            LibDeviceMap["sm_52"] = std::string(FilePath);
            LibDeviceMap["sm_53"] = std::string(FilePath);
          }
          LibDeviceMap["sm_60"] = std::string(FilePath);
          LibDeviceMap["sm_61"] = std::string(FilePath);
          LibDeviceMap["sm_62"] = std::string(FilePath);
        } else if (GpuArch == "compute_35") {
          LibDeviceMap["sm_35"] = std::string(FilePath);
          LibDeviceMap["sm_37"] = std::string(FilePath);
        } else if (GpuArch == "compute_50") {
          if (Version >= CudaVersion::CUDA_80) {
            LibDeviceMap["sm_50"] = std::string(FilePath);
            LibDeviceMap["sm_52"] = std::string(FilePath);
            LibDeviceMap["sm_53"] = std::string(FilePath);
          }
        }
      }
    }

    // Check that we have found at least one libdevice that we can link in if
    // -nocudalib hasn't been specified.
    if (LibDeviceMap.empty() && !NoCudaLib)
      continue;

    IsValid = true;
    break;
  }
}

void CudaInstallationDetector::AddCudaIncludeArgs(
    const ArgList &DriverArgs, ArgStringList &CC1Args) const {
  if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
    // Add cuda_wrappers/* to our system include path. This lets us wrap
    // standard library headers.
    SmallString<128> P(D.ResourceDir);
    llvm::sys::path::append(P, "include");
    llvm::sys::path::append(P, "cuda_wrappers");
    CC1Args.push_back("-internal-isystem");
    CC1Args.push_back(DriverArgs.MakeArgString(P));
  }

  if (DriverArgs.hasArg(options::OPT_nogpuinc))
    return;

  if (!isValid()) {
    D.Diag(diag::err_drv_no_cuda_installation);
    return;
  }

  CC1Args.push_back("-include");
  CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
}

void CudaInstallationDetector::CheckCudaVersionSupportsArch(
    OffloadArch Arch) const {
  if (Arch == OffloadArch::UNKNOWN || Version == CudaVersion::UNKNOWN ||
      ArchsWithBadVersion[(int)Arch])
    return;

  auto MinVersion = MinVersionForOffloadArch(Arch);
  auto MaxVersion = MaxVersionForOffloadArch(Arch);
  if (Version < MinVersion || Version > MaxVersion) {
    ArchsWithBadVersion[(int)Arch] = true;
    D.Diag(diag::err_drv_cuda_version_unsupported)
        << OffloadArchToString(Arch) << CudaVersionToString(MinVersion)
        << CudaVersionToString(MaxVersion) << InstallPath
        << CudaVersionToString(Version);
  }
}

void CudaInstallationDetector::print(raw_ostream &OS) const {
  if (isValid())
    OS << "Found CUDA installation: " << InstallPath << ", version "
       << CudaVersionToString(Version) << "\n";
}

namespace {
/// Debug info level for the NVPTX devices. We may need to emit a different
/// debug info level for the host and for the device itself. This type controls
/// emission of the debug info for the devices: it either disables debug info
/// emission completely, emits debug directives only, or emits the same debug
/// info as for the host.
enum DeviceDebugInfoLevel {
  DisableDebugInfo,        /// Do not emit debug info for the devices.
  DebugDirectivesOnly,     /// Emit only debug directives.
  EmitSameDebugInfoAsHost, /// Use the same debug info level just like for the
                           /// host.
};
} // anonymous namespace

/// Determine the debug info level for the NVPTX devices. If debug info is
/// disabled for both the host and the device (-g0/-ggdb0 or no debug options
/// at all), no debug info is emitted. If only debug directives are requested
/// for both the host and the device (-gline-directives-only), or if debug info
/// is disabled only for the device (optimization is on and
/// --cuda-noopt-device-debug was not specified), only debug directives are
/// emitted for the device. Otherwise, use the same debug info level as for the
/// host (with the limitation that only the DWARF2 standard is supported).
static DeviceDebugInfoLevel mustEmitDebugInfo(const ArgList &Args) {
  const Arg *A = Args.getLastArg(options::OPT_O_Group);
  bool IsDebugEnabled = !A || A->getOption().matches(options::OPT_O0) ||
                        Args.hasFlag(options::OPT_cuda_noopt_device_debug,
                                     options::OPT_no_cuda_noopt_device_debug,
                                     /*Default=*/false);
  if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
    const Option &Opt = A->getOption();
    if (Opt.matches(options::OPT_gN_Group)) {
      if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0))
        return DisableDebugInfo;
      if (Opt.matches(options::OPT_gline_directives_only))
        return DebugDirectivesOnly;
    }
    return IsDebugEnabled ? EmitSameDebugInfoAsHost : DebugDirectivesOnly;
  }
  return willEmitRemarks(Args) ? DebugDirectivesOnly : DisableDebugInfo;
}

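// Construct the ptxas command that assembles a single PTX input into a GPU
// object, forwarding the debug-info level, optimization level and any
// -Xcuda-ptxas options.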
void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs,
                                    const ArgList &Args,
                                    const char *LinkingOutput) const {
  const auto &TC =
      static_cast<const toolchains::NVPTXToolChain &>(getToolChain());
  assert(TC.getTriple().isNVPTX() && "Wrong platform");

  StringRef GPUArchName;
  // If this is a CUDA action we need to extract the device architecture
  // from the Job's associated architecture, otherwise use the -march=arch
  // option. This option may come from -Xopenmp-target flag or the default
  // value.
  if (JA.isDeviceOffloading(Action::OFK_Cuda)) {
    GPUArchName = JA.getOffloadingArch();
  } else {
    GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
    if (GPUArchName.empty()) {
      C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch)
          << getToolChain().getArchName() << getShortName();
      return;
    }
  }

  // Obtain architecture from the action.
  OffloadArch gpu_arch = StringToOffloadArch(GPUArchName);
  assert(gpu_arch != OffloadArch::UNKNOWN &&
         "Device action expected to have an architecture.");

  // Check that our installation's ptxas supports gpu_arch.
  if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
    TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
  }

  ArgStringList CmdArgs;
  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
  DeviceDebugInfoLevel DIKind = mustEmitDebugInfo(Args);
  if (DIKind == EmitSameDebugInfoAsHost) {
    // ptxas does not accept -g option if optimization is enabled, so
    // we ignore the compiler's -O* options if we want debug info.
    CmdArgs.push_back("-g");
    CmdArgs.push_back("--dont-merge-basicblocks");
    CmdArgs.push_back("--return-at-end");
  } else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
    // Map the -O we received to -O{0,1,2,3}.
    //
    // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
    // default, so it may correspond more closely to the spirit of clang -O2.

    // -O3 seems like the least-bad option when -Osomething is specified to
    // clang but it isn't handled below.
    StringRef OOpt = "3";
    if (A->getOption().matches(options::OPT_O4) ||
        A->getOption().matches(options::OPT_Ofast))
      OOpt = "3";
    else if (A->getOption().matches(options::OPT_O0))
      OOpt = "0";
    else if (A->getOption().matches(options::OPT_O)) {
      // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
      OOpt = llvm::StringSwitch<const char *>(A->getValue())
                 .Case("1", "1")
                 .Case("2", "2")
                 .Case("3", "3")
                 .Case("s", "2")
                 .Case("z", "2")
                 .Default("2");
    }
    CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
  } else {
    // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
    // to no optimizations, but ptxas's default is -O3.
    CmdArgs.push_back("-O0");
  }
  if (DIKind == DebugDirectivesOnly)
    CmdArgs.push_back("-lineinfo");

  // Pass -v to ptxas if it was passed to the driver.
  if (Args.hasArg(options::OPT_v))
    CmdArgs.push_back("-v");

  CmdArgs.push_back("--gpu-name");
  CmdArgs.push_back(Args.MakeArgString(OffloadArchToString(gpu_arch)));
  CmdArgs.push_back("--output-file");
  std::string OutputFileName = TC.getInputFilename(Output);

  if (Output.isFilename() && OutputFileName != Output.getFilename())
    C.addTempFile(Args.MakeArgString(OutputFileName));

  CmdArgs.push_back(Args.MakeArgString(OutputFileName));
  for (const auto &II : Inputs)
    CmdArgs.push_back(Args.MakeArgString(II.getFilename()));

  for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
    CmdArgs.push_back(Args.MakeArgString(A));

  bool Relocatable;
  if (JA.isOffloading(Action::OFK_OpenMP))
    // In OpenMP we need to generate relocatable code.
    Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
                               options::OPT_fnoopenmp_relocatable_target,
                               /*Default=*/true);
  else if (JA.isOffloading(Action::OFK_Cuda))
    // In CUDA we generate relocatable code by default.
    Relocatable = Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
                               /*Default=*/false);
  else
    // Otherwise, we are compiling directly and should create linkable output.
    Relocatable = true;

  if (Relocatable)
    CmdArgs.push_back("-c");

  const char *Exec;
  if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
    Exec = A->getValue();
  else
    Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
  C.addCommand(std::make_unique<Command>(
      JA, *this,
      ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
                          "--options-file"},
      Exec, CmdArgs, Inputs, Output));
}

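// Returns true if PTX for the given arch should be embedded in the fatbinary,
// honoring --cuda-include-ptx= and --no-cuda-include-ptx= (the last flag
// naming the arch, or "all", wins).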
static bool shouldIncludePTX(const ArgList &Args, StringRef InputArch) {
  // The new driver does not include PTX by default to avoid overhead.
  bool includePTX = !Args.hasFlag(options::OPT_offload_new_driver,
                                  options::OPT_no_offload_new_driver, false);
  for (Arg *A : Args.filtered(options::OPT_cuda_include_ptx_EQ,
                              options::OPT_no_cuda_include_ptx_EQ)) {
    A->claim();
    const StringRef ArchStr = A->getValue();
    if (A->getOption().matches(options::OPT_cuda_include_ptx_EQ) &&
        (ArchStr == "all" || ArchStr == InputArch))
      includePTX = true;
    else if (A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ) &&
             (ArchStr == "all" || ArchStr == InputArch))
      includePTX = false;
  }
  return includePTX;
}

// All inputs to this linker must be from CudaDeviceActions, as we need to look
// at the Inputs' Actions in order to figure out which GPU architecture they
// correspond to.
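// The resulting invocation looks roughly like (file names illustrative):
//   fatbinary -64 --create out.fatbin --image=profile=sm_70,file=in.cubin
//     --image=profile=compute_70,file=in.s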
void NVPTX::FatBinary::ConstructJob(Compilation &C, const JobAction &JA,
                                    const InputInfo &Output,
                                    const InputInfoList &Inputs,
                                    const ArgList &Args,
                                    const char *LinkingOutput) const {
  const auto &TC =
      static_cast<const toolchains::CudaToolChain &>(getToolChain());
  assert(TC.getTriple().isNVPTX() && "Wrong platform");

  ArgStringList CmdArgs;
  if (TC.CudaInstallation.version() <= CudaVersion::CUDA_100)
    CmdArgs.push_back("--cuda");
  CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
  CmdArgs.push_back(Args.MakeArgString("--create"));
  CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
    CmdArgs.push_back("-g");

  for (const auto &II : Inputs) {
    auto *A = II.getAction();
    assert(A->getInputs().size() == 1 &&
           "Device offload action is expected to have a single input");
    const char *gpu_arch_str = A->getOffloadingArch();
    assert(gpu_arch_str &&
           "Device action expected to have associated a GPU architecture!");
    OffloadArch gpu_arch = StringToOffloadArch(gpu_arch_str);

    if (II.getType() == types::TY_PP_Asm &&
        !shouldIncludePTX(Args, gpu_arch_str))
      continue;
    // We need to pass an Arch of the form "sm_XX" for cubin files and
    // "compute_XX" for ptx.
    const char *Arch = (II.getType() == types::TY_PP_Asm)
                           ? OffloadArchToVirtualArchString(gpu_arch)
                           : gpu_arch_str;
    CmdArgs.push_back(
        Args.MakeArgString(llvm::Twine("--image=profile=") + Arch +
                           ",file=" + getToolChain().getInputFilename(II)));
  }

  for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
    CmdArgs.push_back(Args.MakeArgString(A));

  const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
  C.addCommand(std::make_unique<Command>(
      JA, *this,
      ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
                          "--options-file"},
      Exec, CmdArgs, Inputs, Output));
}

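// Device linking: forward the inputs, library search paths and GPU arch to
// the clang-nvlink-wrapper tool, which wraps the CUDA nvlink linker.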
void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                 const InputInfo &Output,
                                 const InputInfoList &Inputs,
                                 const ArgList &Args,
                                 const char *LinkingOutput) const {
  const auto &TC =
      static_cast<const toolchains::NVPTXToolChain &>(getToolChain());
  ArgStringList CmdArgs;

  assert(TC.getTriple().isNVPTX() && "Wrong platform");

  assert((Output.isFilename() || Output.isNothing()) && "Invalid output.");
  if (Output.isFilename()) {
    CmdArgs.push_back("-o");
    CmdArgs.push_back(Output.getFilename());
  }

  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
    CmdArgs.push_back("-g");

  if (Args.hasArg(options::OPT_v))
    CmdArgs.push_back("-v");

  StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ);
  if (GPUArch.empty()) {
    C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch)
        << getToolChain().getArchName() << getShortName();
    return;
  }

  CmdArgs.push_back("-arch");
  CmdArgs.push_back(Args.MakeArgString(GPUArch));

  if (Args.hasArg(options::OPT_ptxas_path_EQ))
    CmdArgs.push_back(Args.MakeArgString(
        "--ptxas-path=" + Args.getLastArgValue(options::OPT_ptxas_path_EQ)));

  if (Args.hasArg(options::OPT_cuda_path_EQ))
    CmdArgs.push_back(Args.MakeArgString(
        "--cuda-path=" + Args.getLastArgValue(options::OPT_cuda_path_EQ)));

  // Add paths specified in LIBRARY_PATH environment variable as -L options.
  addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");

  // Add standard library search paths passed on the command line.
  Args.AddAllArgs(CmdArgs, options::OPT_L);
  getToolChain().AddFilePathLibArgs(Args, CmdArgs);
  AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);

  if (C.getDriver().isUsingLTO())
    addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0],
                  C.getDriver().getLTOMode() == LTOK_Thin);

  // Add paths for the default clang library path.
  SmallString<256> DefaultLibPath =
      llvm::sys::path::parent_path(TC.getDriver().Dir);
  llvm::sys::path::append(DefaultLibPath, CLANG_INSTALL_LIBDIR_BASENAME);
  CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));

  C.addCommand(std::make_unique<Command>(
      JA, *this,
      ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
                          "--options-file"},
      Args.MakeArgString(getToolChain().GetProgramPath("clang-nvlink-wrapper")),
      CmdArgs, Inputs, Output));
}

void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
                                   const llvm::opt::ArgList &Args,
                                   std::vector<StringRef> &Features) {
  if (Args.hasArg(options::OPT_cuda_feature_EQ)) {
    StringRef PtxFeature =
        Args.getLastArgValue(options::OPT_cuda_feature_EQ, "+ptx42");
    Features.push_back(Args.MakeArgString(PtxFeature));
    return;
  }
  CudaInstallationDetector CudaInstallation(D, Triple, Args);

  // New CUDA versions often introduce new instructions that are only supported
  // by a new PTX version, so we need to raise the PTX level to enable them in
  // the NVPTX back-end.
  const char *PtxFeature = nullptr;
  switch (CudaInstallation.version()) {
#define CASE_CUDA_VERSION(CUDA_VER, PTX_VER)                                   \
  case CudaVersion::CUDA_##CUDA_VER:                                           \
    PtxFeature = "+ptx" #PTX_VER;                                              \
    break;
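  // For example, CASE_CUDA_VERSION(125, 85) expands to
  //   case CudaVersion::CUDA_125: PtxFeature = "+ptx85"; break;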
  CASE_CUDA_VERSION(125, 85);
  CASE_CUDA_VERSION(124, 84);
  CASE_CUDA_VERSION(123, 83);
  CASE_CUDA_VERSION(122, 82);
  CASE_CUDA_VERSION(121, 81);
  CASE_CUDA_VERSION(120, 80);
  CASE_CUDA_VERSION(118, 78);
  CASE_CUDA_VERSION(117, 77);
  CASE_CUDA_VERSION(116, 76);
  CASE_CUDA_VERSION(115, 75);
  CASE_CUDA_VERSION(114, 74);
  CASE_CUDA_VERSION(113, 73);
  CASE_CUDA_VERSION(112, 72);
  CASE_CUDA_VERSION(111, 71);
  CASE_CUDA_VERSION(110, 70);
  CASE_CUDA_VERSION(102, 65);
  CASE_CUDA_VERSION(101, 64);
  CASE_CUDA_VERSION(100, 63);
  CASE_CUDA_VERSION(92, 61);
  CASE_CUDA_VERSION(91, 61);
  CASE_CUDA_VERSION(90, 60);
#undef CASE_CUDA_VERSION
  default:
    PtxFeature = "+ptx42";
  }
  Features.push_back(PtxFeature);
}

/// NVPTX toolchain. Our assembler is ptxas, and our linker is nvlink. This
/// operates as a stand-alone version of the NVPTX tools without the host
/// toolchain.
NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
                               const llvm::Triple &HostTriple,
                               const ArgList &Args, bool Freestanding = false)
    : ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args),
      Freestanding(Freestanding) {
  if (CudaInstallation.isValid())
    getProgramPaths().push_back(std::string(CudaInstallation.getBinPath()));
  // Look up binaries in the driver directory; this is used to
  // discover the 'nvptx-arch' executable.
  getProgramPaths().push_back(getDriver().Dir);
}

/// We only need the host triple to locate the CUDA binary utilities; use the
/// system's default triple if not provided.
NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
                               const ArgList &Args)
    : NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args,
                     /*Freestanding=*/true) {}

llvm::opt::DerivedArgList *
NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
                              StringRef BoundArch,
                              Action::OffloadKind OffloadKind) const {
  DerivedArgList *DAL = ToolChain::TranslateArgs(Args, BoundArch, OffloadKind);
  if (!DAL)
    DAL = new DerivedArgList(Args.getBaseArgs());

  const OptTable &Opts = getDriver().getOpts();

  for (Arg *A : Args)
    if (!llvm::is_contained(*DAL, A))
      DAL->append(A);

  if (!DAL->hasArg(options::OPT_march_EQ) && OffloadKind != Action::OFK_None) {
    DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
                      OffloadArchToString(OffloadArch::CudaDefault));
  } else if (DAL->getLastArgValue(options::OPT_march_EQ) == "generic" &&
             OffloadKind == Action::OFK_None) {
    DAL->eraseArg(options::OPT_march_EQ);
  } else if (DAL->getLastArgValue(options::OPT_march_EQ) == "native") {
    auto GPUsOrErr = getSystemGPUArchs(Args);
    if (!GPUsOrErr) {
      getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
          << getArchName() << llvm::toString(GPUsOrErr.takeError()) << "-march";
    } else {
      if (GPUsOrErr->size() > 1)
        getDriver().Diag(diag::warn_drv_multi_gpu_arch)
            << getArchName() << llvm::join(*GPUsOrErr, ", ") << "-march";
      DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
                        Args.MakeArgString(GPUsOrErr->front()));
    }
  }

  return DAL;
}

void NVPTXToolChain::addClangTargetOptions(
    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
    Action::OffloadKind DeviceOffloadingKind) const {
  // If we are compiling with a standalone NVPTX toolchain we want to try to
  // mimic a standard environment as much as possible. So we enable lowering
  // ctor / dtor functions to global symbols that can be registered.
  if (Freestanding)
    CC1Args.append({"-mllvm", "--nvptx-lower-global-ctor-dtor"});
}

bool NVPTXToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
  const Option &O = A->getOption();
  return (O.matches(options::OPT_gN_Group) &&
          !O.matches(options::OPT_gmodules)) ||
         O.matches(options::OPT_g_Flag) ||
         O.matches(options::OPT_ggdbN_Group) || O.matches(options::OPT_ggdb) ||
         O.matches(options::OPT_gdwarf) || O.matches(options::OPT_gdwarf_2) ||
         O.matches(options::OPT_gdwarf_3) || O.matches(options::OPT_gdwarf_4) ||
         O.matches(options::OPT_gdwarf_5) ||
         O.matches(options::OPT_gcolumn_info);
}

void NVPTXToolChain::adjustDebugInfoKind(
    llvm::codegenoptions::DebugInfoKind &DebugInfoKind,
    const ArgList &Args) const {
  switch (mustEmitDebugInfo(Args)) {
  case DisableDebugInfo:
    DebugInfoKind = llvm::codegenoptions::NoDebugInfo;
    break;
  case DebugDirectivesOnly:
    DebugInfoKind = llvm::codegenoptions::DebugDirectivesOnly;
    break;
  case EmitSameDebugInfoAsHost:
    // Use same debug info level as the host.
    break;
  }
}

Expected<SmallVector<std::string>>
NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const {
  // Detect NVIDIA GPUs available on the system.
  std::string Program;
  if (Arg *A = Args.getLastArg(options::OPT_nvptx_arch_tool_EQ))
    Program = A->getValue();
  else
    Program = GetProgramPath("nvptx-arch");

  auto StdoutOrErr = executeToolChainProgram(Program, /*SecondsToWait=*/10);
  if (!StdoutOrErr)
    return StdoutOrErr.takeError();

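  // nvptx-arch prints one offloading arch (e.g. sm_80) per detected GPU, one
  // per line.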
  SmallVector<std::string, 1> GPUArchs;
  for (StringRef Arch : llvm::split((*StdoutOrErr)->getBuffer(), "\n"))
    if (!Arch.empty())
      GPUArchs.push_back(Arch.str());

  if (GPUArchs.empty())
    return llvm::createStringError(std::error_code(),
                                   "No NVIDIA GPU detected in the system");

  return std::move(GPUArchs);
}

/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
/// which isn't properly a linker but nonetheless performs the step of stitching
/// together object files from the assembler into a single blob.

CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
                             const ToolChain &HostTC, const ArgList &Args)
    : NVPTXToolChain(D, Triple, HostTC.getTriple(), Args), HostTC(HostTC) {}

void CudaToolChain::addClangTargetOptions(
    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
    Action::OffloadKind DeviceOffloadingKind) const {
  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);

  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
  assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
          DeviceOffloadingKind == Action::OFK_Cuda) &&
         "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");

  if (DeviceOffloadingKind == Action::OFK_Cuda) {
    CC1Args.append(
        {"-fcuda-is-device", "-mllvm", "-enable-memcpyopt-without-libcalls"});

    // Unsized function arguments used for variadics were introduced in
    // CUDA-9.0. We do not support generating code that actually uses variadic
    // arguments yet, but we need to allow parsing them, as recent CUDA headers
    // rely on that. https://github.com/llvm/llvm-project/issues/58410
    if (CudaInstallation.version() >= CudaVersion::CUDA_90)
      CC1Args.push_back("-fcuda-allow-variadic-functions");
  }

  if (DriverArgs.hasArg(options::OPT_nogpulib))
    return;

  if (DeviceOffloadingKind == Action::OFK_OpenMP &&
      DriverArgs.hasArg(options::OPT_S))
    return;

  std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
  if (LibDeviceFile.empty()) {
    getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
    return;
  }

  CC1Args.push_back("-mlink-builtin-bitcode");
  CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));

  clang::CudaVersion CudaInstallationVersion = CudaInstallation.version();

  if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
                         options::OPT_fno_cuda_short_ptr, false))
    CC1Args.append({"-mllvm", "--nvptx-short-ptr"});

  if (CudaInstallationVersion >= CudaVersion::UNKNOWN)
    CC1Args.push_back(
        DriverArgs.MakeArgString(Twine("-target-sdk-version=") +
                                 CudaVersionToString(CudaInstallationVersion)));

  if (DeviceOffloadingKind == Action::OFK_OpenMP) {
    if (CudaInstallationVersion < CudaVersion::CUDA_92) {
      getDriver().Diag(
          diag::err_drv_omp_offload_target_cuda_version_not_support)
          << CudaVersionToString(CudaInstallationVersion);
      return;
    }

    // Link the bitcode library late if we're using device LTO.
    if (getDriver().isUsingLTO(/* IsOffload */ true))
      return;

    addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, GpuArch.str(),
                       getTriple(), HostTC);
  }
}

llvm::DenormalMode CudaToolChain::getDefaultDenormalModeForType(
    const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
    const llvm::fltSemantics *FPType) const {
  if (JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
    if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
        DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
                           options::OPT_fno_gpu_flush_denormals_to_zero, false))
      return llvm::DenormalMode::getPreserveSign();
  }

  assert(JA.getOffloadingDeviceKind() != Action::OFK_Host);
  return llvm::DenormalMode::getIEEE();
}

void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
                                       ArgStringList &CC1Args) const {
  // Check our CUDA version if we're going to include the CUDA headers.
  if (!DriverArgs.hasArg(options::OPT_nogpuinc) &&
      !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
    StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
    assert(!Arch.empty() && "Must have an explicit GPU arch.");
    CudaInstallation.CheckCudaVersionSupportsArch(StringToOffloadArch(Arch));
  }
  CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
}

std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
  // Only object files are changed; for example, assembly files keep their .s
  // extensions. If the user requested device-only compilation, don't change it.
  if (Input.getType() != types::TY_Object || getDriver().offloadDeviceOnly())
    return ToolChain::getInputFilename(Input);

  return ToolChain::getInputFilename(Input);
}

llvm::opt::DerivedArgList *
CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
                             StringRef BoundArch,
                             Action::OffloadKind DeviceOffloadKind) const {
  DerivedArgList *DAL =
      HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
  if (!DAL)
    DAL = new DerivedArgList(Args.getBaseArgs());

  const OptTable &Opts = getDriver().getOpts();

  // For OpenMP device offloading, append derived arguments. Make sure
  // flags are not duplicated.
  // Also append the compute capability.
  if (DeviceOffloadKind == Action::OFK_OpenMP) {
    for (Arg *A : Args)
      if (!llvm::is_contained(*DAL, A))
        DAL->append(A);

    if (!DAL->hasArg(options::OPT_march_EQ)) {
      StringRef Arch = BoundArch;
      if (Arch.empty()) {
        auto ArchsOrErr = getSystemGPUArchs(Args);
        if (!ArchsOrErr) {
          std::string ErrMsg =
              llvm::formatv("{0}", llvm::fmt_consume(ArchsOrErr.takeError()));
          getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
              << llvm::Triple::getArchTypeName(getArch()) << ErrMsg << "-march";
          Arch = OffloadArchToString(OffloadArch::CudaDefault);
        } else {
          Arch = Args.MakeArgString(ArchsOrErr->front());
        }
      }
      DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), Arch);
    }

    return DAL;
  }

  for (Arg *A : Args) {
    // Make sure flags are not duplicated.
    if (!llvm::is_contained(*DAL, A)) {
      DAL->append(A);
    }
  }

  if (!BoundArch.empty()) {
    DAL->eraseArg(options::OPT_march_EQ);
    DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
                      BoundArch);
  }
  return DAL;
}

Tool *NVPTXToolChain::buildAssembler() const {
  return new tools::NVPTX::Assembler(*this);
}

Tool *NVPTXToolChain::buildLinker() const {
  return new tools::NVPTX::Linker(*this);
}

Tool *CudaToolChain::buildAssembler() const {
  return new tools::NVPTX::Assembler(*this);
}

Tool *CudaToolChain::buildLinker() const {
  return new tools::NVPTX::FatBinary(*this);
}

void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
  HostTC.addClangWarningOptions(CC1Args);
}

ToolChain::CXXStdlibType
CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
  return HostTC.GetCXXStdlibType(Args);
}

void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
                                              ArgStringList &CC1Args) const {
  HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);

  if (!DriverArgs.hasArg(options::OPT_nogpuinc) && CudaInstallation.isValid())
    CC1Args.append(
        {"-internal-isystem",
         DriverArgs.MakeArgString(CudaInstallation.getIncludePath())});
}

void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
                                                 ArgStringList &CC1Args) const {
  HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
}

void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
                                        ArgStringList &CC1Args) const {
  HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
}

SanitizerMask CudaToolChain::getSupportedSanitizers() const {
  // The CudaToolChain only supports sanitizers in the sense that it allows
  // sanitizer arguments on the command line if they are supported by the host
  // toolchain. The CudaToolChain will actually ignore any command line
  // arguments for any of these "supported" sanitizers. That means that no
  // sanitization of device code is actually supported at this time.
  //
  // This behavior is necessary because the host and device toolchain
  // invocations often share the command line, so the device toolchain must
  // tolerate flags meant only for the host toolchain.
  return HostTC.getSupportedSanitizers();
}

VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
                                               const ArgList &Args) const {
  return HostTC.computeMSVCVersion(D, Args);
}