| 1 | //===--- Cuda.cpp - Cuda Tool and ToolChain Implementations -----*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "Cuda.h" |
| 10 | #include "clang/Basic/Cuda.h" |
| 11 | #include "clang/Config/config.h" |
| 12 | #include "clang/Driver/CommonArgs.h" |
| 13 | #include "clang/Driver/Compilation.h" |
| 14 | #include "clang/Driver/Distro.h" |
| 15 | #include "clang/Driver/Driver.h" |
| 16 | #include "clang/Driver/InputInfo.h" |
| 17 | #include "clang/Options/Options.h" |
| 18 | #include "llvm/ADT/SmallSet.h" |
| 19 | #include "llvm/ADT/StringExtras.h" |
| 20 | #include "llvm/Config/llvm-config.h" // for LLVM_HOST_TRIPLE |
| 21 | #include "llvm/Option/ArgList.h" |
| 22 | #include "llvm/Support/FileSystem.h" |
| 23 | #include "llvm/Support/Path.h" |
| 24 | #include "llvm/Support/Process.h" |
| 25 | #include "llvm/Support/Program.h" |
| 26 | #include "llvm/Support/VirtualFileSystem.h" |
| 27 | #include "llvm/TargetParser/Host.h" |
| 28 | #include "llvm/TargetParser/TargetParser.h" |
| 29 | #include <system_error> |
| 30 | |
| 31 | using namespace clang::driver; |
| 32 | using namespace clang::driver::toolchains; |
| 33 | using namespace clang::driver::tools; |
| 34 | using namespace clang; |
| 35 | using namespace llvm::opt; |
| 36 | |
| 37 | namespace { |
| 38 | |
| 39 | CudaVersion getCudaVersion(uint32_t raw_version) { |
| 40 | if (raw_version < 7050) |
| 41 | return CudaVersion::CUDA_70; |
| 42 | if (raw_version < 8000) |
| 43 | return CudaVersion::CUDA_75; |
| 44 | if (raw_version < 9000) |
| 45 | return CudaVersion::CUDA_80; |
| 46 | if (raw_version < 9010) |
| 47 | return CudaVersion::CUDA_90; |
| 48 | if (raw_version < 9020) |
| 49 | return CudaVersion::CUDA_91; |
| 50 | if (raw_version < 10000) |
| 51 | return CudaVersion::CUDA_92; |
| 52 | if (raw_version < 10010) |
| 53 | return CudaVersion::CUDA_100; |
| 54 | if (raw_version < 10020) |
| 55 | return CudaVersion::CUDA_101; |
| 56 | if (raw_version < 11000) |
| 57 | return CudaVersion::CUDA_102; |
| 58 | if (raw_version < 11010) |
| 59 | return CudaVersion::CUDA_110; |
| 60 | if (raw_version < 11020) |
| 61 | return CudaVersion::CUDA_111; |
| 62 | if (raw_version < 11030) |
| 63 | return CudaVersion::CUDA_112; |
| 64 | if (raw_version < 11040) |
| 65 | return CudaVersion::CUDA_113; |
| 66 | if (raw_version < 11050) |
| 67 | return CudaVersion::CUDA_114; |
| 68 | if (raw_version < 11060) |
| 69 | return CudaVersion::CUDA_115; |
| 70 | if (raw_version < 11070) |
| 71 | return CudaVersion::CUDA_116; |
| 72 | if (raw_version < 11080) |
| 73 | return CudaVersion::CUDA_117; |
| 74 | if (raw_version < 11090) |
| 75 | return CudaVersion::CUDA_118; |
| 76 | if (raw_version < 12010) |
| 77 | return CudaVersion::CUDA_120; |
| 78 | if (raw_version < 12020) |
| 79 | return CudaVersion::CUDA_121; |
| 80 | if (raw_version < 12030) |
| 81 | return CudaVersion::CUDA_122; |
| 82 | if (raw_version < 12040) |
| 83 | return CudaVersion::CUDA_123; |
| 84 | if (raw_version < 12050) |
| 85 | return CudaVersion::CUDA_124; |
| 86 | if (raw_version < 12060) |
| 87 | return CudaVersion::CUDA_125; |
| 88 | if (raw_version < 12070) |
| 89 | return CudaVersion::CUDA_126; |
| 90 | if (raw_version < 12090) |
| 91 | return CudaVersion::CUDA_128; |
| 92 | if (raw_version < 13000) |
| 93 | return CudaVersion::CUDA_129; |
| 94 | return CudaVersion::NEW; |
| 95 | } |
| 96 | |
| 97 | CudaVersion parseCudaHFile(llvm::StringRef Input) { |
| 98 | // Helper lambda which skips the words if the line starts with them or returns |
| 99 | // std::nullopt otherwise. |
| 100 | auto StartsWithWords = |
| 101 | [](llvm::StringRef Line, |
| 102 | const SmallVector<StringRef, 3> words) -> std::optional<StringRef> { |
| 103 | for (StringRef word : words) { |
| 104 | if (!Line.consume_front(Prefix: word)) |
| 105 | return {}; |
| 106 | Line = Line.ltrim(); |
| 107 | } |
| 108 | return Line; |
| 109 | }; |
| 110 | |
| 111 | Input = Input.ltrim(); |
| 112 | while (!Input.empty()) { |
| 113 | if (auto Line = |
| 114 | StartsWithWords(Input.ltrim(), {"#" , "define" , "CUDA_VERSION" })) { |
| 115 | uint32_t RawVersion; |
| 116 | Line->consumeInteger(Radix: 10, Result&: RawVersion); |
| 117 | return getCudaVersion(raw_version: RawVersion); |
| 118 | } |
| 119 | // Find next non-empty line. |
| 120 | Input = Input.drop_front(N: Input.find_first_of(Chars: "\n\r" )).ltrim(); |
| 121 | } |
| 122 | return CudaVersion::UNKNOWN; |
| 123 | } |
| 124 | } // namespace |
| 125 | |
| 126 | void CudaInstallationDetector::WarnIfUnsupportedVersion() const { |
| 127 | if (Version > CudaVersion::PARTIALLY_SUPPORTED) { |
| 128 | std::string VersionString = CudaVersionToString(V: Version); |
| 129 | if (!VersionString.empty()) |
| 130 | VersionString.insert(pos: 0, s: " " ); |
| 131 | D.Diag(DiagID: diag::warn_drv_new_cuda_version) |
| 132 | << VersionString |
| 133 | << (CudaVersion::PARTIALLY_SUPPORTED != CudaVersion::FULLY_SUPPORTED) |
| 134 | << CudaVersionToString(V: CudaVersion::PARTIALLY_SUPPORTED); |
| 135 | } else if (Version > CudaVersion::FULLY_SUPPORTED) |
| 136 | D.Diag(DiagID: diag::warn_drv_partially_supported_cuda_version) |
| 137 | << CudaVersionToString(V: Version); |
| 138 | } |
| 139 | |
| 140 | CudaInstallationDetector::CudaInstallationDetector( |
| 141 | const Driver &D, const llvm::Triple &HostTriple, |
| 142 | const llvm::opt::ArgList &Args) |
| 143 | : D(D) { |
| 144 | struct Candidate { |
| 145 | std::string Path; |
| 146 | bool StrictChecking; |
| 147 | |
| 148 | Candidate(std::string Path, bool StrictChecking = false) |
| 149 | : Path(Path), StrictChecking(StrictChecking) {} |
| 150 | }; |
| 151 | SmallVector<Candidate, 4> Candidates; |
| 152 | |
| 153 | // In decreasing order so we prefer newer versions to older versions. |
| 154 | std::initializer_list<const char *> Versions = {"8.0" , "7.5" , "7.0" }; |
| 155 | auto &FS = D.getVFS(); |
| 156 | |
| 157 | if (Args.hasArg(Ids: options::OPT_cuda_path_EQ)) { |
| 158 | Candidates.emplace_back( |
| 159 | Args: Args.getLastArgValue(Id: options::OPT_cuda_path_EQ).str()); |
| 160 | } else if (HostTriple.isOSWindows()) { |
| 161 | for (const char *Ver : Versions) |
| 162 | Candidates.emplace_back( |
| 163 | Args: D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" + |
| 164 | Ver); |
| 165 | } else { |
| 166 | if (!Args.hasArg(Ids: options::OPT_cuda_path_ignore_env)) { |
| 167 | // Try to find ptxas binary. If the executable is located in a directory |
| 168 | // called 'bin/', its parent directory might be a good guess for a valid |
| 169 | // CUDA installation. |
| 170 | // However, some distributions might installs 'ptxas' to /usr/bin. In that |
| 171 | // case the candidate would be '/usr' which passes the following checks |
| 172 | // because '/usr/include' exists as well. To avoid this case, we always |
| 173 | // check for the directory potentially containing files for libdevice, |
| 174 | // even if the user passes -nocudalib. |
| 175 | if (llvm::ErrorOr<std::string> ptxas = |
| 176 | llvm::sys::findProgramByName(Name: "ptxas" )) { |
| 177 | SmallString<256> ptxasAbsolutePath; |
| 178 | llvm::sys::fs::real_path(path: *ptxas, output&: ptxasAbsolutePath); |
| 179 | |
| 180 | StringRef ptxasDir = llvm::sys::path::parent_path(path: ptxasAbsolutePath); |
| 181 | if (llvm::sys::path::filename(path: ptxasDir) == "bin" ) |
| 182 | Candidates.emplace_back( |
| 183 | Args: std::string(llvm::sys::path::parent_path(path: ptxasDir)), |
| 184 | /*StrictChecking=*/Args: true); |
| 185 | } |
| 186 | } |
| 187 | |
| 188 | Candidates.emplace_back(Args: D.SysRoot + "/usr/local/cuda" ); |
| 189 | for (const char *Ver : Versions) |
| 190 | Candidates.emplace_back(Args: D.SysRoot + "/usr/local/cuda-" + Ver); |
| 191 | |
| 192 | Distro Dist(FS, llvm::Triple(llvm::sys::getProcessTriple())); |
| 193 | if (Dist.IsDebian() || Dist.IsUbuntu()) |
| 194 | // Special case for Debian to have nvidia-cuda-toolkit work |
| 195 | // out of the box. More info on http://bugs.debian.org/882505 |
| 196 | Candidates.emplace_back(Args: D.SysRoot + "/usr/lib/cuda" ); |
| 197 | } |
| 198 | |
| 199 | bool NoCudaLib = |
| 200 | !Args.hasFlag(Pos: options::OPT_offloadlib, Neg: options::OPT_no_offloadlib, Default: true); |
| 201 | |
| 202 | for (const auto &Candidate : Candidates) { |
| 203 | InstallPath = Candidate.Path; |
| 204 | if (InstallPath.empty() || !FS.exists(Path: InstallPath)) |
| 205 | continue; |
| 206 | |
| 207 | BinPath = InstallPath + "/bin" ; |
| 208 | IncludePath = InstallPath + "/include" ; |
| 209 | LibDevicePath = InstallPath + "/nvvm/libdevice" ; |
| 210 | |
| 211 | if (!(FS.exists(Path: IncludePath) && FS.exists(Path: BinPath))) |
| 212 | continue; |
| 213 | bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking); |
| 214 | if (CheckLibDevice && !FS.exists(Path: LibDevicePath)) |
| 215 | continue; |
| 216 | |
| 217 | Version = CudaVersion::UNKNOWN; |
| 218 | if (auto CudaHFile = FS.getBufferForFile(Name: InstallPath + "/include/cuda.h" )) |
| 219 | Version = parseCudaHFile(Input: (*CudaHFile)->getBuffer()); |
| 220 | // As the last resort, make an educated guess between CUDA-7.0, which had |
| 221 | // old-style libdevice bitcode, and an unknown recent CUDA version. |
| 222 | if (Version == CudaVersion::UNKNOWN) { |
| 223 | Version = FS.exists(Path: LibDevicePath + "/libdevice.10.bc" ) |
| 224 | ? CudaVersion::NEW |
| 225 | : CudaVersion::CUDA_70; |
| 226 | } |
| 227 | |
| 228 | if (Version >= CudaVersion::CUDA_90) { |
| 229 | // CUDA-9+ uses single libdevice file for all GPU variants. |
| 230 | std::string FilePath = LibDevicePath + "/libdevice.10.bc" ; |
| 231 | if (FS.exists(Path: FilePath)) { |
| 232 | for (int Arch = (int)OffloadArch::SM_30, E = (int)OffloadArch::LAST; |
| 233 | Arch < E; ++Arch) { |
| 234 | OffloadArch OA = static_cast<OffloadArch>(Arch); |
| 235 | if (!IsNVIDIAOffloadArch(A: OA)) |
| 236 | continue; |
| 237 | std::string OffloadArchName(OffloadArchToString(A: OA)); |
| 238 | LibDeviceMap[OffloadArchName] = FilePath; |
| 239 | } |
| 240 | } |
| 241 | } else { |
| 242 | std::error_code EC; |
| 243 | for (llvm::vfs::directory_iterator LI = FS.dir_begin(Dir: LibDevicePath, EC), |
| 244 | LE; |
| 245 | !EC && LI != LE; LI = LI.increment(EC)) { |
| 246 | StringRef FilePath = LI->path(); |
| 247 | StringRef FileName = llvm::sys::path::filename(path: FilePath); |
| 248 | // Process all bitcode filenames that look like |
| 249 | // libdevice.compute_XX.YY.bc |
| 250 | const StringRef LibDeviceName = "libdevice." ; |
| 251 | if (!(FileName.starts_with(Prefix: LibDeviceName) && FileName.ends_with(Suffix: ".bc" ))) |
| 252 | continue; |
| 253 | StringRef GpuArch = FileName.slice( |
| 254 | Start: LibDeviceName.size(), End: FileName.find(C: '.', From: LibDeviceName.size())); |
| 255 | LibDeviceMap[GpuArch] = FilePath.str(); |
| 256 | // Insert map entries for specific devices with this compute |
| 257 | // capability. NVCC's choice of the libdevice library version is |
| 258 | // rather peculiar and depends on the CUDA version. |
| 259 | if (GpuArch == "compute_20" ) { |
| 260 | LibDeviceMap["sm_20" ] = std::string(FilePath); |
| 261 | LibDeviceMap["sm_21" ] = std::string(FilePath); |
| 262 | LibDeviceMap["sm_32" ] = std::string(FilePath); |
| 263 | } else if (GpuArch == "compute_30" ) { |
| 264 | LibDeviceMap["sm_30" ] = std::string(FilePath); |
| 265 | if (Version < CudaVersion::CUDA_80) { |
| 266 | LibDeviceMap["sm_50" ] = std::string(FilePath); |
| 267 | LibDeviceMap["sm_52" ] = std::string(FilePath); |
| 268 | LibDeviceMap["sm_53" ] = std::string(FilePath); |
| 269 | } |
| 270 | LibDeviceMap["sm_60" ] = std::string(FilePath); |
| 271 | LibDeviceMap["sm_61" ] = std::string(FilePath); |
| 272 | LibDeviceMap["sm_62" ] = std::string(FilePath); |
| 273 | } else if (GpuArch == "compute_35" ) { |
| 274 | LibDeviceMap["sm_35" ] = std::string(FilePath); |
| 275 | LibDeviceMap["sm_37" ] = std::string(FilePath); |
| 276 | } else if (GpuArch == "compute_50" ) { |
| 277 | if (Version >= CudaVersion::CUDA_80) { |
| 278 | LibDeviceMap["sm_50" ] = std::string(FilePath); |
| 279 | LibDeviceMap["sm_52" ] = std::string(FilePath); |
| 280 | LibDeviceMap["sm_53" ] = std::string(FilePath); |
| 281 | } |
| 282 | } |
| 283 | } |
| 284 | } |
| 285 | |
| 286 | // Check that we have found at least one libdevice that we can link in if |
| 287 | // -nocudalib hasn't been specified. |
| 288 | if (LibDeviceMap.empty() && !NoCudaLib) |
| 289 | continue; |
| 290 | |
| 291 | IsValid = true; |
| 292 | break; |
| 293 | } |
| 294 | } |
| 295 | |
| 296 | void CudaInstallationDetector::AddCudaIncludeArgs( |
| 297 | const ArgList &DriverArgs, ArgStringList &CC1Args) const { |
| 298 | if (!DriverArgs.hasArg(Ids: options::OPT_nobuiltininc)) { |
| 299 | // Add cuda_wrappers/* to our system include path. This lets us wrap |
| 300 | // standard library headers. |
| 301 | SmallString<128> P(D.ResourceDir); |
| 302 | llvm::sys::path::append(path&: P, a: "include" ); |
| 303 | llvm::sys::path::append(path&: P, a: "cuda_wrappers" ); |
| 304 | CC1Args.push_back(Elt: "-internal-isystem" ); |
| 305 | CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: P)); |
| 306 | } |
| 307 | |
| 308 | if (!DriverArgs.hasFlag(Pos: options::OPT_offload_inc, Neg: options::OPT_no_offload_inc, |
| 309 | Default: true)) |
| 310 | return; |
| 311 | |
| 312 | if (!isValid()) { |
| 313 | D.Diag(DiagID: diag::err_drv_no_cuda_installation); |
| 314 | return; |
| 315 | } |
| 316 | |
| 317 | CC1Args.push_back(Elt: "-include" ); |
| 318 | CC1Args.push_back(Elt: "__clang_cuda_runtime_wrapper.h" ); |
| 319 | } |
| 320 | |
| 321 | void CudaInstallationDetector::CheckCudaVersionSupportsArch( |
| 322 | OffloadArch Arch) const { |
| 323 | if (Arch == OffloadArch::Unknown || Version == CudaVersion::UNKNOWN || |
| 324 | ArchsWithBadVersion[(int)Arch]) |
| 325 | return; |
| 326 | |
| 327 | auto MinVersion = MinVersionForOffloadArch(A: Arch); |
| 328 | auto MaxVersion = MaxVersionForOffloadArch(A: Arch); |
| 329 | if (Version < MinVersion || Version > MaxVersion) { |
| 330 | ArchsWithBadVersion[(int)Arch] = true; |
| 331 | D.Diag(DiagID: diag::err_drv_cuda_version_unsupported) |
| 332 | << OffloadArchToString(A: Arch) << CudaVersionToString(V: MinVersion) |
| 333 | << CudaVersionToString(V: MaxVersion) << InstallPath |
| 334 | << CudaVersionToString(V: Version); |
| 335 | } |
| 336 | } |
| 337 | |
| 338 | void CudaInstallationDetector::print(raw_ostream &OS) const { |
| 339 | if (isValid()) |
| 340 | OS << "Found CUDA installation: " << InstallPath << ", version " |
| 341 | << CudaVersionToString(V: Version) << "\n" ; |
| 342 | } |
| 343 | |
namespace {
/// Debug info level for the NVPTX devices. We may need to emit a different
/// debug info level for the host and for the device itself. This type
/// controls emission of debug info for the devices: it either disables debug
/// info emission completely, emits debug directives only, or emits the same
/// debug info as for the host.
enum DeviceDebugInfoLevel {
  DisableDebugInfo,        /// Do not emit debug info for the devices.
  DebugDirectivesOnly,     /// Emit only debug directives.
  EmitSameDebugInfoAsHost, /// Use the same debug info level as for the host.
};
} // anonymous namespace
| 357 | |
| 358 | /// Define debug info level for the NVPTX devices. If the debug info for both |
| 359 | /// the host and device are disabled (-g0/-ggdb0 or no debug options at all). If |
| 360 | /// only debug directives are requested for the both host and device |
| 361 | /// (-gline-directvies-only), or the debug info only for the device is disabled |
| 362 | /// (optimization is on and --cuda-noopt-device-debug was not specified), the |
| 363 | /// debug directves only must be emitted for the device. Otherwise, use the same |
| 364 | /// debug info level just like for the host (with the limitations of only |
| 365 | /// supported DWARF2 standard). |
| 366 | static DeviceDebugInfoLevel mustEmitDebugInfo(const ArgList &Args) { |
| 367 | const Arg *A = Args.getLastArg(Ids: options::OPT_O_Group); |
| 368 | bool IsDebugEnabled = !A || A->getOption().matches(ID: options::OPT_O0) || |
| 369 | Args.hasFlag(Pos: options::OPT_cuda_noopt_device_debug, |
| 370 | Neg: options::OPT_no_cuda_noopt_device_debug, |
| 371 | /*Default=*/false); |
| 372 | if (const Arg *A = Args.getLastArg(Ids: options::OPT_g_Group)) { |
| 373 | const Option &Opt = A->getOption(); |
| 374 | if (Opt.matches(ID: options::OPT_gN_Group)) { |
| 375 | if (Opt.matches(ID: options::OPT_g0) || Opt.matches(ID: options::OPT_ggdb0)) |
| 376 | return DisableDebugInfo; |
| 377 | if (Opt.matches(ID: options::OPT_gline_directives_only)) |
| 378 | return DebugDirectivesOnly; |
| 379 | } |
| 380 | return IsDebugEnabled ? EmitSameDebugInfoAsHost : DebugDirectivesOnly; |
| 381 | } |
| 382 | return willEmitRemarks(Args) ? DebugDirectivesOnly : DisableDebugInfo; |
| 383 | } |
| 384 | |
| 385 | void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA, |
| 386 | const InputInfo &Output, |
| 387 | const InputInfoList &Inputs, |
| 388 | const ArgList &Args, |
| 389 | const char *LinkingOutput) const { |
| 390 | const auto &TC = |
| 391 | static_cast<const toolchains::NVPTXToolChain &>(getToolChain()); |
| 392 | assert(TC.getTriple().isNVPTX() && "Wrong platform" ); |
| 393 | |
| 394 | StringRef GPUArchName; |
| 395 | // If this is a CUDA action we need to extract the device architecture |
| 396 | // from the Job's associated architecture, otherwise use the -march=arch |
| 397 | // option. This option may come from -Xopenmp-target flag or the default |
| 398 | // value. |
| 399 | if (JA.isDeviceOffloading(OKind: Action::OFK_Cuda)) { |
| 400 | GPUArchName = JA.getOffloadingArch(); |
| 401 | } else { |
| 402 | GPUArchName = Args.getLastArgValue(Id: options::OPT_march_EQ); |
| 403 | if (GPUArchName.empty()) { |
| 404 | C.getDriver().Diag(DiagID: diag::err_drv_offload_missing_gpu_arch) |
| 405 | << getToolChain().getArchName() << getShortName(); |
| 406 | return; |
| 407 | } |
| 408 | } |
| 409 | |
| 410 | // Obtain architecture from the action. |
| 411 | OffloadArch gpu_arch = StringToOffloadArch(S: GPUArchName); |
| 412 | assert(gpu_arch != OffloadArch::Unknown && |
| 413 | "Device action expected to have an architecture." ); |
| 414 | |
| 415 | // Check that our installation's ptxas supports gpu_arch. |
| 416 | if (!Args.hasArg(Ids: options::OPT_no_cuda_version_check)) { |
| 417 | TC.CudaInstallation.CheckCudaVersionSupportsArch(Arch: gpu_arch); |
| 418 | } |
| 419 | |
| 420 | ArgStringList CmdArgs; |
| 421 | CmdArgs.push_back(Elt: TC.getTriple().isArch64Bit() ? "-m64" : "-m32" ); |
| 422 | DeviceDebugInfoLevel DIKind = mustEmitDebugInfo(Args); |
| 423 | if (DIKind == EmitSameDebugInfoAsHost) { |
| 424 | // ptxas does not accept -g option if optimization is enabled, so |
| 425 | // we ignore the compiler's -O* options if we want debug info. |
| 426 | CmdArgs.push_back(Elt: "-g" ); |
| 427 | CmdArgs.push_back(Elt: "--dont-merge-basicblocks" ); |
| 428 | CmdArgs.push_back(Elt: "--return-at-end" ); |
| 429 | } else if (Arg *A = Args.getLastArg(Ids: options::OPT_O_Group)) { |
| 430 | // Map the -O we received to -O{0,1,2,3}. |
| 431 | // |
| 432 | // TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's |
| 433 | // default, so it may correspond more closely to the spirit of clang -O2. |
| 434 | |
| 435 | // -O3 seems like the least-bad option when -Osomething is specified to |
| 436 | // clang but it isn't handled below. |
| 437 | StringRef OOpt = "3" ; |
| 438 | if (A->getOption().matches(ID: options::OPT_O4) || |
| 439 | A->getOption().matches(ID: options::OPT_Ofast)) |
| 440 | OOpt = "3" ; |
| 441 | else if (A->getOption().matches(ID: options::OPT_O0)) |
| 442 | OOpt = "0" ; |
| 443 | else if (A->getOption().matches(ID: options::OPT_O)) { |
| 444 | // -Os, -Oz, and -O(anything else) map to -O2, for lack of better options. |
| 445 | OOpt = llvm::StringSwitch<const char *>(A->getValue()) |
| 446 | .Case(S: "1" , Value: "1" ) |
| 447 | .Case(S: "2" , Value: "2" ) |
| 448 | .Case(S: "3" , Value: "3" ) |
| 449 | .Case(S: "s" , Value: "2" ) |
| 450 | .Case(S: "z" , Value: "2" ) |
| 451 | .Default(Value: "2" ); |
| 452 | } |
| 453 | CmdArgs.push_back(Elt: Args.MakeArgString(Str: llvm::Twine("-O" ) + OOpt)); |
| 454 | } else { |
| 455 | // If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond |
| 456 | // to no optimizations, but ptxas's default is -O3. |
| 457 | CmdArgs.push_back(Elt: "-O0" ); |
| 458 | } |
| 459 | if (DIKind == DebugDirectivesOnly) |
| 460 | CmdArgs.push_back(Elt: "-lineinfo" ); |
| 461 | |
| 462 | // Pass -v to ptxas if it was passed to the driver. |
| 463 | if (Args.hasArg(Ids: options::OPT_v)) |
| 464 | CmdArgs.push_back(Elt: "-v" ); |
| 465 | |
| 466 | CmdArgs.push_back(Elt: "--gpu-name" ); |
| 467 | CmdArgs.push_back(Elt: Args.MakeArgString(Str: OffloadArchToString(A: gpu_arch))); |
| 468 | CmdArgs.push_back(Elt: "--output-file" ); |
| 469 | std::string OutputFileName = TC.getInputFilename(Input: Output); |
| 470 | |
| 471 | if (Output.isFilename() && OutputFileName != Output.getFilename()) |
| 472 | C.addTempFile(Name: Args.MakeArgString(Str: OutputFileName)); |
| 473 | |
| 474 | CmdArgs.push_back(Elt: Args.MakeArgString(Str: OutputFileName)); |
| 475 | for (const auto &II : Inputs) |
| 476 | CmdArgs.push_back(Elt: Args.MakeArgString(Str: II.getFilename())); |
| 477 | |
| 478 | for (const auto &A : Args.getAllArgValues(Id: options::OPT_Xcuda_ptxas)) |
| 479 | CmdArgs.push_back(Elt: Args.MakeArgString(Str: A)); |
| 480 | |
| 481 | bool Relocatable; |
| 482 | if (JA.isOffloading(OKind: Action::OFK_OpenMP)) |
| 483 | // In OpenMP we need to generate relocatable code. |
| 484 | Relocatable = Args.hasFlag(Pos: options::OPT_fopenmp_relocatable_target, |
| 485 | Neg: options::OPT_fnoopenmp_relocatable_target, |
| 486 | /*Default=*/true); |
| 487 | else if (JA.isOffloading(OKind: Action::OFK_Cuda)) |
| 488 | // In CUDA we generate relocatable code by default. |
| 489 | Relocatable = Args.hasFlag(Pos: options::OPT_fgpu_rdc, Neg: options::OPT_fno_gpu_rdc, |
| 490 | /*Default=*/false); |
| 491 | else |
| 492 | // Otherwise, we are compiling directly and should create linkable output. |
| 493 | Relocatable = true; |
| 494 | |
| 495 | if (Relocatable) |
| 496 | CmdArgs.push_back(Elt: "-c" ); |
| 497 | |
| 498 | const char *Exec; |
| 499 | if (Arg *A = Args.getLastArg(Ids: options::OPT_ptxas_path_EQ)) |
| 500 | Exec = A->getValue(); |
| 501 | else |
| 502 | Exec = Args.MakeArgString(Str: TC.GetProgramPath(Name: "ptxas" )); |
| 503 | C.addCommand(C: std::make_unique<Command>( |
| 504 | args: JA, args: *this, |
| 505 | args: ResponseFileSupport{.ResponseKind: ResponseFileSupport::RF_Full, .ResponseEncoding: llvm::sys::WEM_UTF8, |
| 506 | .ResponseFlag: "--options-file" }, |
| 507 | args&: Exec, args&: CmdArgs, args: Inputs, args: Output)); |
| 508 | } |
| 509 | |
| 510 | static bool shouldIncludePTX(const ArgList &Args, StringRef InputArch) { |
| 511 | // The new driver does not include PTX by default to avoid overhead. |
| 512 | bool includePTX = !Args.hasFlag(Pos: options::OPT_offload_new_driver, |
| 513 | Neg: options::OPT_no_offload_new_driver, Default: true); |
| 514 | for (Arg *A : Args.filtered(Ids: options::OPT_cuda_include_ptx_EQ, |
| 515 | Ids: options::OPT_no_cuda_include_ptx_EQ)) { |
| 516 | A->claim(); |
| 517 | const StringRef ArchStr = A->getValue(); |
| 518 | if (A->getOption().matches(ID: options::OPT_cuda_include_ptx_EQ) && |
| 519 | (ArchStr == "all" || ArchStr == InputArch)) |
| 520 | includePTX = true; |
| 521 | else if (A->getOption().matches(ID: options::OPT_no_cuda_include_ptx_EQ) && |
| 522 | (ArchStr == "all" || ArchStr == InputArch)) |
| 523 | includePTX = false; |
| 524 | } |
| 525 | return includePTX; |
| 526 | } |
| 527 | |
| 528 | // All inputs to this linker must be from CudaDeviceActions, as we need to look |
| 529 | // at the Inputs' Actions in order to figure out which GPU architecture they |
| 530 | // correspond to. |
| 531 | void NVPTX::FatBinary::ConstructJob(Compilation &C, const JobAction &JA, |
| 532 | const InputInfo &Output, |
| 533 | const InputInfoList &Inputs, |
| 534 | const ArgList &Args, |
| 535 | const char *LinkingOutput) const { |
| 536 | const auto &TC = |
| 537 | static_cast<const toolchains::CudaToolChain &>(getToolChain()); |
| 538 | assert(TC.getTriple().isNVPTX() && "Wrong platform" ); |
| 539 | |
| 540 | ArgStringList CmdArgs; |
| 541 | if (TC.CudaInstallation.version() <= CudaVersion::CUDA_100) |
| 542 | CmdArgs.push_back(Elt: "--cuda" ); |
| 543 | CmdArgs.push_back(Elt: TC.getTriple().isArch64Bit() ? "-64" : "-32" ); |
| 544 | CmdArgs.push_back(Elt: Args.MakeArgString(Str: "--create" )); |
| 545 | CmdArgs.push_back(Elt: Args.MakeArgString(Str: Output.getFilename())); |
| 546 | if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost) |
| 547 | CmdArgs.push_back(Elt: "-g" ); |
| 548 | |
| 549 | for (const auto &II : Inputs) { |
| 550 | auto *A = II.getAction(); |
| 551 | assert(A->getInputs().size() == 1 && |
| 552 | "Device offload action is expected to have a single input" ); |
| 553 | StringRef GpuArch = A->getOffloadingArch(); |
| 554 | assert(!GpuArch.empty() && |
| 555 | "Device action expected to have associated a GPU architecture!" ); |
| 556 | |
| 557 | if (II.getType() == types::TY_PP_Asm && !shouldIncludePTX(Args, InputArch: GpuArch)) |
| 558 | continue; |
| 559 | StringRef Kind = (II.getType() == types::TY_PP_Asm) ? "ptx" : "elf" ; |
| 560 | CmdArgs.push_back(Elt: Args.MakeArgString( |
| 561 | Str: "--image3=kind=" + Kind + ",sm=" + GpuArch.drop_front(N: 3) + |
| 562 | ",file=" + getToolChain().getInputFilename(Input: II))); |
| 563 | } |
| 564 | |
| 565 | for (const auto &A : Args.getAllArgValues(Id: options::OPT_Xcuda_fatbinary)) |
| 566 | CmdArgs.push_back(Elt: Args.MakeArgString(Str: A)); |
| 567 | |
| 568 | const char *Exec = Args.MakeArgString(Str: TC.GetProgramPath(Name: "fatbinary" )); |
| 569 | C.addCommand(C: std::make_unique<Command>( |
| 570 | args: JA, args: *this, |
| 571 | args: ResponseFileSupport{.ResponseKind: ResponseFileSupport::RF_Full, .ResponseEncoding: llvm::sys::WEM_UTF8, |
| 572 | .ResponseFlag: "--options-file" }, |
| 573 | args&: Exec, args&: CmdArgs, args: Inputs, args: Output)); |
| 574 | } |
| 575 | |
| 576 | void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, |
| 577 | const InputInfo &Output, |
| 578 | const InputInfoList &Inputs, |
| 579 | const ArgList &Args, |
| 580 | const char *LinkingOutput) const { |
| 581 | const auto &TC = |
| 582 | static_cast<const toolchains::NVPTXToolChain &>(getToolChain()); |
| 583 | ArgStringList CmdArgs; |
| 584 | |
| 585 | assert(TC.getTriple().isNVPTX() && "Wrong platform" ); |
| 586 | |
| 587 | assert((Output.isFilename() || Output.isNothing()) && "Invalid output." ); |
| 588 | if (Output.isFilename()) { |
| 589 | CmdArgs.push_back(Elt: "-o" ); |
| 590 | CmdArgs.push_back(Elt: Output.getFilename()); |
| 591 | } |
| 592 | |
| 593 | if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost) |
| 594 | CmdArgs.push_back(Elt: "-g" ); |
| 595 | |
| 596 | if (Args.hasArg(Ids: options::OPT_v)) |
| 597 | CmdArgs.push_back(Elt: "-v" ); |
| 598 | |
| 599 | StringRef GPUArch = Args.getLastArgValue(Id: options::OPT_march_EQ); |
| 600 | if (GPUArch.empty() && !C.getDriver().isUsingLTO()) { |
| 601 | C.getDriver().Diag(DiagID: diag::err_drv_offload_missing_gpu_arch) |
| 602 | << getToolChain().getArchName() << getShortName(); |
| 603 | return; |
| 604 | } |
| 605 | |
| 606 | if (!GPUArch.empty()) { |
| 607 | CmdArgs.push_back(Elt: "-arch" ); |
| 608 | CmdArgs.push_back(Elt: Args.MakeArgString(Str: GPUArch)); |
| 609 | } |
| 610 | |
| 611 | if (Args.hasArg(Ids: options::OPT_ptxas_path_EQ)) |
| 612 | CmdArgs.push_back(Elt: Args.MakeArgString( |
| 613 | Str: "--pxtas-path=" + Args.getLastArgValue(Id: options::OPT_ptxas_path_EQ))); |
| 614 | |
| 615 | if (Args.hasArg(Ids: options::OPT_cuda_path_EQ) || TC.CudaInstallation.isValid()) { |
| 616 | StringRef CudaPath = Args.getLastArgValue( |
| 617 | Id: options::OPT_cuda_path_EQ, |
| 618 | Default: llvm::sys::path::parent_path(path: TC.CudaInstallation.getBinPath())); |
| 619 | CmdArgs.push_back(Elt: Args.MakeArgString(Str: "--cuda-path=" + CudaPath)); |
| 620 | } |
| 621 | |
| 622 | // Add paths specified in LIBRARY_PATH environment variable as -L options. |
| 623 | addDirectoryList(Args, CmdArgs, ArgName: "-L" , EnvVar: "LIBRARY_PATH" ); |
| 624 | |
| 625 | // Add standard library search paths passed on the command line. |
| 626 | Args.AddAllArgs(Output&: CmdArgs, Id0: options::OPT_L); |
| 627 | getToolChain().AddFilePathLibArgs(Args, CmdArgs); |
| 628 | AddLinkerInputs(TC: getToolChain(), Inputs, Args, CmdArgs, JA); |
| 629 | |
| 630 | if (C.getDriver().isUsingLTO()) |
| 631 | addLTOOptions(ToolChain: getToolChain(), Args, CmdArgs, Output, Inputs, |
| 632 | IsThinLTO: C.getDriver().getLTOMode() == LTOK_Thin); |
| 633 | |
| 634 | // Forward the PTX features if the nvlink-wrapper needs it. |
| 635 | std::vector<StringRef> Features; |
| 636 | getNVPTXTargetFeatures(D: C.getDriver(), Triple: getToolChain().getTriple(), Args, |
| 637 | Features); |
| 638 | CmdArgs.push_back( |
| 639 | Elt: Args.MakeArgString(Str: "--plugin-opt=-mattr=" + llvm::join(R&: Features, Separator: "," ))); |
| 640 | |
| 641 | // Add paths for the default clang library path. |
| 642 | SmallString<256> DefaultLibPath = |
| 643 | llvm::sys::path::parent_path(path: TC.getDriver().Dir); |
| 644 | llvm::sys::path::append(path&: DefaultLibPath, CLANG_INSTALL_LIBDIR_BASENAME); |
| 645 | CmdArgs.push_back(Elt: Args.MakeArgString(Str: Twine("-L" ) + DefaultLibPath)); |
| 646 | |
| 647 | getToolChain().addProfileRTLibs(Args, CmdArgs); |
| 648 | |
| 649 | if (Args.hasArg(Ids: options::OPT_stdlib)) |
| 650 | CmdArgs.append(IL: {"-lc" , "-lm" }); |
| 651 | if (Args.hasArg(Ids: options::OPT_startfiles)) { |
| 652 | std::optional<std::string> IncludePath = getToolChain().getStdlibPath(); |
| 653 | if (!IncludePath) |
| 654 | IncludePath = "/lib" ; |
| 655 | SmallString<128> P(*IncludePath); |
| 656 | llvm::sys::path::append(path&: P, a: "crt1.o" ); |
| 657 | CmdArgs.push_back(Elt: Args.MakeArgString(Str: P)); |
| 658 | } |
| 659 | |
| 660 | C.addCommand(C: std::make_unique<Command>( |
| 661 | args: JA, args: *this, |
| 662 | args: ResponseFileSupport{.ResponseKind: ResponseFileSupport::RF_Full, .ResponseEncoding: llvm::sys::WEM_UTF8, |
| 663 | .ResponseFlag: "--options-file" }, |
| 664 | args: Args.MakeArgString(Str: getToolChain().GetProgramPath(Name: "clang-nvlink-wrapper" )), |
| 665 | args&: CmdArgs, args: Inputs, args: Output)); |
| 666 | } |
| 667 | |
/// Compute the target features forwarded to the NVPTX backend.
///
/// An explicit --cuda-feature= from the user wins outright. Otherwise the
/// detected CUDA installation's version selects the matching "+ptxNN"
/// feature so the backend may use instructions that PTX ISA provides.
void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
                                   const llvm::opt::ArgList &Args,
                                   std::vector<StringRef> &Features) {
  // User override: forward the requested PTX feature verbatim and stop.
  if (Args.hasArg(Ids: options::OPT_cuda_feature_EQ)) {
    StringRef PtxFeature = Args.getLastArgValue(Id: options::OPT_cuda_feature_EQ);
    Features.push_back(x: Args.MakeArgString(Str: PtxFeature));
    return;
  }
  CudaInstallationDetector CudaInstallation(D, Triple, Args);

  // New CUDA versions often introduce new instructions that are only supported
  // by new PTX version, so we need to raise PTX level to enable them in NVPTX
  // back-end.
  const char *PtxFeature = nullptr;
  // Table mapping each known CUDA toolkit release to the PTX ISA it ships.
  switch (CudaInstallation.version()) {
#define CASE_CUDA_VERSION(CUDA_VER, PTX_VER) \
  case CudaVersion::CUDA_##CUDA_VER: \
    PtxFeature = "+ptx" #PTX_VER; \
    break;
    CASE_CUDA_VERSION(129, 88);
    CASE_CUDA_VERSION(128, 87);
    CASE_CUDA_VERSION(126, 85);
    CASE_CUDA_VERSION(125, 85);
    CASE_CUDA_VERSION(124, 84);
    CASE_CUDA_VERSION(123, 83);
    CASE_CUDA_VERSION(122, 82);
    CASE_CUDA_VERSION(121, 81);
    CASE_CUDA_VERSION(120, 80);
    CASE_CUDA_VERSION(118, 78);
    CASE_CUDA_VERSION(117, 77);
    CASE_CUDA_VERSION(116, 76);
    CASE_CUDA_VERSION(115, 75);
    CASE_CUDA_VERSION(114, 74);
    CASE_CUDA_VERSION(113, 73);
    CASE_CUDA_VERSION(112, 72);
    CASE_CUDA_VERSION(111, 71);
    CASE_CUDA_VERSION(110, 70);
    CASE_CUDA_VERSION(102, 65);
    CASE_CUDA_VERSION(101, 64);
    CASE_CUDA_VERSION(100, 63);
    CASE_CUDA_VERSION(92, 61);
    CASE_CUDA_VERSION(91, 61);
    CASE_CUDA_VERSION(90, 60);
    CASE_CUDA_VERSION(80, 50);
    CASE_CUDA_VERSION(75, 43);
    CASE_CUDA_VERSION(70, 42);
#undef CASE_CUDA_VERSION
  // TODO: Use specific CUDA version once it's public.
  case clang::CudaVersion::NEW:
    PtxFeature = "+ptx86" ;
    break;
  default:
    // No PTX feature specified; let the backend choose based on the target SM.
    break;
  }
  if (PtxFeature)
    Features.push_back(x: PtxFeature);
}
| 726 | |
/// NVPTX toolchain. Our assembler is ptxas, and our linker is nvlink. This
/// operates as a stand-alone version of the NVPTX tools without the host
/// toolchain.
///
/// \param Triple the device (NVPTX) target triple.
/// \param HostTriple used only to locate the CUDA binary utilities.
NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
                               const llvm::Triple &HostTriple,
                               const ArgList &Args)
    : ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args) {
  // Prefer tools from a valid CUDA installation's bin directory.
  if (CudaInstallation.isValid())
    getProgramPaths().push_back(Elt: std::string(CudaInstallation.getBinPath()));
  // Lookup binaries into the driver directory, this is used to
  // discover the 'nvptx-arch' executable.
  getProgramPaths().push_back(Elt: getDriver().Dir);
}
| 740 | |
/// We only need the host triple to locate the CUDA binary utilities, use the
/// system's default triple if not provided.
NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
                               const ArgList &Args)
    : NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args) {}
| 746 | |
| 747 | llvm::opt::DerivedArgList * |
| 748 | NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, |
| 749 | StringRef BoundArch, |
| 750 | Action::OffloadKind OffloadKind) const { |
| 751 | DerivedArgList *DAL = ToolChain::TranslateArgs(Args, BoundArch, DeviceOffloadKind: OffloadKind); |
| 752 | if (!DAL) |
| 753 | DAL = new DerivedArgList(Args.getBaseArgs()); |
| 754 | |
| 755 | const OptTable &Opts = getDriver().getOpts(); |
| 756 | |
| 757 | for (Arg *A : Args) |
| 758 | if (!llvm::is_contained(Range&: *DAL, Element: A)) |
| 759 | DAL->append(A); |
| 760 | |
| 761 | if (!DAL->hasArg(Ids: options::OPT_march_EQ) && OffloadKind != Action::OFK_None) { |
| 762 | DAL->AddJoinedArg(BaseArg: nullptr, Opt: Opts.getOption(Opt: options::OPT_march_EQ), |
| 763 | Value: OffloadArchToString(A: OffloadArch::CudaDefault)); |
| 764 | } else if (DAL->getLastArgValue(Id: options::OPT_march_EQ) == "generic" && |
| 765 | OffloadKind == Action::OFK_None) { |
| 766 | DAL->eraseArg(Id: options::OPT_march_EQ); |
| 767 | } else if (DAL->getLastArgValue(Id: options::OPT_march_EQ) == "native" ) { |
| 768 | auto GPUsOrErr = getSystemGPUArchs(Args); |
| 769 | if (!GPUsOrErr) { |
| 770 | getDriver().Diag(DiagID: diag::err_drv_undetermined_gpu_arch) |
| 771 | << getArchName() << llvm::toString(E: GPUsOrErr.takeError()) << "-march" ; |
| 772 | } else { |
| 773 | auto &GPUs = *GPUsOrErr; |
| 774 | if (llvm::SmallSet<std::string, 1>(GPUs.begin(), GPUs.end()).size() > 1) |
| 775 | getDriver().Diag(DiagID: diag::warn_drv_multi_gpu_arch) |
| 776 | << getArchName() << llvm::join(R&: GPUs, Separator: ", " ) << "-march" ; |
| 777 | DAL->AddJoinedArg(BaseArg: nullptr, Opt: Opts.getOption(Opt: options::OPT_march_EQ), |
| 778 | Value: Args.MakeArgString(Str: GPUs.front())); |
| 779 | } |
| 780 | } |
| 781 | |
| 782 | return DAL; |
| 783 | } |
| 784 | |
// Intentionally a no-op: the standalone NVPTX toolchain adds no additional
// cc1 target options (contrast with CudaToolChain::addClangTargetOptions).
void NVPTXToolChain::addClangTargetOptions(
    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
    Action::OffloadKind DeviceOffloadingKind) const {}
| 788 | |
| 789 | void NVPTXToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, |
| 790 | ArgStringList &CC1Args) const { |
| 791 | if (DriverArgs.hasArg(Ids: options::OPT_nostdinc) || |
| 792 | DriverArgs.hasArg(Ids: options::OPT_nostdlibinc)) |
| 793 | return; |
| 794 | |
| 795 | if (std::optional<std::string> Path = getStdlibIncludePath()) |
| 796 | addSystemInclude(DriverArgs, CC1Args, Path: *Path); |
| 797 | } |
| 798 | |
| 799 | bool NVPTXToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const { |
| 800 | const Option &O = A->getOption(); |
| 801 | return (O.matches(ID: options::OPT_gN_Group) && |
| 802 | !O.matches(ID: options::OPT_gmodules)) || |
| 803 | O.matches(ID: options::OPT_g_Flag) || |
| 804 | O.matches(ID: options::OPT_ggdbN_Group) || O.matches(ID: options::OPT_ggdb) || |
| 805 | O.matches(ID: options::OPT_gdwarf) || O.matches(ID: options::OPT_gdwarf_2) || |
| 806 | O.matches(ID: options::OPT_gdwarf_3) || O.matches(ID: options::OPT_gdwarf_4) || |
| 807 | O.matches(ID: options::OPT_gdwarf_5) || |
| 808 | O.matches(ID: options::OPT_gcolumn_info); |
| 809 | } |
| 810 | |
| 811 | void NVPTXToolChain::adjustDebugInfoKind( |
| 812 | llvm::codegenoptions::DebugInfoKind &DebugInfoKind, |
| 813 | const ArgList &Args) const { |
| 814 | switch (mustEmitDebugInfo(Args)) { |
| 815 | case DisableDebugInfo: |
| 816 | DebugInfoKind = llvm::codegenoptions::NoDebugInfo; |
| 817 | break; |
| 818 | case DebugDirectivesOnly: |
| 819 | DebugInfoKind = llvm::codegenoptions::DebugDirectivesOnly; |
| 820 | break; |
| 821 | case EmitSameDebugInfoAsHost: |
| 822 | // Use same debug info level as the host. |
| 823 | break; |
| 824 | } |
| 825 | } |
| 826 | |
/// Detect the NVIDIA GPUs available on the system by executing 'nvptx-arch'
/// (or the tool named via --offload-arch-tool=) and collecting one
/// architecture per non-empty output line.
///
/// \returns the detected architectures, or an error if the tool failed to
/// run or reported no GPUs.
Expected<SmallVector<std::string>>
NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const {
  // Detect NVIDIA GPUs available on the system.
  std::string Program;
  if (Arg *A = Args.getLastArg(Ids: options::OPT_offload_arch_tool_EQ))
    Program = A->getValue();
  else
    Program = GetProgramPath(Name: "nvptx-arch" );

  auto StdoutOrErr = getDriver().executeProgram(Args: {Program});
  if (!StdoutOrErr)
    return StdoutOrErr.takeError();

  // One architecture per line of tool output; skip blanks.
  SmallVector<std::string, 1> GPUArchs;
  for (StringRef Arch : llvm::split(Str: (*StdoutOrErr)->getBuffer(), Separator: "\n" ))
    if (!Arch.empty())
      GPUArchs.push_back(Elt: Arch.str());

  if (GPUArchs.empty())
    return llvm::createStringError(EC: std::error_code(),
                                   S: "No NVIDIA GPU detected in the system" );

  return std::move(GPUArchs);
}
| 851 | |
/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
/// which isn't properly a linker but nonetheless performs the step of stitching
/// together object files from the assembler into a single blob.

// Reuses the NVPTX toolchain setup, keeping a reference to the host toolchain
// so host options/includes can be forwarded to device compilations.
CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
                             const ToolChain &HostTC, const ArgList &Args)
    : NVPTXToolChain(D, Triple, HostTC.getTriple(), Args), HostTC(HostTC) {}
| 859 | |
/// Add the cc1 options for a CUDA or OpenMP device-side compilation:
/// device-mode flags, the libdevice bitcode library, the SDK version, and
/// (for OpenMP) the OpenMP device runtime.
void CudaToolChain::addClangTargetOptions(
    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
    Action::OffloadKind DeviceOffloadingKind) const {
  // Host target options apply to the device compile as well.
  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadKind: DeviceOffloadingKind);

  StringRef GpuArch = DriverArgs.getLastArgValue(Id: options::OPT_march_EQ);
  assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
          DeviceOffloadingKind == Action::OFK_Cuda) &&
         "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs." );

  // Baseline flags for device-side compilation.
  CC1Args.append(IL: {"-fcuda-is-device" , "-mllvm" ,
                  "-enable-memcpyopt-without-libcalls" ,
                  "-fno-threadsafe-statics" });

  if (DriverArgs.hasFlag(Pos: options::OPT_fcuda_short_ptr,
                         Neg: options::OPT_fno_cuda_short_ptr, Default: false))
    CC1Args.append(IL: {"-mllvm" , "--nvptx-short-ptr" });

  // The user opted out of device libraries entirely; nothing more to add.
  if (!DriverArgs.hasFlag(Pos: options::OPT_offloadlib, Neg: options::OPT_no_offloadlib,
                          Default: true))
    return;

  // OpenMP with -S: skip device-library linking for assembly-only output.
  if (DeviceOffloadingKind == Action::OFK_OpenMP &&
      DriverArgs.hasArg(Ids: options::OPT_S))
    return;

  // Link NVIDIA's libdevice math bitcode for the selected GPU arch.
  std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(Gpu: GpuArch);
  if (LibDeviceFile.empty()) {
    getDriver().Diag(DiagID: diag::err_drv_no_cuda_libdevice) << GpuArch;
    return;
  }

  CC1Args.push_back(Elt: "-mlink-builtin-bitcode" );
  CC1Args.push_back(Elt: DriverArgs.MakeArgString(Str: LibDeviceFile));

  // For now, we don't use any Offload/OpenMP device runtime when we offload
  // CUDA via LLVM/Offload. We should split the Offload/OpenMP device runtime
  // and include the "generic" (or CUDA-specific) parts.
  if (DriverArgs.hasFlag(Pos: options::OPT_foffload_via_llvm,
                         Neg: options::OPT_fno_offload_via_llvm, Default: false))
    return;

  clang::CudaVersion CudaInstallationVersion = CudaInstallation.version();

  // NOTE(review): since UNKNOWN is the lowest CudaVersion enumerator, this
  // condition appears to always hold — confirm whether '!=' was intended.
  if (CudaInstallationVersion >= CudaVersion::UNKNOWN)
    CC1Args.push_back(
        Elt: DriverArgs.MakeArgString(Str: Twine("-target-sdk-version=" ) +
                                  CudaVersionToString(V: CudaInstallationVersion)));

  if (DeviceOffloadingKind == Action::OFK_OpenMP) {
    // OpenMP offloading requires at least CUDA 9.2.
    if (CudaInstallationVersion < CudaVersion::CUDA_92) {
      getDriver().Diag(
          DiagID: diag::err_drv_omp_offload_target_cuda_version_not_support)
          << CudaVersionToString(V: CudaInstallationVersion);
      return;
    }

    // Link the bitcode library late if we're using device LTO.
    if (getDriver().isUsingOffloadLTO())
      return;

    addOpenMPDeviceRTL(D: getDriver(), DriverArgs, CC1Args, BitcodeSuffix: GpuArch.str(),
                       Triple: getTriple(), HostTC);
  }
}
| 925 | |
| 926 | llvm::DenormalMode CudaToolChain::getDefaultDenormalModeForType( |
| 927 | const llvm::opt::ArgList &DriverArgs, const JobAction &JA, |
| 928 | const llvm::fltSemantics *FPType) const { |
| 929 | if (JA.getOffloadingDeviceKind() == Action::OFK_Cuda) { |
| 930 | if (FPType && FPType == &llvm::APFloat::IEEEsingle() && |
| 931 | DriverArgs.hasFlag(Pos: options::OPT_fgpu_flush_denormals_to_zero, |
| 932 | Neg: options::OPT_fno_gpu_flush_denormals_to_zero, Default: false)) |
| 933 | return llvm::DenormalMode::getPreserveSign(); |
| 934 | } |
| 935 | |
| 936 | assert(JA.getOffloadingDeviceKind() != Action::OFK_Host); |
| 937 | return llvm::DenormalMode::getIEEE(); |
| 938 | } |
| 939 | |
| 940 | void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs, |
| 941 | ArgStringList &CC1Args) const { |
| 942 | // Check our CUDA version if we're going to include the CUDA headers. |
| 943 | if (DriverArgs.hasFlag(Pos: options::OPT_offload_inc, Neg: options::OPT_no_offload_inc, |
| 944 | Default: true) && |
| 945 | !DriverArgs.hasArg(Ids: options::OPT_no_cuda_version_check)) { |
| 946 | StringRef Arch = DriverArgs.getLastArgValue(Id: options::OPT_march_EQ); |
| 947 | assert(!Arch.empty() && "Must have an explicit GPU arch." ); |
| 948 | CudaInstallation.CheckCudaVersionSupportsArch(Arch: StringToOffloadArch(S: Arch)); |
| 949 | } |
| 950 | CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args); |
| 951 | } |
| 952 | |
| 953 | std::string CudaToolChain::getInputFilename(const InputInfo &Input) const { |
| 954 | // Only object files are changed, for example assembly files keep their .s |
| 955 | // extensions. If the user requested device-only compilation don't change it. |
| 956 | if (Input.getType() != types::TY_Object || getDriver().offloadDeviceOnly()) |
| 957 | return ToolChain::getInputFilename(Input); |
| 958 | |
| 959 | return ToolChain::getInputFilename(Input); |
| 960 | } |
| 961 | |
| 962 | llvm::opt::DerivedArgList * |
| 963 | CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, |
| 964 | StringRef BoundArch, |
| 965 | Action::OffloadKind DeviceOffloadKind) const { |
| 966 | DerivedArgList *DAL = |
| 967 | HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind); |
| 968 | if (!DAL) |
| 969 | DAL = new DerivedArgList(Args.getBaseArgs()); |
| 970 | |
| 971 | const OptTable &Opts = getDriver().getOpts(); |
| 972 | |
| 973 | for (Arg *A : Args) { |
| 974 | // Make sure flags are not duplicated. |
| 975 | if (!llvm::is_contained(Range&: *DAL, Element: A)) { |
| 976 | DAL->append(A); |
| 977 | } |
| 978 | } |
| 979 | |
| 980 | if (!BoundArch.empty()) { |
| 981 | DAL->eraseArg(Id: options::OPT_march_EQ); |
| 982 | DAL->AddJoinedArg(BaseArg: nullptr, Opt: Opts.getOption(Opt: options::OPT_march_EQ), |
| 983 | Value: BoundArch); |
| 984 | } |
| 985 | return DAL; |
| 986 | } |
| 987 | |
// Factory for the standalone toolchain's assembler (wraps ptxas).
Tool *NVPTXToolChain::buildAssembler() const {
  return new tools::NVPTX::Assembler(*this);
}
| 991 | |
// Factory for the standalone toolchain's linker tool.
Tool *NVPTXToolChain::buildLinker() const {
  return new tools::NVPTX::Linker(*this);
}
| 995 | |
// Factory for the CUDA toolchain's assembler (wraps ptxas).
Tool *CudaToolChain::buildAssembler() const {
  return new tools::NVPTX::Assembler(*this);
}
| 999 | |
// Factory for the CUDA "linker": fatbinary, which bundles per-arch device
// objects into a single fat binary (see the class comment above).
Tool *CudaToolChain::buildLinker() const {
  return new tools::NVPTX::FatBinary(*this);
}
| 1003 | |
// Warning options mirror the host toolchain's so host/device diagnostics
// stay consistent.
void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
  HostTC.addClangWarningOptions(CC1Args);
}
| 1007 | |
// Device compilations use the same C++ standard library as the host.
ToolChain::CXXStdlibType
CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
  return HostTC.GetCXXStdlibType(Args);
}
| 1012 | |
| 1013 | void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, |
| 1014 | ArgStringList &CC1Args) const { |
| 1015 | HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args); |
| 1016 | |
| 1017 | if (DriverArgs.hasFlag(Pos: options::OPT_offload_inc, Neg: options::OPT_no_offload_inc, |
| 1018 | Default: true) && |
| 1019 | CudaInstallation.isValid()) |
| 1020 | CC1Args.append( |
| 1021 | IL: {"-internal-isystem" , |
| 1022 | DriverArgs.MakeArgString(Str: CudaInstallation.getIncludePath())}); |
| 1023 | } |
| 1024 | |
// C++ standard library include paths come from the host toolchain.
void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
                                                 ArgStringList &CC1Args) const {
  HostTC.AddClangCXXStdlibIncludeArgs(DriverArgs: Args, CC1Args);
}
| 1029 | |
// IAMCU include paths are delegated to the host toolchain.
void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
                                        ArgStringList &CC1Args) const {
  HostTC.AddIAMCUIncludeArgs(DriverArgs: Args, CC1Args);
}
| 1034 | |
/// Report the host toolchain's sanitizer mask so shared command lines parse.
SanitizerMask CudaToolChain::getSupportedSanitizers() const {
  // The CudaToolChain only supports sanitizers in the sense that it allows
  // sanitizer arguments on the command line if they are supported by the host
  // toolchain. The CudaToolChain will actually ignore any command line
  // arguments for any of these "supported" sanitizers. That means that no
  // sanitization of device code is actually supported at this time.
  //
  // This behavior is necessary because the host and device toolchains
  // invocations often share the command line, so the device toolchain must
  // tolerate flags meant only for the host toolchain.
  return HostTC.getSupportedSanitizers();
}
| 1047 | |
// The MSVC compatibility version is whatever the host toolchain computes.
VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
                                               const ArgList &Args) const {
  return HostTC.computeMSVCVersion(D, Args);
}
| 1052 | |