1//===-- clang-linker-wrapper/ClangLinkerWrapper.cpp - wrapper over linker-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8//
9// This tool works as a wrapper over a linking job. This tool is used to create
10// linked device images for offloading. It scans the linker's input for embedded
11// device offloading data stored in sections `.llvm.offloading` and extracts it
12// as a temporary file. The extracted device files will then be passed to a
13// device linking job to create a final device image.
14//
15//===---------------------------------------------------------------------===//
16
17#include "clang/Basic/TargetID.h"
18#include "clang/Basic/Version.h"
19#include "llvm/ADT/MapVector.h"
20#include "llvm/BinaryFormat/Magic.h"
21#include "llvm/Bitcode/BitcodeWriter.h"
22#include "llvm/CodeGen/CommandFlags.h"
23#include "llvm/Frontend/Offloading/OffloadWrapper.h"
24#include "llvm/Frontend/Offloading/Utility.h"
25#include "llvm/IR/Constants.h"
26#include "llvm/IR/DiagnosticPrinter.h"
27#include "llvm/IR/Module.h"
28#include "llvm/IRReader/IRReader.h"
29#include "llvm/LTO/LTO.h"
30#include "llvm/MC/TargetRegistry.h"
31#include "llvm/Object/Archive.h"
32#include "llvm/Object/ArchiveWriter.h"
33#include "llvm/Object/Binary.h"
34#include "llvm/Object/ELFObjectFile.h"
35#include "llvm/Object/IRObjectFile.h"
36#include "llvm/Object/ObjectFile.h"
37#include "llvm/Object/OffloadBinary.h"
38#include "llvm/Option/ArgList.h"
39#include "llvm/Option/OptTable.h"
40#include "llvm/Option/Option.h"
41#include "llvm/Plugins/PassPlugin.h"
42#include "llvm/Remarks/HotnessThresholdParser.h"
43#include "llvm/Support/CommandLine.h"
44#include "llvm/Support/Errc.h"
45#include "llvm/Support/FileOutputBuffer.h"
46#include "llvm/Support/FileSystem.h"
47#include "llvm/Support/InitLLVM.h"
48#include "llvm/Support/MemoryBuffer.h"
49#include "llvm/Support/Parallel.h"
50#include "llvm/Support/Path.h"
51#include "llvm/Support/Program.h"
52#include "llvm/Support/Signals.h"
53#include "llvm/Support/SourceMgr.h"
54#include "llvm/Support/StringSaver.h"
55#include "llvm/Support/TargetSelect.h"
56#include "llvm/Support/TimeProfiler.h"
57#include "llvm/Support/WithColor.h"
58#include "llvm/Support/raw_ostream.h"
59#include "llvm/Target/TargetMachine.h"
60#include "llvm/TargetParser/Host.h"
61#include <atomic>
62#include <optional>
63
64using namespace llvm;
65using namespace llvm::opt;
66using namespace llvm::object;
67
68// Various tools (e.g., llc and opt) duplicate this series of declarations for
69// options related to passes and remarks.
70
71static cl::opt<bool> RemarksWithHotness(
72 "pass-remarks-with-hotness",
73 cl::desc("With PGO, include profile count in optimization remarks"),
74 cl::Hidden);
75
76static cl::opt<std::optional<uint64_t>, false, remarks::HotnessThresholdParser>
77 RemarksHotnessThreshold(
78 "pass-remarks-hotness-threshold",
79 cl::desc("Minimum profile count required for "
80 "an optimization remark to be output. "
81 "Use 'auto' to apply the threshold from profile summary."),
82 cl::value_desc("N or 'auto'"), cl::init(Val: 0), cl::Hidden);
83
84static cl::opt<std::string>
85 RemarksFilename("pass-remarks-output",
86 cl::desc("Output filename for pass remarks"),
87 cl::value_desc("filename"));
88
89static cl::opt<std::string>
90 RemarksPasses("pass-remarks-filter",
91 cl::desc("Only record optimization remarks from passes whose "
92 "names match the given regular expression"),
93 cl::value_desc("regex"));
94
95static cl::opt<std::string> RemarksFormat(
96 "pass-remarks-format",
97 cl::desc("The format used for serializing remarks (default: YAML)"),
98 cl::value_desc("format"), cl::init(Val: "yaml"));
99
100static cl::list<std::string>
101 PassPlugins("load-pass-plugin",
102 cl::desc("Load passes from plugin library"));
103
104static cl::opt<std::string> PassPipeline(
105 "passes",
106 cl::desc(
107 "A textual description of the pass pipeline. To have analysis passes "
108 "available before a certain pass, add 'require<foo-analysis>'. "
109 "'-passes' overrides the pass pipeline (but not all effects) from "
110 "specifying '--opt-level=O?' (O2 is the default) to "
111 "clang-linker-wrapper. Be sure to include the corresponding "
112 "'default<O?>' in '-passes'."));
113static cl::alias PassPipeline2("p", cl::aliasopt(PassPipeline),
114 cl::desc("Alias for -passes"));
115
116/// Path of the current binary.
117static const char *LinkerExecutable;
118
119/// Ssave intermediary results.
120static bool SaveTemps = false;
121
122/// Print arguments without executing.
123static bool DryRun = false;
124
125/// Print verbose output.
126static bool Verbose = false;
127
128/// Filename of the executable being created.
129static StringRef ExecutableName;
130
131/// Binary path for the CUDA installation.
132static std::string CudaBinaryPath;
133
134/// Mutex lock to protect writes to shared TempFiles in parallel.
135static std::mutex TempFilesMutex;
136
137/// Temporary files created by the linker wrapper.
138static std::list<SmallString<128>> TempFiles;
139
140/// Codegen flags for LTO backend.
141static codegen::RegisterCodeGenFlags CodeGenFlags;
142
143using OffloadingImage = OffloadBinary::OffloadingImage;
144
145namespace llvm {
146// Provide DenseMapInfo so that OffloadKind can be used in a DenseMap.
147template <> struct DenseMapInfo<OffloadKind> {
148 static inline OffloadKind getEmptyKey() { return OFK_LAST; }
149 static inline OffloadKind getTombstoneKey() {
150 return static_cast<OffloadKind>(OFK_LAST + 1);
151 }
152 static unsigned getHashValue(const OffloadKind &Val) { return Val; }
153
154 static bool isEqual(const OffloadKind &LHS, const OffloadKind &RHS) {
155 return LHS == RHS;
156 }
157};
158} // namespace llvm
159
160namespace {
161using std::error_code;
162
/// Option flags specific to the linker wrapper. The values must not overlap
/// with llvm::opt::DriverFlag.
enum WrapperFlags {
  /// Options only used by the linker wrapper.
  WrapperOnlyOption = 1 << 4,
  /// Options only used for device linking.
  DeviceOnlyOption = 1 << 5,
};
168
169enum ID {
170 OPT_INVALID = 0, // This is not an option ID.
171#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
172#include "LinkerWrapperOpts.inc"
173 LastOption
174#undef OPTION
175};
176
177#define OPTTABLE_STR_TABLE_CODE
178#include "LinkerWrapperOpts.inc"
179#undef OPTTABLE_STR_TABLE_CODE
180
181#define OPTTABLE_PREFIXES_TABLE_CODE
182#include "LinkerWrapperOpts.inc"
183#undef OPTTABLE_PREFIXES_TABLE_CODE
184
185static constexpr OptTable::Info InfoTable[] = {
186#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
187#include "LinkerWrapperOpts.inc"
188#undef OPTION
189};
190
191class WrapperOptTable : public opt::GenericOptTable {
192public:
193 WrapperOptTable()
194 : opt::GenericOptTable(OptionStrTable, OptionPrefixesTable, InfoTable) {}
195};
196
197const OptTable &getOptTable() {
198 static const WrapperOptTable *Table = []() {
199 auto Result = std::make_unique<WrapperOptTable>();
200 return Result.release();
201 }();
202 return *Table;
203}
204
205void printCommands(ArrayRef<StringRef> CmdArgs) {
206 if (CmdArgs.empty())
207 return;
208
209 llvm::errs() << " \"" << CmdArgs.front() << "\" ";
210 for (auto IC = std::next(x: CmdArgs.begin()), IE = CmdArgs.end(); IC != IE; ++IC)
211 llvm::errs() << *IC << (std::next(x: IC) != IE ? " " : "\n");
212}
213
214[[noreturn]] void reportError(Error E) {
215 outs().flush();
216 logAllUnhandledErrors(E: std::move(E),
217 OS&: WithColor::error(OS&: errs(), Prefix: LinkerExecutable));
218 exit(EXIT_FAILURE);
219}
220
221std::string getMainExecutable(const char *Name) {
222 void *Ptr = (void *)(intptr_t)&getMainExecutable;
223 auto COWPath = sys::fs::getMainExecutable(argv0: Name, MainExecAddr: Ptr);
224 return sys::path::parent_path(path: COWPath).str();
225}
226
227/// Get a temporary filename suitable for output.
228Expected<StringRef> createOutputFile(const Twine &Prefix, StringRef Extension) {
229 std::scoped_lock<decltype(TempFilesMutex)> Lock(TempFilesMutex);
230 SmallString<128> OutputFile;
231 std::string PrefixStr = clang::sanitizeTargetIDInFileName(TargetID: Prefix.str());
232
233 if (SaveTemps) {
234 (PrefixStr + "." + Extension).toNullTerminatedStringRef(Out&: OutputFile);
235 } else {
236 if (std::error_code EC =
237 sys::fs::createTemporaryFile(Prefix: PrefixStr, Suffix: Extension, ResultPath&: OutputFile))
238 return createFileError(F: OutputFile, EC);
239 }
240
241 TempFiles.emplace_back(args: std::move(OutputFile));
242 return TempFiles.back();
243}
244
245/// Execute the command \p ExecutablePath with the arguments \p Args.
246Error executeCommands(StringRef ExecutablePath, ArrayRef<StringRef> Args) {
247 if (Verbose || DryRun)
248 printCommands(CmdArgs: Args);
249
250 if (!DryRun)
251 if (sys::ExecuteAndWait(Program: ExecutablePath, Args))
252 return createStringError(
253 Fmt: "'%s' failed", Vals: sys::path::filename(path: ExecutablePath).str().c_str());
254 return Error::success();
255}
256
257Expected<std::string> findProgram(StringRef Name, ArrayRef<StringRef> Paths) {
258
259 ErrorOr<std::string> Path = sys::findProgramByName(Name, Paths);
260 if (!Path)
261 Path = sys::findProgramByName(Name);
262 if (!Path && DryRun)
263 return Name.str();
264 if (!Path)
265 return createStringError(EC: Path.getError(),
266 S: "Unable to find '" + Name + "' in path");
267 return *Path;
268}
269
270bool linkerSupportsLTO(const ArgList &Args) {
271 llvm::Triple Triple(Args.getLastArgValue(Id: OPT_triple_EQ));
272 return Triple.isNVPTX() || Triple.isAMDGPU() ||
273 (!Triple.isGPU() &&
274 Args.getLastArgValue(Id: OPT_linker_path_EQ).ends_with(Suffix: "lld"));
275}
276
277/// Returns the hashed value for a constant string.
278std::string getHash(StringRef Str) {
279 llvm::MD5 Hasher;
280 llvm::MD5::MD5Result Hash;
281 Hasher.update(Str);
282 Hasher.final(Result&: Hash);
283 return llvm::utohexstr(X: Hash.low(), /*LowerCase=*/true);
284}
285
286/// Renames offloading entry sections in a relocatable link so they do not
287/// conflict with a later link job.
288Error relocateOffloadSection(const ArgList &Args, StringRef Output) {
289 llvm::Triple Triple(
290 Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple()));
291 if (Triple.isOSWindows())
292 return createStringError(
293 Fmt: "Relocatable linking is not supported on COFF targets");
294
295 Expected<std::string> ObjcopyPath =
296 findProgram(Name: "llvm-objcopy", Paths: {getMainExecutable(Name: "llvm-objcopy")});
297 if (!ObjcopyPath)
298 return ObjcopyPath.takeError();
299
300 // Use the linker output file to get a unique hash. This creates a unique
301 // identifier to rename the sections to that is deterministic to the contents.
302 auto BufferOrErr = DryRun ? MemoryBuffer::getMemBuffer(InputData: "")
303 : MemoryBuffer::getFileOrSTDIN(Filename: Output);
304 if (!BufferOrErr)
305 return createStringError(Fmt: "Failed to open %s", Vals: Output.str().c_str());
306 std::string Suffix = "_" + getHash(Str: (*BufferOrErr)->getBuffer());
307
308 SmallVector<StringRef> ObjcopyArgs = {
309 *ObjcopyPath,
310 Output,
311 };
312
313 // Remove the old .llvm.offloading section to prevent further linking.
314 ObjcopyArgs.emplace_back(Args: "--remove-section");
315 ObjcopyArgs.emplace_back(Args: ".llvm.offloading");
316 StringRef Prefix = "llvm";
317 auto Section = (Prefix + "_offload_entries").str();
318 // Rename the offloading entires to make them private to this link unit.
319 ObjcopyArgs.emplace_back(Args: "--rename-section");
320 ObjcopyArgs.emplace_back(
321 Args: Args.MakeArgString(Str: Section + "=" + Section + Suffix));
322
323 // Rename the __start_ / __stop_ symbols appropriately to iterate over the
324 // newly renamed section containing the offloading entries.
325 ObjcopyArgs.emplace_back(Args: "--redefine-sym");
326 ObjcopyArgs.emplace_back(Args: Args.MakeArgString(Str: "__start_" + Section + "=" +
327 "__start_" + Section + Suffix));
328 ObjcopyArgs.emplace_back(Args: "--redefine-sym");
329 ObjcopyArgs.emplace_back(Args: Args.MakeArgString(Str: "__stop_" + Section + "=" +
330 "__stop_" + Section + Suffix));
331
332 if (Error Err = executeCommands(ExecutablePath: *ObjcopyPath, Args: ObjcopyArgs))
333 return Err;
334
335 return Error::success();
336}
337
338/// Runs the wrapped linker job with the newly created input.
339Error runLinker(ArrayRef<StringRef> Files, const ArgList &Args) {
340 llvm::TimeTraceScope TimeScope("Execute host linker");
341
342 // Render the linker arguments and add the newly created image. We add it
343 // after the output file to ensure it is linked with the correct libraries.
344 StringRef LinkerPath = Args.getLastArgValue(Id: OPT_linker_path_EQ);
345 if (LinkerPath.empty())
346 return createStringError(Fmt: "linker path missing, must pass 'linker-path'");
347 ArgStringList NewLinkerArgs;
348 for (const opt::Arg *Arg : Args) {
349 // Do not forward arguments only intended for the linker wrapper.
350 if (Arg->getOption().hasFlag(Val: WrapperOnlyOption))
351 continue;
352
353 Arg->render(Args, Output&: NewLinkerArgs);
354 if (Arg->getOption().matches(ID: OPT_o) || Arg->getOption().matches(ID: OPT_out))
355 llvm::transform(Range&: Files, d_first: std::back_inserter(x&: NewLinkerArgs),
356 F: [&](StringRef Arg) { return Args.MakeArgString(Str: Arg); });
357 }
358
359 SmallVector<StringRef> LinkerArgs({LinkerPath});
360 for (StringRef Arg : NewLinkerArgs)
361 LinkerArgs.push_back(Elt: Arg);
362 if (Error Err = executeCommands(ExecutablePath: LinkerPath, Args: LinkerArgs))
363 return Err;
364
365 if (Args.hasArg(Ids: OPT_relocatable))
366 return relocateOffloadSection(Args, Output: ExecutableName);
367
368 return Error::success();
369}
370
371void printVersion(raw_ostream &OS) {
372 OS << clang::getClangToolFullVersion(ToolName: "clang-linker-wrapper") << '\n';
373}
374
375namespace nvptx {
376Expected<StringRef>
377fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
378 const ArgList &Args) {
379 llvm::TimeTraceScope TimeScope("NVPTX fatbinary");
380 // NVPTX uses the fatbinary program to bundle the linked images.
381 Expected<std::string> FatBinaryPath =
382 findProgram(Name: "fatbinary", Paths: {CudaBinaryPath + "/bin"});
383 if (!FatBinaryPath)
384 return FatBinaryPath.takeError();
385
386 llvm::Triple Triple(
387 Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple()));
388
389 // Create a new file to write the linked device image to.
390 auto TempFileOrErr =
391 createOutputFile(Prefix: sys::path::filename(path: ExecutableName), Extension: "fatbin");
392 if (!TempFileOrErr)
393 return TempFileOrErr.takeError();
394
395 SmallVector<StringRef, 16> CmdArgs;
396 CmdArgs.push_back(Elt: *FatBinaryPath);
397 CmdArgs.push_back(Elt: Triple.isArch64Bit() ? "-64" : "-32");
398 CmdArgs.push_back(Elt: "--create");
399 CmdArgs.push_back(Elt: *TempFileOrErr);
400 for (const auto &[File, Arch] : InputFiles)
401 CmdArgs.push_back(Elt: Args.MakeArgString(
402 Str: "--image3=kind=elf,sm=" + Arch.drop_front(N: 3) + ",file=" + File));
403
404 if (Error Err = executeCommands(ExecutablePath: *FatBinaryPath, Args: CmdArgs))
405 return std::move(Err);
406
407 return *TempFileOrErr;
408}
409} // namespace nvptx
410
411namespace amdgcn {
412Expected<StringRef>
413fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
414 const ArgList &Args) {
415 llvm::TimeTraceScope TimeScope("AMDGPU Fatbinary");
416
417 // AMDGPU uses the clang-offload-bundler to bundle the linked images.
418 Expected<std::string> OffloadBundlerPath = findProgram(
419 Name: "clang-offload-bundler", Paths: {getMainExecutable(Name: "clang-offload-bundler")});
420 if (!OffloadBundlerPath)
421 return OffloadBundlerPath.takeError();
422
423 // Create a new file to write the linked device image to.
424 auto TempFileOrErr =
425 createOutputFile(Prefix: sys::path::filename(path: ExecutableName), Extension: "hipfb");
426 if (!TempFileOrErr)
427 return TempFileOrErr.takeError();
428
429 BumpPtrAllocator Alloc;
430 StringSaver Saver(Alloc);
431
432 SmallVector<StringRef, 16> CmdArgs;
433 CmdArgs.push_back(Elt: *OffloadBundlerPath);
434 CmdArgs.push_back(Elt: "-type=o");
435 CmdArgs.push_back(Elt: "-bundle-align=4096");
436
437 if (Args.hasArg(Ids: OPT_compress))
438 CmdArgs.push_back(Elt: "-compress");
439 if (auto *Arg = Args.getLastArg(Ids: OPT_compression_level_eq))
440 CmdArgs.push_back(
441 Elt: Args.MakeArgString(Str: Twine("-compression-level=") + Arg->getValue()));
442
443 SmallVector<StringRef> Targets = {"-targets=host-x86_64-unknown-linux-gnu"};
444 for (const auto &[File, Arch] : InputFiles) {
445 Targets.push_back(Elt: Saver.save(S: Arch == "amdgcnspirv"
446 ? "hip-spirv64-amd-amdhsa--" + Arch
447 : "hip-amdgcn-amd-amdhsa--" + Arch));
448 }
449 CmdArgs.push_back(Elt: Saver.save(S: llvm::join(R&: Targets, Separator: ",")));
450
451#ifdef _WIN32
452 CmdArgs.push_back("-input=NUL");
453#else
454 CmdArgs.push_back(Elt: "-input=/dev/null");
455#endif
456 for (const auto &[File, Arch] : InputFiles)
457 CmdArgs.push_back(Elt: Saver.save(S: "-input=" + File));
458
459 CmdArgs.push_back(Elt: Saver.save(S: "-output=" + *TempFileOrErr));
460
461 if (Error Err = executeCommands(ExecutablePath: *OffloadBundlerPath, Args: CmdArgs))
462 return std::move(Err);
463
464 return *TempFileOrErr;
465}
466} // namespace amdgcn
467
468namespace generic {
469Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
470 uint16_t ActiveOffloadKindMask) {
471 llvm::TimeTraceScope TimeScope("Clang");
472 // Use `clang` to invoke the appropriate device tools.
473 Expected<std::string> ClangPath =
474 findProgram(Name: "clang", Paths: {getMainExecutable(Name: "clang")});
475 if (!ClangPath)
476 return ClangPath.takeError();
477
478 const llvm::Triple Triple(Args.getLastArgValue(Id: OPT_triple_EQ));
479 StringRef Arch = Args.getLastArgValue(Id: OPT_arch_EQ);
480 // Create a new file to write the linked device image to. Assume that the
481 // input filename already has the device and architecture.
482 std::string OutputFileBase =
483 "." + Triple.getArchName().str() + "." + Arch.str();
484 auto TempFileOrErr = createOutputFile(
485 Prefix: sys::path::filename(path: ExecutableName) + OutputFileBase, Extension: "img");
486 if (!TempFileOrErr)
487 return TempFileOrErr.takeError();
488
489 SmallVector<StringRef, 16> CmdArgs{
490 *ClangPath,
491 "--no-default-config",
492 "-o",
493 *TempFileOrErr,
494 // Without -dumpdir, Clang will place auxiliary output files in the
495 // temporary directory of TempFileOrErr, where they will not easily be
496 // found by the user and might eventually be automatically removed. Tell
497 // Clang to instead place them alongside the final executable.
498 "-dumpdir",
499 Args.MakeArgString(Str: ExecutableName + OutputFileBase + ".img."),
500 Args.MakeArgString(Str: "--target=" + Triple.getTriple()),
501 };
502
503 if (!Arch.empty())
504 Triple.isAMDGPU() ? CmdArgs.push_back(Elt: Args.MakeArgString(Str: "-mcpu=" + Arch))
505 : CmdArgs.push_back(Elt: Args.MakeArgString(Str: "-march=" + Arch));
506
507 // AMDGPU is always in LTO mode currently.
508 if (Triple.isAMDGPU())
509 CmdArgs.push_back(Elt: "-flto");
510
511 // Forward all of the `--offload-opt` and similar options to the device.
512 for (auto &Arg : Args.filtered(Ids: OPT_offload_opt_eq_minus, Ids: OPT_mllvm))
513 CmdArgs.append(
514 IL: {"-Xlinker",
515 Args.MakeArgString(Str: "--plugin-opt=" + StringRef(Arg->getValue()))});
516
517 if (!Triple.isNVPTX() && !Triple.isSPIRV())
518 CmdArgs.push_back(Elt: "-Wl,--no-undefined");
519
520 for (StringRef InputFile : InputFiles)
521 CmdArgs.push_back(Elt: InputFile);
522
523 // If this is CPU offloading we copy the input libraries.
524 if (!Triple.isGPU()) {
525 CmdArgs.push_back(Elt: "-Wl,-Bsymbolic");
526 CmdArgs.push_back(Elt: "-shared");
527 ArgStringList LinkerArgs;
528 for (const opt::Arg *Arg :
529 Args.filtered(Ids: OPT_INPUT, Ids: OPT_library, Ids: OPT_library_path, Ids: OPT_rpath,
530 Ids: OPT_whole_archive, Ids: OPT_no_whole_archive)) {
531 // Sometimes needed libraries are passed by name, such as when using
532 // sanitizers. We need to check the file magic for any libraries.
533 if (Arg->getOption().matches(ID: OPT_INPUT)) {
534 if (!sys::fs::exists(Path: Arg->getValue()) ||
535 sys::fs::is_directory(Path: Arg->getValue()))
536 continue;
537
538 file_magic Magic;
539 if (auto EC = identify_magic(path: Arg->getValue(), result&: Magic))
540 return createStringError(Fmt: "Failed to open %s", Vals: Arg->getValue());
541 if (Magic != file_magic::archive &&
542 Magic != file_magic::elf_shared_object)
543 continue;
544 }
545 if (Arg->getOption().matches(ID: OPT_whole_archive))
546 LinkerArgs.push_back(Elt: Args.MakeArgString(Str: "-Wl,--whole-archive"));
547 else if (Arg->getOption().matches(ID: OPT_no_whole_archive))
548 LinkerArgs.push_back(Elt: Args.MakeArgString(Str: "-Wl,--no-whole-archive"));
549 else
550 Arg->render(Args, Output&: LinkerArgs);
551 }
552 llvm::append_range(C&: CmdArgs, R&: LinkerArgs);
553 }
554
555 // Pass on -mllvm options to the linker invocation.
556 for (const opt::Arg *Arg : Args.filtered(Ids: OPT_mllvm))
557 CmdArgs.append(IL: {"-Xlinker", Args.MakeArgString(
558 Str: "-mllvm=" + StringRef(Arg->getValue()))});
559
560 if (SaveTemps && linkerSupportsLTO(Args))
561 CmdArgs.push_back(Elt: "-Wl,--save-temps");
562
563 if (Args.hasArg(Ids: OPT_embed_bitcode))
564 CmdArgs.push_back(Elt: "-Wl,--lto-emit-llvm");
565
566 // For linking device code with the SYCL offload kind, special handling is
567 // required. Passing --sycl-link to clang results in a call to
568 // clang-sycl-linker. Additional linker flags required by clang-sycl-linker
569 // will be communicated via the -Xlinker option.
570 if (ActiveOffloadKindMask & OFK_SYCL) {
571 CmdArgs.push_back(Elt: "--sycl-link");
572 CmdArgs.append(
573 IL: {"-Xlinker", Args.MakeArgString(Str: "-triple=" + Triple.getTriple())});
574 CmdArgs.append(IL: {"-Xlinker", Args.MakeArgString(Str: "-arch=" + Arch)});
575 }
576
577 for (StringRef Arg : Args.getAllArgValues(Id: OPT_linker_arg_EQ))
578 CmdArgs.append(IL: {"-Xlinker", Args.MakeArgString(Str: Arg)});
579 for (StringRef Arg : Args.getAllArgValues(Id: OPT_compiler_arg_EQ))
580 CmdArgs.push_back(Elt: Args.MakeArgString(Str: Arg));
581
582 if (Error Err = executeCommands(ExecutablePath: *ClangPath, Args: CmdArgs))
583 return std::move(Err);
584
585 return *TempFileOrErr;
586}
587} // namespace generic
588
589Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles,
590 const ArgList &Args,
591 uint16_t ActiveOffloadKindMask) {
592 const llvm::Triple Triple(Args.getLastArgValue(Id: OPT_triple_EQ));
593 switch (Triple.getArch()) {
594 case Triple::nvptx:
595 case Triple::nvptx64:
596 case Triple::amdgcn:
597 case Triple::x86:
598 case Triple::x86_64:
599 case Triple::aarch64:
600 case Triple::aarch64_be:
601 case Triple::ppc64:
602 case Triple::ppc64le:
603 case Triple::spirv64:
604 case Triple::systemz:
605 case Triple::loongarch64:
606 return generic::clang(InputFiles, Args, ActiveOffloadKindMask);
607 default:
608 return createStringError(S: Triple.getArchName() +
609 " linking is not supported");
610 }
611}
612
613Error containerizeRawImage(std::unique_ptr<MemoryBuffer> &Img, OffloadKind Kind,
614 const ArgList &Args) {
615 llvm::Triple Triple(Args.getLastArgValue(Id: OPT_triple_EQ));
616 if (Kind == OFK_OpenMP && Triple.isSPIRV() &&
617 Triple.getVendor() == llvm::Triple::Intel)
618 return offloading::intel::containerizeOpenMPSPIRVImage(Binary&: Img);
619 return Error::success();
620}
621
622Expected<StringRef> writeOffloadFile(const OffloadFile &File) {
623 const OffloadBinary &Binary = *File.getBinary();
624
625 StringRef Prefix =
626 sys::path::stem(path: Binary.getMemoryBufferRef().getBufferIdentifier());
627 SmallString<128> Filename;
628 (Prefix + "-" + Binary.getTriple() + "-" + Binary.getArch())
629 .toVector(Out&: Filename);
630 auto TempFileOrErr = createOutputFile(Prefix: Filename, Extension: "o");
631 if (!TempFileOrErr)
632 return TempFileOrErr.takeError();
633
634 Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
635 FileOutputBuffer::create(FilePath: *TempFileOrErr, Size: Binary.getImage().size());
636 if (!OutputOrErr)
637 return OutputOrErr.takeError();
638 std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
639 llvm::copy(Range: Binary.getImage(), Out: Output->getBufferStart());
640 if (Error E = Output->commit())
641 return std::move(E);
642
643 return *TempFileOrErr;
644}
645
646// Compile the module to an object file using the appropriate target machine for
647// the host triple.
648Expected<StringRef> compileModule(Module &M, OffloadKind Kind) {
649 llvm::TimeTraceScope TimeScope("Compile module");
650 std::string Msg;
651 const Target *T = TargetRegistry::lookupTarget(TheTriple: M.getTargetTriple(), Error&: Msg);
652 if (!T)
653 return createStringError(S: Msg);
654
655 auto Options =
656 codegen::InitTargetOptionsFromCodeGenFlags(TheTriple: M.getTargetTriple());
657 StringRef CPU = "";
658 StringRef Features = "";
659 std::unique_ptr<TargetMachine> TM(
660 T->createTargetMachine(TT: M.getTargetTriple(), CPU, Features, Options,
661 RM: Reloc::PIC_, CM: M.getCodeModel()));
662
663 if (M.getDataLayout().isDefault())
664 M.setDataLayout(TM->createDataLayout());
665
666 int FD = -1;
667 auto TempFileOrErr =
668 createOutputFile(Prefix: sys::path::filename(path: ExecutableName) + "." +
669 getOffloadKindName(Name: Kind) + ".image.wrapper",
670 Extension: "o");
671 if (!TempFileOrErr)
672 return TempFileOrErr.takeError();
673 if (std::error_code EC = sys::fs::openFileForWrite(Name: *TempFileOrErr, ResultFD&: FD))
674 return errorCodeToError(EC);
675
676 auto OS = std::make_unique<llvm::raw_fd_ostream>(args&: FD, args: true);
677
678 legacy::PassManager CodeGenPasses;
679 TargetLibraryInfoImpl TLII(M.getTargetTriple());
680 CodeGenPasses.add(P: new TargetLibraryInfoWrapperPass(TLII));
681 if (TM->addPassesToEmitFile(CodeGenPasses, *OS, nullptr,
682 CodeGenFileType::ObjectFile))
683 return createStringError(Fmt: "Failed to execute host backend");
684 CodeGenPasses.run(M);
685
686 return *TempFileOrErr;
687}
688
689/// Creates the object file containing the device image and runtime
690/// registration code from the device images stored in \p Images.
691Expected<StringRef>
692wrapDeviceImages(ArrayRef<std::unique_ptr<MemoryBuffer>> Buffers,
693 const ArgList &Args, OffloadKind Kind) {
694 llvm::TimeTraceScope TimeScope("Wrap bundled images");
695
696 SmallVector<ArrayRef<char>, 4> BuffersToWrap;
697 for (const auto &Buffer : Buffers)
698 BuffersToWrap.emplace_back(
699 Args: ArrayRef<char>(Buffer->getBufferStart(), Buffer->getBufferSize()));
700
701 LLVMContext Context;
702 Module M("offload.wrapper.module", Context);
703 M.setTargetTriple(Triple(
704 Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple())));
705
706 switch (Kind) {
707 case OFK_OpenMP:
708 if (Error Err = offloading::wrapOpenMPBinaries(
709 M, Images: BuffersToWrap, EntryArray: offloading::getOffloadEntryArray(M),
710 /*Suffix=*/"", /*Relocatable=*/Args.hasArg(Ids: OPT_relocatable)))
711 return std::move(Err);
712 break;
713 case OFK_Cuda:
714 if (Error Err = offloading::wrapCudaBinary(
715 M, Images: BuffersToWrap.front(), EntryArray: offloading::getOffloadEntryArray(M),
716 /*Suffix=*/"", /*EmitSurfacesAndTextures=*/false))
717 return std::move(Err);
718 break;
719 case OFK_HIP:
720 if (Error Err = offloading::wrapHIPBinary(
721 M, Images: BuffersToWrap.front(), EntryArray: offloading::getOffloadEntryArray(M)))
722 return std::move(Err);
723 break;
724 case OFK_SYCL: {
725 // TODO: fill these options once the Driver supports them.
726 offloading::SYCLJITOptions Options;
727 if (Error Err =
728 offloading::wrapSYCLBinaries(M, Buffer: BuffersToWrap.front(), Options))
729 return std::move(Err);
730 break;
731 }
732 default:
733 return createStringError(S: getOffloadKindName(Name: Kind) +
734 " wrapping is not supported");
735 }
736
737 if (Args.hasArg(Ids: OPT_print_wrapped_module))
738 errs() << M;
739 if (Args.hasArg(Ids: OPT_save_temps)) {
740 int FD = -1;
741 auto TempFileOrErr =
742 createOutputFile(Prefix: sys::path::filename(path: ExecutableName) + "." +
743 getOffloadKindName(Name: Kind) + ".image.wrapper",
744 Extension: "bc");
745 if (!TempFileOrErr)
746 return TempFileOrErr.takeError();
747 if (std::error_code EC = sys::fs::openFileForWrite(Name: *TempFileOrErr, ResultFD&: FD))
748 return errorCodeToError(EC);
749 llvm::raw_fd_ostream OS(FD, true);
750 WriteBitcodeToFile(M, Out&: OS);
751 }
752
753 auto FileOrErr = compileModule(M, Kind);
754 if (!FileOrErr)
755 return FileOrErr.takeError();
756 return *FileOrErr;
757}
758
759Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
760bundleOpenMP(ArrayRef<OffloadingImage> Images) {
761 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
762 for (const OffloadingImage &Image : Images)
763 Buffers.emplace_back(
764 Args: MemoryBuffer::getMemBufferCopy(InputData: OffloadBinary::write(Image)));
765
766 return std::move(Buffers);
767}
768
769Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
770bundleSYCL(ArrayRef<OffloadingImage> Images) {
771 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
772 if (DryRun) {
773 // In dry-run mode there is an empty input which is insufficient for the
774 // testing. Therefore, we return here a stub image.
775 OffloadingImage Image;
776 Image.TheImageKind = IMG_None;
777 Image.TheOffloadKind = OffloadKind::OFK_SYCL;
778 Image.StringData["symbols"] = "stub";
779 Image.Image = MemoryBuffer::getMemBufferCopy(InputData: "");
780 SmallString<0> SerializedImage = OffloadBinary::write(Image);
781 Buffers.emplace_back(Args: MemoryBuffer::getMemBufferCopy(InputData: SerializedImage));
782 return std::move(Buffers);
783 }
784
785 for (const OffloadingImage &Image : Images) {
786 // clang-sycl-linker packs outputs into one binary blob. Therefore, it is
787 // passed to Offload Wrapper as is.
788 StringRef S(Image.Image->getBufferStart(), Image.Image->getBufferSize());
789 Buffers.emplace_back(Args: MemoryBuffer::getMemBufferCopy(InputData: S));
790 }
791
792 return std::move(Buffers);
793}
794
795Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
796bundleCuda(ArrayRef<OffloadingImage> Images, const ArgList &Args) {
797 SmallVector<std::pair<StringRef, StringRef>, 4> InputFiles;
798 for (const OffloadingImage &Image : Images)
799 InputFiles.emplace_back(Args: std::make_pair(x: Image.Image->getBufferIdentifier(),
800 y: Image.StringData.lookup(Key: "arch")));
801
802 auto FileOrErr = nvptx::fatbinary(InputFiles, Args);
803 if (!FileOrErr)
804 return FileOrErr.takeError();
805
806 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
807 llvm::MemoryBuffer::getFileOrSTDIN(Filename: *FileOrErr);
808
809 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
810 if (std::error_code EC = ImageOrError.getError())
811 return createFileError(F: *FileOrErr, EC);
812 Buffers.emplace_back(Args: std::move(*ImageOrError));
813
814 return std::move(Buffers);
815}
816
817Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
818bundleHIP(ArrayRef<OffloadingImage> Images, const ArgList &Args) {
819 SmallVector<std::pair<StringRef, StringRef>, 4> InputFiles;
820 for (const OffloadingImage &Image : Images)
821 InputFiles.emplace_back(Args: std::make_pair(x: Image.Image->getBufferIdentifier(),
822 y: Image.StringData.lookup(Key: "arch")));
823
824 auto FileOrErr = amdgcn::fatbinary(InputFiles, Args);
825 if (!FileOrErr)
826 return FileOrErr.takeError();
827
828 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
829 llvm::MemoryBuffer::getFileOrSTDIN(Filename: *FileOrErr);
830
831 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
832 if (std::error_code EC = ImageOrError.getError())
833 return createFileError(F: *FileOrErr, EC);
834 Buffers.emplace_back(Args: std::move(*ImageOrError));
835
836 return std::move(Buffers);
837}
838
839/// Transforms the input \p Images into the binary format the runtime expects
840/// for the given \p Kind.
841Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
842bundleLinkedOutput(ArrayRef<OffloadingImage> Images, const ArgList &Args,
843 OffloadKind Kind) {
844 llvm::TimeTraceScope TimeScope("Bundle linked output");
845 switch (Kind) {
846 case OFK_OpenMP:
847 return bundleOpenMP(Images);
848 case OFK_SYCL:
849 return bundleSYCL(Images);
850 case OFK_Cuda:
851 return bundleCuda(Images, Args);
852 case OFK_HIP:
853 return bundleHIP(Images, Args);
854 default:
855 return createStringError(S: getOffloadKindName(Name: Kind) +
856 " bundling is not supported");
857 }
858}
859
860/// Returns a new ArgList containg arguments used for the device linking phase.
861DerivedArgList getLinkerArgs(ArrayRef<OffloadFile> Input,
862 const InputArgList &Args) {
863 DerivedArgList DAL = DerivedArgList(DerivedArgList(Args));
864 for (Arg *A : Args)
865 DAL.append(A);
866
867 // Set the subarchitecture and target triple for this compilation.
868 const OptTable &Tbl = getOptTable();
869 StringRef Arch = Args.MakeArgString(Str: Input.front().getBinary()->getArch());
870 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_arch_EQ),
871 Value: Arch == "generic" ? "" : Arch);
872 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_triple_EQ),
873 Value: Args.MakeArgString(Str: Input.front().getBinary()->getTriple()));
874
875 // If every input file is bitcode we have whole program visibility as we
876 // do only support static linking with bitcode.
877 auto ContainsBitcode = [](const OffloadFile &F) {
878 return identify_magic(magic: F.getBinary()->getImage()) == file_magic::bitcode;
879 };
880 if (llvm::all_of(Range&: Input, P: ContainsBitcode))
881 DAL.AddFlagArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_whole_program));
882
883 // Forward '-Xoffload-linker' options to the appropriate backend.
884 for (StringRef Arg : Args.getAllArgValues(Id: OPT_device_linker_args_EQ)) {
885 auto [Triple, Value] = Arg.split(Separator: '=');
886 llvm::Triple TT(Triple);
887 // If this isn't a recognized triple then it's an `arg=value` option.
888 if (TT.getArch() == Triple::ArchType::UnknownArch)
889 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_linker_arg_EQ),
890 Value: Args.MakeArgString(Str: Arg));
891 else if (Value.empty())
892 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_linker_arg_EQ),
893 Value: Args.MakeArgString(Str: Triple));
894 else if (Triple == DAL.getLastArgValue(Id: OPT_triple_EQ))
895 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_linker_arg_EQ),
896 Value: Args.MakeArgString(Str: Value));
897 }
898
899 // Forward '-Xoffload-compiler' options to the appropriate backend.
900 for (StringRef Arg : Args.getAllArgValues(Id: OPT_device_compiler_args_EQ)) {
901 auto [Triple, Value] = Arg.split(Separator: '=');
902 llvm::Triple TT(Triple);
903 // If this isn't a recognized triple then it's an `arg=value` option.
904 if (TT.getArch() == Triple::ArchType::UnknownArch)
905 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_compiler_arg_EQ),
906 Value: Args.MakeArgString(Str: Arg));
907 else if (Value.empty())
908 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_compiler_arg_EQ),
909 Value: Args.MakeArgString(Str: Triple));
910 else if (Triple == DAL.getLastArgValue(Id: OPT_triple_EQ))
911 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_compiler_arg_EQ),
912 Value: Args.MakeArgString(Str: Value));
913 }
914
915 return DAL;
916}
917
918Error handleOverrideImages(
919 const InputArgList &Args,
920 MapVector<OffloadKind, SmallVector<OffloadingImage, 0>> &Images) {
921 for (StringRef Arg : Args.getAllArgValues(Id: OPT_override_image)) {
922 OffloadKind Kind = getOffloadKind(Name: Arg.split(Separator: "=").first);
923 StringRef Filename = Arg.split(Separator: "=").second;
924
925 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
926 MemoryBuffer::getFileOrSTDIN(Filename);
927 if (std::error_code EC = BufferOrErr.getError())
928 return createFileError(F: Filename, EC);
929
930 Expected<std::unique_ptr<ObjectFile>> ElfOrErr =
931 ObjectFile::createELFObjectFile(Object: **BufferOrErr,
932 /*InitContent=*/false);
933 if (!ElfOrErr)
934 return ElfOrErr.takeError();
935 ObjectFile &Elf = **ElfOrErr;
936
937 OffloadingImage TheImage{};
938 TheImage.TheImageKind = IMG_Object;
939 TheImage.TheOffloadKind = Kind;
940 TheImage.StringData["triple"] =
941 Args.MakeArgString(Str: Elf.makeTriple().getTriple());
942 if (std::optional<StringRef> CPU = Elf.tryGetCPUName())
943 TheImage.StringData["arch"] = Args.MakeArgString(Str: *CPU);
944 TheImage.Image = std::move(*BufferOrErr);
945
946 Images[Kind].emplace_back(Args: std::move(TheImage));
947 }
948 return Error::success();
949}
950
951/// Transforms all the extracted offloading input files into an image that can
952/// be registered by the runtime. If NeedsWrapping is false, writes bundled
953/// output directly without wrapping or host linking.
954Expected<SmallVector<StringRef>>
955linkAndWrapDeviceFiles(ArrayRef<SmallVector<OffloadFile>> LinkerInputFiles,
956 const InputArgList &Args, char **Argv, int Argc,
957 bool NeedsWrapping) {
958 llvm::TimeTraceScope TimeScope("Handle all device input");
959
960 std::mutex ImageMtx;
961 MapVector<OffloadKind, SmallVector<OffloadingImage, 0>> Images;
962
963 // Initialize the images with any overriding inputs.
964 if (Args.hasArg(Ids: OPT_override_image))
965 if (Error Err = handleOverrideImages(Args, Images))
966 return std::move(Err);
967
968 auto Err = parallelForEachError(R&: LinkerInputFiles, Fn: [&](auto &Input) -> Error {
969 llvm::TimeTraceScope TimeScope("Link device input");
970
971 // Each thread needs its own copy of the base arguments to maintain
972 // per-device argument storage of synthetic strings.
973 const OptTable &Tbl = getOptTable();
974 BumpPtrAllocator Alloc;
975 StringSaver Saver(Alloc);
976 auto BaseArgs =
977 Tbl.parseArgs(Argc, Argv, Unknown: OPT_INVALID, Saver, ErrorFn: [](StringRef Err) {
978 reportError(E: createStringError(S: Err));
979 });
980 auto LinkerArgs = getLinkerArgs(Input, BaseArgs);
981
982 uint16_t ActiveOffloadKindMask = 0u;
983 for (const auto &File : Input)
984 ActiveOffloadKindMask |= File.getBinary()->getOffloadKind();
985
986 // Linking images of SYCL offload kind with images of other kind is not
987 // supported.
988 // TODO: Remove the above limitation.
989 if ((ActiveOffloadKindMask & OFK_SYCL) &&
990 ((ActiveOffloadKindMask ^ OFK_SYCL) != 0))
991 return createStringError(Fmt: "Linking images of SYCL offload kind with "
992 "images of any other kind is not supported");
993
994 // Write any remaining device inputs to an output file.
995 SmallVector<StringRef> InputFiles;
996 for (const OffloadFile &File : Input) {
997 auto FileNameOrErr = writeOffloadFile(File);
998 if (!FileNameOrErr)
999 return FileNameOrErr.takeError();
1000 InputFiles.emplace_back(Args&: *FileNameOrErr);
1001 }
1002
1003 // Link the remaining device files using the device linker.
1004 auto OutputOrErr =
1005 linkDevice(InputFiles, LinkerArgs, ActiveOffloadKindMask);
1006 if (!OutputOrErr)
1007 return OutputOrErr.takeError();
1008
1009 // Store the offloading image for each linked output file.
1010 for (OffloadKind Kind = OFK_OpenMP; Kind != OFK_LAST;
1011 Kind = static_cast<OffloadKind>((uint16_t)(Kind) << 1)) {
1012 if ((ActiveOffloadKindMask & Kind) == 0)
1013 continue;
1014 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
1015 llvm::MemoryBuffer::getFileOrSTDIN(Filename: *OutputOrErr);
1016 if (std::error_code EC = FileOrErr.getError()) {
1017 if (DryRun)
1018 FileOrErr = MemoryBuffer::getMemBuffer(InputData: "");
1019 else
1020 return createFileError(*OutputOrErr, EC);
1021 }
1022
1023 // Manually containerize offloading images not in ELF format.
1024 if (Error E = containerizeRawImage(*FileOrErr, Kind, LinkerArgs))
1025 return E;
1026
1027 std::scoped_lock<decltype(ImageMtx)> Guard(ImageMtx);
1028 OffloadingImage TheImage{};
1029 TheImage.TheImageKind =
1030 Args.hasArg(Ids: OPT_embed_bitcode) ? IMG_Bitcode : IMG_Object;
1031 TheImage.TheOffloadKind = Kind;
1032 TheImage.StringData["triple"] =
1033 Args.MakeArgString(Str: LinkerArgs.getLastArgValue(OPT_triple_EQ));
1034 TheImage.StringData["arch"] =
1035 Args.MakeArgString(Str: LinkerArgs.getLastArgValue(OPT_arch_EQ));
1036 TheImage.Image = std::move(*FileOrErr);
1037
1038 Images[Kind].emplace_back(Args: std::move(TheImage));
1039 }
1040 return Error::success();
1041 });
1042 if (Err)
1043 return std::move(Err);
1044
1045 // Create a binary image of each offloading image and either embed it into a
1046 // new object file, or if all inputs were direct offload binaries, emit the
1047 // fat binary directly (e.g. .hipfb / .fatbin).
1048 SmallVector<StringRef> WrappedOutput;
1049 for (auto &[Kind, Input] : Images) {
1050 // We sort the entries before bundling so they appear in a deterministic
1051 // order in the final binary.
1052 llvm::sort(C&: Input, Comp: [](OffloadingImage &A, OffloadingImage &B) {
1053 return A.StringData["triple"] > B.StringData["triple"] ||
1054 A.StringData["arch"] > B.StringData["arch"] ||
1055 A.TheOffloadKind < B.TheOffloadKind;
1056 });
1057 auto BundledImagesOrErr = bundleLinkedOutput(Images: Input, Args, Kind);
1058 if (!BundledImagesOrErr)
1059 return BundledImagesOrErr.takeError();
1060
1061 if (!NeedsWrapping) {
1062 if (BundledImagesOrErr->size() != 1)
1063 return createStringError(
1064 Fmt: "Expected a single bundled image for direct fat binary output");
1065
1066 Expected<std::unique_ptr<FileOutputBuffer>> FOBOrErr =
1067 FileOutputBuffer::create(
1068 FilePath: ExecutableName, Size: BundledImagesOrErr->front()->getBufferSize());
1069 if (!FOBOrErr)
1070 return FOBOrErr.takeError();
1071 std::unique_ptr<FileOutputBuffer> FOB = std::move(*FOBOrErr);
1072 llvm::copy(Range: BundledImagesOrErr->front()->getBuffer(),
1073 Out: FOB->getBufferStart());
1074 if (Error E = FOB->commit())
1075 return std::move(E);
1076
1077 continue;
1078 }
1079
1080 auto OutputOrErr = wrapDeviceImages(Buffers: *BundledImagesOrErr, Args, Kind);
1081 if (!OutputOrErr)
1082 return OutputOrErr.takeError();
1083 WrappedOutput.push_back(Elt: *OutputOrErr);
1084 }
1085
1086 return WrappedOutput;
1087}
1088
1089std::optional<std::string> findFile(StringRef Dir, StringRef Root,
1090 const Twine &Name) {
1091 SmallString<128> Path;
1092 if (Dir.starts_with(Prefix: "="))
1093 sys::path::append(path&: Path, a: Root, b: Dir.substr(Start: 1), c: Name);
1094 else
1095 sys::path::append(path&: Path, a: Dir, b: Name);
1096
1097 if (sys::fs::exists(Path))
1098 return static_cast<std::string>(Path);
1099 return std::nullopt;
1100}
1101
1102std::optional<std::string>
1103findFromSearchPaths(StringRef Name, StringRef Root,
1104 ArrayRef<StringRef> SearchPaths) {
1105 for (StringRef Dir : SearchPaths)
1106 if (std::optional<std::string> File = findFile(Dir, Root, Name))
1107 return File;
1108 return std::nullopt;
1109}
1110
1111std::optional<std::string>
1112searchLibraryBaseName(StringRef Name, StringRef Root,
1113 ArrayRef<StringRef> SearchPaths) {
1114 for (StringRef Dir : SearchPaths) {
1115 if (std::optional<std::string> File =
1116 findFile(Dir, Root, Name: "lib" + Name + ".so"))
1117 return File;
1118 if (std::optional<std::string> File =
1119 findFile(Dir, Root, Name: "lib" + Name + ".a"))
1120 return File;
1121 }
1122 return std::nullopt;
1123}
1124
1125/// Search for static libraries in the linker's library path given input like
1126/// `-lfoo` or `-l:libfoo.a`.
1127std::optional<std::string> searchLibrary(StringRef Input, StringRef Root,
1128 ArrayRef<StringRef> SearchPaths) {
1129 if (Input.starts_with(Prefix: ":") || Input.ends_with(Suffix: ".lib"))
1130 return findFromSearchPaths(Name: Input.drop_front(), Root, SearchPaths);
1131 return searchLibraryBaseName(Name: Input, Root, SearchPaths);
1132}
1133
1134/// Search the input files and libraries for embedded device offloading code
1135/// and add it to the list of files to be linked. Files coming from static
1136/// libraries are only added to the input if they are used by an existing
1137/// input file. Returns a list of input files intended for a single linking job.
1138Expected<SmallVector<SmallVector<OffloadFile>>>
1139getDeviceInput(const ArgList &Args) {
1140 llvm::TimeTraceScope TimeScope("ExtractDeviceCode");
1141
1142 // Skip all the input if the user is overriding the output.
1143 if (Args.hasArg(Ids: OPT_override_image))
1144 return SmallVector<SmallVector<OffloadFile>>();
1145
1146 StringRef Root = Args.getLastArgValue(Id: OPT_sysroot_EQ);
1147 SmallVector<StringRef> LibraryPaths;
1148 for (const opt::Arg *Arg : Args.filtered(Ids: OPT_library_path, Ids: OPT_libpath))
1149 LibraryPaths.push_back(Elt: Arg->getValue());
1150
1151 BumpPtrAllocator Alloc;
1152 StringSaver Saver(Alloc);
1153
1154 // Try to extract device code from the linker input files.
1155 bool WholeArchive = Args.hasArg(Ids: OPT_wholearchive_flag) ? true : false;
1156 SmallVector<OffloadFile> ObjectFilesToExtract;
1157 SmallVector<OffloadFile> ArchiveFilesToExtract;
1158 for (const opt::Arg *Arg : Args.filtered(
1159 Ids: OPT_INPUT, Ids: OPT_library, Ids: OPT_whole_archive, Ids: OPT_no_whole_archive)) {
1160 if (Arg->getOption().matches(ID: OPT_whole_archive) ||
1161 Arg->getOption().matches(ID: OPT_no_whole_archive)) {
1162 WholeArchive = Arg->getOption().matches(ID: OPT_whole_archive);
1163 continue;
1164 }
1165
1166 std::optional<std::string> Filename =
1167 Arg->getOption().matches(ID: OPT_library)
1168 ? searchLibrary(Input: Arg->getValue(), Root, SearchPaths: LibraryPaths)
1169 : std::string(Arg->getValue());
1170
1171 if (!Filename && Arg->getOption().matches(ID: OPT_library))
1172 reportError(
1173 E: createStringError(Fmt: "unable to find library -l%s", Vals: Arg->getValue()));
1174
1175 if (!Filename || !sys::fs::exists(Path: *Filename) ||
1176 sys::fs::is_directory(Path: *Filename))
1177 continue;
1178
1179 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
1180 MemoryBuffer::getFileOrSTDIN(Filename: *Filename);
1181 if (std::error_code EC = BufferOrErr.getError())
1182 return createFileError(F: *Filename, EC);
1183
1184 MemoryBufferRef Buffer = **BufferOrErr;
1185 if (identify_magic(magic: Buffer.getBuffer()) == file_magic::elf_shared_object)
1186 continue;
1187
1188 SmallVector<OffloadFile> Binaries;
1189 if (Error Err = extractOffloadBinaries(Buffer, Binaries))
1190 return std::move(Err);
1191
1192 for (auto &OffloadFile : Binaries) {
1193 if (identify_magic(magic: Buffer.getBuffer()) == file_magic::archive &&
1194 !WholeArchive)
1195 ArchiveFilesToExtract.emplace_back(Args: std::move(OffloadFile));
1196 else
1197 ObjectFilesToExtract.emplace_back(Args: std::move(OffloadFile));
1198 }
1199 }
1200
1201 // Link all standard input files and update the list of symbols.
1202 MapVector<OffloadFile::TargetID, SmallVector<OffloadFile, 0>> InputFiles;
1203 for (OffloadFile &Binary : ObjectFilesToExtract) {
1204 if (!Binary.getBinary())
1205 continue;
1206
1207 SmallVector<OffloadFile::TargetID> CompatibleTargets = {Binary};
1208 for (const auto &[ID, Input] : InputFiles)
1209 if (object::areTargetsCompatible(LHS: Binary, RHS: ID))
1210 CompatibleTargets.emplace_back(Args: ID);
1211
1212 for (const auto &[Index, ID] : llvm::enumerate(First&: CompatibleTargets)) {
1213 // If another target needs this binary it must be copied instead.
1214 if (Index == CompatibleTargets.size() - 1)
1215 InputFiles[ID].emplace_back(Args: std::move(Binary));
1216 else
1217 InputFiles[ID].emplace_back(Args: Binary.copy());
1218 }
1219 }
1220
1221 llvm::DenseSet<StringRef> ShouldExtract;
1222 for (auto &Arg : Args.getAllArgValues(Id: OPT_should_extract))
1223 ShouldExtract.insert(V: Arg);
1224
1225 // We only extract archive members from the fat binary if we find a used or
1226 // requested target. Unlike normal static archive handling, we just extract
1227 // every object file contained in the archive.
1228 for (OffloadFile &Binary : ArchiveFilesToExtract) {
1229 if (!Binary.getBinary())
1230 continue;
1231
1232 SmallVector<OffloadFile::TargetID> CompatibleTargets = {Binary};
1233 for (const auto &[ID, Input] : InputFiles)
1234 if (object::areTargetsCompatible(LHS: Binary, RHS: ID))
1235 CompatibleTargets.emplace_back(Args: ID);
1236
1237 for (const auto &[Index, ID] : llvm::enumerate(First&: CompatibleTargets)) {
1238 // Only extract an if we have an an object matching this target or it
1239 // was specifically requested.
1240 if (!InputFiles.count(Key: ID) && !ShouldExtract.contains(V: ID.second))
1241 continue;
1242
1243 // If another target needs this binary it must be copied instead.
1244 if (Index == CompatibleTargets.size() - 1)
1245 InputFiles[ID].emplace_back(Args: std::move(Binary));
1246 else
1247 InputFiles[ID].emplace_back(Args: Binary.copy());
1248 }
1249 }
1250
1251 SmallVector<SmallVector<OffloadFile>> InputsForTarget;
1252 for (auto &[ID, Input] : InputFiles)
1253 InputsForTarget.emplace_back(Args: std::move(Input));
1254
1255 return std::move(InputsForTarget);
1256}
1257
1258} // namespace
1259
1260int main(int Argc, char **Argv) {
1261 InitLLVM X(Argc, Argv);
1262 InitializeAllTargetInfos();
1263 InitializeAllTargets();
1264 InitializeAllTargetMCs();
1265 InitializeAllAsmParsers();
1266 InitializeAllAsmPrinters();
1267
1268 LinkerExecutable = Argv[0];
1269 sys::PrintStackTraceOnErrorSignal(Argv0: Argv[0]);
1270
1271 const OptTable &Tbl = getOptTable();
1272 BumpPtrAllocator Alloc;
1273 StringSaver Saver(Alloc);
1274 auto Args = Tbl.parseArgs(Argc, Argv, Unknown: OPT_INVALID, Saver, ErrorFn: [&](StringRef Err) {
1275 reportError(E: createStringError(S: Err));
1276 });
1277
1278 if (Args.hasArg(Ids: OPT_help) || Args.hasArg(Ids: OPT_help_hidden)) {
1279 Tbl.printHelp(
1280 OS&: outs(),
1281 Usage: "clang-linker-wrapper [options] -- <options to passed to the linker>",
1282 Title: "\nA wrapper utility over the host linker. It scans the input files\n"
1283 "for sections that require additional processing prior to linking.\n"
1284 "The will then transparently pass all arguments and input to the\n"
1285 "specified host linker to create the final binary.\n",
1286 ShowHidden: Args.hasArg(Ids: OPT_help_hidden), ShowAllAliases: Args.hasArg(Ids: OPT_help_hidden));
1287 return EXIT_SUCCESS;
1288 }
1289 if (Args.hasArg(Ids: OPT_v)) {
1290 printVersion(OS&: outs());
1291 return EXIT_SUCCESS;
1292 }
1293
1294 // This forwards '-mllvm' arguments to LLVM if present.
1295 SmallVector<const char *> NewArgv = {Argv[0]};
1296 for (const opt::Arg *Arg : Args.filtered(Ids: OPT_mllvm))
1297 NewArgv.push_back(Elt: Arg->getValue());
1298 for (const opt::Arg *Arg : Args.filtered(Ids: OPT_offload_opt_eq_minus))
1299 NewArgv.push_back(Elt: Arg->getValue());
1300 SmallVector<PassPlugin, 1> PluginList;
1301 PassPlugins.setCallback([&](const std::string &PluginPath) {
1302 auto Plugin = PassPlugin::Load(Filename: PluginPath);
1303 if (!Plugin)
1304 reportFatalUsageError(Err: Plugin.takeError());
1305 PluginList.emplace_back(Args&: Plugin.get());
1306 });
1307 cl::ParseCommandLineOptions(argc: NewArgv.size(), argv: &NewArgv[0]);
1308
1309 Verbose = Args.hasArg(Ids: OPT_verbose);
1310 DryRun = Args.hasArg(Ids: OPT_dry_run);
1311 SaveTemps = Args.hasArg(Ids: OPT_save_temps);
1312 CudaBinaryPath = Args.getLastArgValue(Id: OPT_cuda_path_EQ).str();
1313
1314 llvm::Triple Triple(
1315 Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple()));
1316 if (Args.hasArg(Ids: OPT_o))
1317 ExecutableName = Args.getLastArgValue(Id: OPT_o, Default: "a.out");
1318 else if (Args.hasArg(Ids: OPT_out))
1319 ExecutableName = Args.getLastArgValue(Id: OPT_out, Default: "a.exe");
1320 else
1321 ExecutableName = Triple.isOSWindows() ? "a.exe" : "a.out";
1322
1323 parallel::strategy = hardware_concurrency(ThreadCount: 1);
1324 if (auto *Arg = Args.getLastArg(Ids: OPT_wrapper_jobs)) {
1325 StringRef Val = Arg->getValue();
1326 if (Val.equals_insensitive(RHS: "jobserver"))
1327 parallel::strategy = jobserver_concurrency();
1328 else {
1329 unsigned Threads = 0;
1330 if (!llvm::to_integer(S: Val, Num&: Threads) || Threads == 0)
1331 reportError(E: createStringError(
1332 Fmt: "%s: expected a positive integer or 'jobserver', got '%s'",
1333 Vals: Arg->getSpelling().data(), Vals: Val.data()));
1334 else
1335 parallel::strategy = hardware_concurrency(ThreadCount: Threads);
1336 }
1337 }
1338
1339 if (Args.hasArg(Ids: OPT_wrapper_time_trace_eq)) {
1340 unsigned Granularity;
1341 Args.getLastArgValue(Id: OPT_wrapper_time_trace_granularity, Default: "500")
1342 .getAsInteger(Radix: 10, Result&: Granularity);
1343 timeTraceProfilerInitialize(TimeTraceGranularity: Granularity, ProcName: Argv[0]);
1344 }
1345
1346 {
1347 llvm::TimeTraceScope TimeScope("Execute linker wrapper");
1348
1349 // Extract the device input files stored in the host fat binary.
1350 auto DeviceInputFiles = getDeviceInput(Args);
1351 if (!DeviceInputFiles)
1352 reportError(E: DeviceInputFiles.takeError());
1353
1354 // Check if we should emit fat binary directly without wrapping or host
1355 // linking.
1356 bool EmitFatbinOnly = Args.hasArg(Ids: OPT_emit_fatbin_only);
1357
1358 // Link and process the device images. The function may emit a direct fat
1359 // binary if --emit-fatbin-only is specified.
1360 auto FilesOrErr = linkAndWrapDeviceFiles(LinkerInputFiles: *DeviceInputFiles, Args, Argv,
1361 Argc, NeedsWrapping: !EmitFatbinOnly);
1362 if (!FilesOrErr)
1363 reportError(E: FilesOrErr.takeError());
1364
1365 // Run the host linking job with the rendered arguments.
1366 if (!EmitFatbinOnly) {
1367 if (Error Err = runLinker(Files: *FilesOrErr, Args))
1368 reportError(E: std::move(Err));
1369 }
1370 }
1371
1372 if (const opt::Arg *Arg = Args.getLastArg(Ids: OPT_wrapper_time_trace_eq)) {
1373 if (Error Err = timeTraceProfilerWrite(PreferredFileName: Arg->getValue(), FallbackFileName: ExecutableName))
1374 reportError(E: std::move(Err));
1375 timeTraceProfilerCleanup();
1376 }
1377
1378 // Remove the temporary files created.
1379 if (!SaveTemps)
1380 for (const auto &TempFile : TempFiles)
1381 if (std::error_code EC = sys::fs::remove(path: TempFile))
1382 reportError(E: createFileError(F: TempFile, EC));
1383
1384 return EXIT_SUCCESS;
1385}
1386