1//===-- clang-linker-wrapper/ClangLinkerWrapper.cpp - wrapper over linker-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8//
9// This tool works as a wrapper over a linking job. This tool is used to create
10// linked device images for offloading. It scans the linker's input for embedded
11// device offloading data stored in sections `.llvm.offloading` and extracts it
12// as a temporary file. The extracted device files will then be passed to a
13// device linking job to create a final device image.
14//
15//===---------------------------------------------------------------------===//
16
17#include "clang/Basic/TargetID.h"
18#include "clang/Basic/Version.h"
19#include "llvm/ADT/MapVector.h"
20#include "llvm/BinaryFormat/Magic.h"
21#include "llvm/Bitcode/BitcodeWriter.h"
22#include "llvm/CodeGen/CommandFlags.h"
23#include "llvm/Frontend/Offloading/OffloadWrapper.h"
24#include "llvm/Frontend/Offloading/Utility.h"
25#include "llvm/IR/Constants.h"
26#include "llvm/IR/DiagnosticPrinter.h"
27#include "llvm/IR/Module.h"
28#include "llvm/IRReader/IRReader.h"
29#include "llvm/LTO/LTO.h"
30#include "llvm/MC/TargetRegistry.h"
31#include "llvm/Object/Archive.h"
32#include "llvm/Object/ArchiveWriter.h"
33#include "llvm/Object/Binary.h"
34#include "llvm/Object/ELFObjectFile.h"
35#include "llvm/Object/IRObjectFile.h"
36#include "llvm/Object/ObjectFile.h"
37#include "llvm/Object/OffloadBinary.h"
38#include "llvm/Option/ArgList.h"
39#include "llvm/Option/OptTable.h"
40#include "llvm/Option/Option.h"
41#include "llvm/Passes/PassPlugin.h"
42#include "llvm/Remarks/HotnessThresholdParser.h"
43#include "llvm/Support/CommandLine.h"
44#include "llvm/Support/Errc.h"
45#include "llvm/Support/FileOutputBuffer.h"
46#include "llvm/Support/FileSystem.h"
47#include "llvm/Support/InitLLVM.h"
48#include "llvm/Support/MemoryBuffer.h"
49#include "llvm/Support/Parallel.h"
50#include "llvm/Support/Path.h"
51#include "llvm/Support/Program.h"
52#include "llvm/Support/Signals.h"
53#include "llvm/Support/SourceMgr.h"
54#include "llvm/Support/StringSaver.h"
55#include "llvm/Support/TargetSelect.h"
56#include "llvm/Support/TimeProfiler.h"
57#include "llvm/Support/WithColor.h"
58#include "llvm/Support/raw_ostream.h"
59#include "llvm/Target/TargetMachine.h"
60#include "llvm/TargetParser/Host.h"
61#include <atomic>
62#include <optional>
63
64using namespace llvm;
65using namespace llvm::opt;
66using namespace llvm::object;
67
68// Various tools (e.g., llc and opt) duplicate this series of declarations for
69// options related to passes and remarks.
70
71static cl::opt<bool> RemarksWithHotness(
72 "pass-remarks-with-hotness",
73 cl::desc("With PGO, include profile count in optimization remarks"),
74 cl::Hidden);
75
76static cl::opt<std::optional<uint64_t>, false, remarks::HotnessThresholdParser>
77 RemarksHotnessThreshold(
78 "pass-remarks-hotness-threshold",
79 cl::desc("Minimum profile count required for "
80 "an optimization remark to be output. "
81 "Use 'auto' to apply the threshold from profile summary."),
82 cl::value_desc("N or 'auto'"), cl::init(Val: 0), cl::Hidden);
83
84static cl::opt<std::string>
85 RemarksFilename("pass-remarks-output",
86 cl::desc("Output filename for pass remarks"),
87 cl::value_desc("filename"));
88
89static cl::opt<std::string>
90 RemarksPasses("pass-remarks-filter",
91 cl::desc("Only record optimization remarks from passes whose "
92 "names match the given regular expression"),
93 cl::value_desc("regex"));
94
95static cl::opt<std::string> RemarksFormat(
96 "pass-remarks-format",
97 cl::desc("The format used for serializing remarks (default: YAML)"),
98 cl::value_desc("format"), cl::init(Val: "yaml"));
99
100static cl::list<std::string>
101 PassPlugins("load-pass-plugin",
102 cl::desc("Load passes from plugin library"));
103
104static cl::opt<std::string> PassPipeline(
105 "passes",
106 cl::desc(
107 "A textual description of the pass pipeline. To have analysis passes "
108 "available before a certain pass, add 'require<foo-analysis>'. "
109 "'-passes' overrides the pass pipeline (but not all effects) from "
110 "specifying '--opt-level=O?' (O2 is the default) to "
111 "clang-linker-wrapper. Be sure to include the corresponding "
112 "'default<O?>' in '-passes'."));
113static cl::alias PassPipeline2("p", cl::aliasopt(PassPipeline),
114 cl::desc("Alias for -passes"));
115
116/// Path of the current binary.
117static const char *LinkerExecutable;
118
119/// Ssave intermediary results.
120static bool SaveTemps = false;
121
122/// Print arguments without executing.
123static bool DryRun = false;
124
125/// Print verbose output.
126static bool Verbose = false;
127
128/// Filename of the executable being created.
129static StringRef ExecutableName;
130
131/// Binary path for the CUDA installation.
132static std::string CudaBinaryPath;
133
134/// Mutex lock to protect writes to shared TempFiles in parallel.
135static std::mutex TempFilesMutex;
136
137/// Temporary files created by the linker wrapper.
138static std::list<SmallString<128>> TempFiles;
139
140/// Codegen flags for LTO backend.
141static codegen::RegisterCodeGenFlags CodeGenFlags;
142
143/// Global flag to indicate that the LTO pipeline threw an error.
144static std::atomic<bool> LTOError;
145
146using OffloadingImage = OffloadBinary::OffloadingImage;
147
148namespace llvm {
149// Provide DenseMapInfo so that OffloadKind can be used in a DenseMap.
150template <> struct DenseMapInfo<OffloadKind> {
151 static inline OffloadKind getEmptyKey() { return OFK_LAST; }
152 static inline OffloadKind getTombstoneKey() {
153 return static_cast<OffloadKind>(OFK_LAST + 1);
154 }
155 static unsigned getHashValue(const OffloadKind &Val) { return Val; }
156
157 static bool isEqual(const OffloadKind &LHS, const OffloadKind &RHS) {
158 return LHS == RHS;
159 }
160};
161} // namespace llvm
162
163namespace {
164using std::error_code;
165
/// Option flags private to this tool. Must not overlap with
/// llvm::opt::DriverFlag.
enum WrapperFlags {
  /// Options only used by the linker wrapper.
  WrapperOnlyOption = 1 << 4,
  /// Options only used for device linking.
  DeviceOnlyOption = 1 << 5,
};
171
/// Identifiers for the command-line options of this tool. OPT_INVALID (0) is a
/// sentinel rather than a real option. The remaining enumerators come from
/// expanding every OPTION(...) entry of LinkerWrapperOpts.inc through
/// LLVM_MAKE_OPT_ID, and LastOption follows the final generated entry.
172enum ID {
173 OPT_INVALID = 0, // This is not an option ID.
174#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),
175#include "LinkerWrapperOpts.inc"
176 LastOption
177#undef OPTION
178};
179
/// For every PREFIX(NAME, VALUE) entry of LinkerWrapperOpts.inc, define a
/// constexpr StringLiteral array `NAME_init` initialised with VALUE and a
/// constexpr ArrayRef `NAME` that covers all but the last element of it.
/// NOTE(review): the dropped last element is presumably a terminator entry
/// emitted by the generator -- confirm against the generated .inc file.
180#define PREFIX(NAME, VALUE) \
181 static constexpr StringLiteral NAME##_init[] = VALUE; \
182 static constexpr ArrayRef<StringLiteral> NAME(NAME##_init, \
183 std::size(NAME##_init) - 1);
184#include "LinkerWrapperOpts.inc"
185#undef PREFIX
186
/// Table of OptTable::Info entries, one per OPTION(...) entry of
/// LinkerWrapperOpts.inc, each built by expanding the entry through
/// LLVM_CONSTRUCT_OPT_INFO. WrapperOptTable passes this table to
/// opt::GenericOptTable.
187static constexpr OptTable::Info InfoTable[] = {
188#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),
189#include "LinkerWrapperOpts.inc"
190#undef OPTION
191};
192
193class WrapperOptTable : public opt::GenericOptTable {
194public:
195 WrapperOptTable() : opt::GenericOptTable(InfoTable) {}
196};
197
198const OptTable &getOptTable() {
199 static const WrapperOptTable *Table = []() {
200 auto Result = std::make_unique<WrapperOptTable>();
201 return Result.release();
202 }();
203 return *Table;
204}
205
206void printCommands(ArrayRef<StringRef> CmdArgs) {
207 if (CmdArgs.empty())
208 return;
209
210 llvm::errs() << " \"" << CmdArgs.front() << "\" ";
211 for (auto IC = std::next(x: CmdArgs.begin()), IE = CmdArgs.end(); IC != IE; ++IC)
212 llvm::errs() << *IC << (std::next(x: IC) != IE ? " " : "\n");
213}
214
215[[noreturn]] void reportError(Error E) {
216 outs().flush();
217 logAllUnhandledErrors(E: std::move(E),
218 OS&: WithColor::error(OS&: errs(), Prefix: LinkerExecutable));
219 exit(EXIT_FAILURE);
220}
221
222/// Create an extra user-specified \p OffloadFile.
223/// TODO: We should find a way to wrap these as libraries instead.
224Expected<OffloadFile> getInputBitcodeLibrary(StringRef Input) {
225 auto [Device, Path] = StringRef(Input).split(Separator: '=');
226 auto [String, Arch] = Device.rsplit(Separator: '-');
227 auto [Kind, Triple] = String.split(Separator: '-');
228
229 llvm::ErrorOr<std::unique_ptr<MemoryBuffer>> ImageOrError =
230 llvm::MemoryBuffer::getFileOrSTDIN(Filename: Path);
231 if (std::error_code EC = ImageOrError.getError())
232 return createFileError(F: Path, EC);
233
234 OffloadingImage Image{};
235 Image.TheImageKind = IMG_Bitcode;
236 Image.TheOffloadKind = getOffloadKind(Name: Kind);
237 Image.StringData["triple"] = Triple;
238 Image.StringData["arch"] = Arch;
239 Image.Image = std::move(*ImageOrError);
240
241 std::unique_ptr<MemoryBuffer> Binary =
242 MemoryBuffer::getMemBufferCopy(InputData: OffloadBinary::write(Image));
243 auto NewBinaryOrErr = OffloadBinary::create(*Binary);
244 if (!NewBinaryOrErr)
245 return NewBinaryOrErr.takeError();
246 return OffloadFile(std::move(*NewBinaryOrErr), std::move(Binary));
247}
248
249std::string getMainExecutable(const char *Name) {
250 void *Ptr = (void *)(intptr_t)&getMainExecutable;
251 auto COWPath = sys::fs::getMainExecutable(argv0: Name, MainExecAddr: Ptr);
252 return sys::path::parent_path(path: COWPath).str();
253}
254
255/// Get a temporary filename suitable for output.
256Expected<StringRef> createOutputFile(const Twine &Prefix, StringRef Extension) {
257 std::scoped_lock<decltype(TempFilesMutex)> Lock(TempFilesMutex);
258 SmallString<128> OutputFile;
259 if (SaveTemps) {
260 (Prefix + "." + Extension).toNullTerminatedStringRef(Out&: OutputFile);
261 } else {
262 if (std::error_code EC =
263 sys::fs::createTemporaryFile(Prefix, Suffix: Extension, ResultPath&: OutputFile))
264 return createFileError(F: OutputFile, EC);
265 }
266
267 TempFiles.emplace_back(args: std::move(OutputFile));
268 return TempFiles.back();
269}
270
271/// Execute the command \p ExecutablePath with the arguments \p Args.
272Error executeCommands(StringRef ExecutablePath, ArrayRef<StringRef> Args) {
273 if (Verbose || DryRun)
274 printCommands(CmdArgs: Args);
275
276 if (!DryRun)
277 if (sys::ExecuteAndWait(Program: ExecutablePath, Args))
278 return createStringError(
279 Fmt: "'%s' failed", Vals: sys::path::filename(path: ExecutablePath).str().c_str());
280 return Error::success();
281}
282
283Expected<std::string> findProgram(StringRef Name, ArrayRef<StringRef> Paths) {
284
285 ErrorOr<std::string> Path = sys::findProgramByName(Name, Paths);
286 if (!Path)
287 Path = sys::findProgramByName(Name);
288 if (!Path && DryRun)
289 return Name.str();
290 if (!Path)
291 return createStringError(EC: Path.getError(),
292 S: "Unable to find '" + Name + "' in path");
293 return *Path;
294}
295
296/// Returns the hashed value for a constant string.
297std::string getHash(StringRef Str) {
298 llvm::MD5 Hasher;
299 llvm::MD5::MD5Result Hash;
300 Hasher.update(Str);
301 Hasher.final(Result&: Hash);
302 return llvm::utohexstr(X: Hash.low(), /*LowerCase=*/true);
303}
304
305/// Renames offloading entry sections in a relocatable link so they do not
306/// conflict with a later link job.
307Error relocateOffloadSection(const ArgList &Args, StringRef Output) {
308 llvm::Triple Triple(
309 Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple()));
310 if (Triple.isOSWindows())
311 return createStringError(
312 Fmt: "Relocatable linking is not supported on COFF targets");
313
314 Expected<std::string> ObjcopyPath =
315 findProgram(Name: "llvm-objcopy", Paths: {getMainExecutable(Name: "llvm-objcopy")});
316 if (!ObjcopyPath)
317 return ObjcopyPath.takeError();
318
319 // Use the linker output file to get a unique hash. This creates a unique
320 // identifier to rename the sections to that is deterministic to the contents.
321 auto BufferOrErr = DryRun ? MemoryBuffer::getMemBuffer(InputData: "")
322 : MemoryBuffer::getFileOrSTDIN(Filename: Output);
323 if (!BufferOrErr)
324 return createStringError(Fmt: "Failed to open %s", Vals: Output.str().c_str());
325 std::string Suffix = "_" + getHash(Str: (*BufferOrErr)->getBuffer());
326
327 SmallVector<StringRef> ObjcopyArgs = {
328 *ObjcopyPath,
329 Output,
330 };
331
332 // Remove the old .llvm.offloading section to prevent further linking.
333 ObjcopyArgs.emplace_back(Args: "--remove-section");
334 ObjcopyArgs.emplace_back(Args: ".llvm.offloading");
335 for (StringRef Prefix : {"omp", "cuda", "hip"}) {
336 auto Section = (Prefix + "_offloading_entries").str();
337 // Rename the offloading entires to make them private to this link unit.
338 ObjcopyArgs.emplace_back(Args: "--rename-section");
339 ObjcopyArgs.emplace_back(
340 Args: Args.MakeArgString(Str: Section + "=" + Section + Suffix));
341
342 // Rename the __start_ / __stop_ symbols appropriately to iterate over the
343 // newly renamed section containing the offloading entries.
344 ObjcopyArgs.emplace_back(Args: "--redefine-sym");
345 ObjcopyArgs.emplace_back(Args: Args.MakeArgString(Str: "__start_" + Section + "=" +
346 "__start_" + Section + Suffix));
347 ObjcopyArgs.emplace_back(Args: "--redefine-sym");
348 ObjcopyArgs.emplace_back(Args: Args.MakeArgString(Str: "__stop_" + Section + "=" +
349 "__stop_" + Section + Suffix));
350 }
351
352 if (Error Err = executeCommands(ExecutablePath: *ObjcopyPath, Args: ObjcopyArgs))
353 return Err;
354
355 return Error::success();
356}
357
358/// Runs the wrapped linker job with the newly created input.
359Error runLinker(ArrayRef<StringRef> Files, const ArgList &Args) {
360 llvm::TimeTraceScope TimeScope("Execute host linker");
361
362 // Render the linker arguments and add the newly created image. We add it
363 // after the output file to ensure it is linked with the correct libraries.
364 StringRef LinkerPath = Args.getLastArgValue(Id: OPT_linker_path_EQ);
365 ArgStringList NewLinkerArgs;
366 for (const opt::Arg *Arg : Args) {
367 // Do not forward arguments only intended for the linker wrapper.
368 if (Arg->getOption().hasFlag(Val: WrapperOnlyOption))
369 continue;
370
371 Arg->render(Args, Output&: NewLinkerArgs);
372 if (Arg->getOption().matches(ID: OPT_o) || Arg->getOption().matches(ID: OPT_out))
373 llvm::transform(Range&: Files, d_first: std::back_inserter(x&: NewLinkerArgs),
374 F: [&](StringRef Arg) { return Args.MakeArgString(Str: Arg); });
375 }
376
377 SmallVector<StringRef> LinkerArgs({LinkerPath});
378 for (StringRef Arg : NewLinkerArgs)
379 LinkerArgs.push_back(Elt: Arg);
380 if (Error Err = executeCommands(ExecutablePath: LinkerPath, Args: LinkerArgs))
381 return Err;
382
383 if (Args.hasArg(Ids: OPT_relocatable))
384 return relocateOffloadSection(Args, Output: ExecutableName);
385
386 return Error::success();
387}
388
389void printVersion(raw_ostream &OS) {
390 OS << clang::getClangToolFullVersion(ToolName: "clang-linker-wrapper") << '\n';
391}
392
393namespace nvptx {
394Expected<StringRef>
395fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
396 const ArgList &Args) {
397 llvm::TimeTraceScope TimeScope("NVPTX fatbinary");
398 // NVPTX uses the fatbinary program to bundle the linked images.
399 Expected<std::string> FatBinaryPath =
400 findProgram(Name: "fatbinary", Paths: {CudaBinaryPath + "/bin"});
401 if (!FatBinaryPath)
402 return FatBinaryPath.takeError();
403
404 llvm::Triple Triple(
405 Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple()));
406
407 // Create a new file to write the linked device image to.
408 auto TempFileOrErr =
409 createOutputFile(Prefix: sys::path::filename(path: ExecutableName), Extension: "fatbin");
410 if (!TempFileOrErr)
411 return TempFileOrErr.takeError();
412
413 SmallVector<StringRef, 16> CmdArgs;
414 CmdArgs.push_back(Elt: *FatBinaryPath);
415 CmdArgs.push_back(Elt: Triple.isArch64Bit() ? "-64" : "-32");
416 CmdArgs.push_back(Elt: "--create");
417 CmdArgs.push_back(Elt: *TempFileOrErr);
418 for (const auto &[File, Arch] : InputFiles)
419 CmdArgs.push_back(
420 Elt: Args.MakeArgString(Str: "--image=profile=" + Arch + ",file=" + File));
421
422 if (Error Err = executeCommands(ExecutablePath: *FatBinaryPath, Args: CmdArgs))
423 return std::move(Err);
424
425 return *TempFileOrErr;
426}
427} // namespace nvptx
428
429namespace amdgcn {
430Expected<StringRef>
431fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles,
432 const ArgList &Args) {
433 llvm::TimeTraceScope TimeScope("AMDGPU Fatbinary");
434
435 // AMDGPU uses the clang-offload-bundler to bundle the linked images.
436 Expected<std::string> OffloadBundlerPath = findProgram(
437 Name: "clang-offload-bundler", Paths: {getMainExecutable(Name: "clang-offload-bundler")});
438 if (!OffloadBundlerPath)
439 return OffloadBundlerPath.takeError();
440
441 llvm::Triple Triple(
442 Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple()));
443
444 // Create a new file to write the linked device image to.
445 auto TempFileOrErr =
446 createOutputFile(Prefix: sys::path::filename(path: ExecutableName), Extension: "hipfb");
447 if (!TempFileOrErr)
448 return TempFileOrErr.takeError();
449
450 BumpPtrAllocator Alloc;
451 StringSaver Saver(Alloc);
452
453 SmallVector<StringRef, 16> CmdArgs;
454 CmdArgs.push_back(Elt: *OffloadBundlerPath);
455 CmdArgs.push_back(Elt: "-type=o");
456 CmdArgs.push_back(Elt: "-bundle-align=4096");
457
458 if (Args.hasArg(Ids: OPT_compress))
459 CmdArgs.push_back(Elt: "-compress");
460 if (auto *Arg = Args.getLastArg(Ids: OPT_compression_level_eq))
461 CmdArgs.push_back(
462 Elt: Args.MakeArgString(Str: Twine("-compression-level=") + Arg->getValue()));
463
464 SmallVector<StringRef> Targets = {"-targets=host-x86_64-unknown-linux"};
465 for (const auto &[File, Arch] : InputFiles)
466 Targets.push_back(Elt: Saver.save(S: "hip-amdgcn-amd-amdhsa--" + Arch));
467 CmdArgs.push_back(Elt: Saver.save(S: llvm::join(R&: Targets, Separator: ",")));
468
469#ifdef _WIN32
470 CmdArgs.push_back("-input=NUL");
471#else
472 CmdArgs.push_back(Elt: "-input=/dev/null");
473#endif
474 for (const auto &[File, Arch] : InputFiles)
475 CmdArgs.push_back(Elt: Saver.save(S: "-input=" + File));
476
477 CmdArgs.push_back(Elt: Saver.save(S: "-output=" + *TempFileOrErr));
478
479 if (Error Err = executeCommands(ExecutablePath: *OffloadBundlerPath, Args: CmdArgs))
480 return std::move(Err);
481
482 return *TempFileOrErr;
483}
484} // namespace amdgcn
485
486namespace generic {
487Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
488 llvm::TimeTraceScope TimeScope("Clang");
489 // Use `clang` to invoke the appropriate device tools.
490 Expected<std::string> ClangPath =
491 findProgram(Name: "clang", Paths: {getMainExecutable(Name: "clang")});
492 if (!ClangPath)
493 return ClangPath.takeError();
494
495 const llvm::Triple Triple(Args.getLastArgValue(Id: OPT_triple_EQ));
496 StringRef Arch = Args.getLastArgValue(Id: OPT_arch_EQ);
497 if (Arch.empty())
498 Arch = "native";
499 // Create a new file to write the linked device image to. Assume that the
500 // input filename already has the device and architecture.
501 auto TempFileOrErr =
502 createOutputFile(Prefix: sys::path::filename(path: ExecutableName) + "." +
503 Triple.getArchName() + "." + Arch,
504 Extension: "img");
505 if (!TempFileOrErr)
506 return TempFileOrErr.takeError();
507
508 StringRef OptLevel = Args.getLastArgValue(Id: OPT_opt_level, Default: "O2");
509 SmallVector<StringRef, 16> CmdArgs{
510 *ClangPath,
511 "--no-default-config",
512 "-o",
513 *TempFileOrErr,
514 Args.MakeArgString(Str: "--target=" + Triple.getTriple()),
515 Triple.isAMDGPU() ? Args.MakeArgString(Str: "-mcpu=" + Arch)
516 : Args.MakeArgString(Str: "-march=" + Arch),
517 Args.MakeArgString(Str: "-" + OptLevel),
518 };
519
520 if (!Triple.isNVPTX())
521 CmdArgs.push_back(Elt: "-Wl,--no-undefined");
522
523 for (StringRef InputFile : InputFiles)
524 CmdArgs.push_back(Elt: InputFile);
525
526 // If this is CPU offloading we copy the input libraries.
527 if (!Triple.isAMDGPU() && !Triple.isNVPTX()) {
528 CmdArgs.push_back(Elt: "-Wl,-Bsymbolic");
529 CmdArgs.push_back(Elt: "-shared");
530 ArgStringList LinkerArgs;
531 for (const opt::Arg *Arg :
532 Args.filtered(Ids: OPT_INPUT, Ids: OPT_library, Ids: OPT_library_path, Ids: OPT_rpath,
533 Ids: OPT_whole_archive, Ids: OPT_no_whole_archive)) {
534 // Sometimes needed libraries are passed by name, such as when using
535 // sanitizers. We need to check the file magic for any libraries.
536 if (Arg->getOption().matches(ID: OPT_INPUT)) {
537 if (!sys::fs::exists(Path: Arg->getValue()) ||
538 sys::fs::is_directory(Path: Arg->getValue()))
539 continue;
540
541 file_magic Magic;
542 if (auto EC = identify_magic(path: Arg->getValue(), result&: Magic))
543 return createStringError(Fmt: "Failed to open %s", Vals: Arg->getValue());
544 if (Magic != file_magic::archive &&
545 Magic != file_magic::elf_shared_object)
546 continue;
547 }
548 if (Arg->getOption().matches(ID: OPT_whole_archive))
549 LinkerArgs.push_back(Elt: Args.MakeArgString(Str: "-Wl,--whole-archive"));
550 else if (Arg->getOption().matches(ID: OPT_no_whole_archive))
551 LinkerArgs.push_back(Elt: Args.MakeArgString(Str: "-Wl,--no-whole-archive"));
552 else
553 Arg->render(Args, Output&: LinkerArgs);
554 }
555 llvm::copy(Range&: LinkerArgs, Out: std::back_inserter(x&: CmdArgs));
556 }
557
558 // Pass on -mllvm options to the clang invocation.
559 for (const opt::Arg *Arg : Args.filtered(Ids: OPT_mllvm)) {
560 CmdArgs.push_back(Elt: "-mllvm");
561 CmdArgs.push_back(Elt: Arg->getValue());
562 }
563
564 if (Args.hasArg(Ids: OPT_debug))
565 CmdArgs.push_back(Elt: "-g");
566
567 if (SaveTemps)
568 CmdArgs.push_back(Elt: "-save-temps");
569
570 if (Verbose)
571 CmdArgs.push_back(Elt: "-v");
572
573 if (!CudaBinaryPath.empty())
574 CmdArgs.push_back(Elt: Args.MakeArgString(Str: "--cuda-path=" + CudaBinaryPath));
575
576 for (StringRef Arg : Args.getAllArgValues(Id: OPT_ptxas_arg))
577 llvm::copy(
578 Range: SmallVector<StringRef>({"-Xcuda-ptxas", Args.MakeArgString(Str: Arg)}),
579 Out: std::back_inserter(x&: CmdArgs));
580
581 for (StringRef Arg : Args.getAllArgValues(Id: OPT_linker_arg_EQ))
582 CmdArgs.push_back(Elt: Args.MakeArgString(Str: Arg));
583
584 for (StringRef Arg : Args.getAllArgValues(Id: OPT_builtin_bitcode_EQ)) {
585 if (llvm::Triple(Arg.split(Separator: '=').first) == Triple)
586 CmdArgs.append(IL: {"-Xclang", "-mlink-builtin-bitcode", "-Xclang",
587 Args.MakeArgString(Str: Arg.split(Separator: '=').second)});
588 }
589
590 // The OpenMPOpt pass can introduce new calls and is expensive, we do not want
591 // this when running CodeGen through clang.
592 if (Args.hasArg(Ids: OPT_clang_backend) || Args.hasArg(Ids: OPT_builtin_bitcode_EQ))
593 CmdArgs.append(IL: {"-mllvm", "-openmp-opt-disable"});
594
595 if (Error Err = executeCommands(ExecutablePath: *ClangPath, Args: CmdArgs))
596 return std::move(Err);
597
598 return *TempFileOrErr;
599}
600} // namespace generic
601
602Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles,
603 const ArgList &Args) {
604 const llvm::Triple Triple(Args.getLastArgValue(Id: OPT_triple_EQ));
605 switch (Triple.getArch()) {
606 case Triple::nvptx:
607 case Triple::nvptx64:
608 case Triple::amdgcn:
609 case Triple::x86:
610 case Triple::x86_64:
611 case Triple::aarch64:
612 case Triple::aarch64_be:
613 case Triple::ppc64:
614 case Triple::ppc64le:
615 case Triple::systemz:
616 return generic::clang(InputFiles, Args);
617 default:
618 return createStringError(S: Triple.getArchName() +
619 " linking is not supported");
620 }
621}
622
623void diagnosticHandler(const DiagnosticInfo &DI) {
624 std::string ErrStorage;
625 raw_string_ostream OS(ErrStorage);
626 DiagnosticPrinterRawOStream DP(OS);
627 DI.print(DP);
628
629 switch (DI.getSeverity()) {
630 case DS_Error:
631 WithColor::error(OS&: errs(), Prefix: LinkerExecutable) << ErrStorage << "\n";
632 LTOError = true;
633 break;
634 case DS_Warning:
635 WithColor::warning(OS&: errs(), Prefix: LinkerExecutable) << ErrStorage << "\n";
636 break;
637 case DS_Note:
638 WithColor::note(OS&: errs(), Prefix: LinkerExecutable) << ErrStorage << "\n";
639 break;
640 case DS_Remark:
641 WithColor::remark(OS&: errs()) << ErrStorage << "\n";
642 break;
643 }
644}
645
646// Get the list of target features from the input file and unify them such that
647// if there are multiple +xxx or -xxx features we only keep the last one.
648std::vector<std::string> getTargetFeatures(ArrayRef<OffloadFile> InputFiles) {
649 SmallVector<StringRef> Features;
650 for (const OffloadFile &File : InputFiles) {
651 for (auto Arg : llvm::split(Str: File.getBinary()->getString(Key: "feature"), Separator: ","))
652 Features.emplace_back(Args&: Arg);
653 }
654
655 // Only add a feature if it hasn't been seen before starting from the end.
656 std::vector<std::string> UnifiedFeatures;
657 DenseSet<StringRef> UsedFeatures;
658 for (StringRef Feature : llvm::reverse(C&: Features)) {
659 if (UsedFeatures.insert(V: Feature.drop_front()).second)
660 UnifiedFeatures.push_back(x: Feature.str());
661 }
662
663 return UnifiedFeatures;
664}
665
666template <typename ModuleHook = function_ref<bool(size_t, const Module &)>>
667std::unique_ptr<lto::LTO> createLTO(
668 const ArgList &Args, const std::vector<std::string> &Features,
669 ModuleHook Hook = [](size_t, const Module &) { return true; }) {
670 const llvm::Triple Triple(Args.getLastArgValue(Id: OPT_triple_EQ));
671 // We need to remove AMD's target-id from the processor if present.
672 StringRef TargetID = Args.getLastArgValue(Id: OPT_arch_EQ);
673 StringRef Arch = clang::getProcessorFromTargetID(T: Triple, OffloadArch: TargetID);
674 lto::Config Conf;
675 lto::ThinBackend Backend;
676 // TODO: Handle index-only thin-LTO
677 Backend =
678 lto::createInProcessThinBackend(Parallelism: llvm::heavyweight_hardware_concurrency());
679
680 Conf.CPU = Arch.str();
681 Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(TheTriple: Triple);
682
683 Conf.RemarksFilename = RemarksFilename;
684 Conf.RemarksPasses = RemarksPasses;
685 Conf.RemarksWithHotness = RemarksWithHotness;
686 Conf.RemarksHotnessThreshold = RemarksHotnessThreshold;
687 Conf.RemarksFormat = RemarksFormat;
688
689 StringRef OptLevel = Args.getLastArgValue(Id: OPT_opt_level, Default: "O2");
690 Conf.MAttrs = Features;
691 std::optional<CodeGenOptLevel> CGOptLevelOrNone =
692 CodeGenOpt::parseLevel(C: OptLevel[1]);
693 assert(CGOptLevelOrNone && "Invalid optimization level");
694 Conf.CGOptLevel = *CGOptLevelOrNone;
695 Conf.OptLevel = OptLevel[1] - '0';
696 Conf.DefaultTriple = Triple.getTriple();
697
698 // TODO: Should we complain about combining --opt-level and -passes, as opt
699 // does? That might be too limiting in clang-linker-wrapper, so for now we
700 // just warn in the help entry for -passes that the default<O?> corresponding
701 // to --opt-level=O? should be included there. The problem is that
702 // --opt-level produces effects in clang-linker-wrapper beyond what -passes
703 // appears to be able to achieve, so rejecting the combination of --opt-level
704 // and -passes would apparently make it impossible to combine those effects
705 // with a custom pass pipeline.
706 Conf.OptPipeline = PassPipeline;
707 Conf.PassPlugins = PassPlugins;
708
709 LTOError = false;
710 Conf.DiagHandler = diagnosticHandler;
711
712 Conf.PTO.LoopVectorization = Conf.OptLevel > 1;
713 Conf.PTO.SLPVectorization = Conf.OptLevel > 1;
714
715 if (SaveTemps) {
716 std::string TempName = (sys::path::filename(path: ExecutableName) + "." +
717 Triple.getTriple() + "." + TargetID)
718 .str();
719 Conf.PostInternalizeModuleHook = [=](size_t Task, const Module &M) {
720 std::string File =
721 !Task ? TempName + ".postlink.bc"
722 : TempName + "." + std::to_string(val: Task) + ".postlink.bc";
723 error_code EC;
724 raw_fd_ostream LinkedBitcode(File, EC, sys::fs::OF_None);
725 if (EC)
726 reportError(E: errorCodeToError(EC));
727 WriteBitcodeToFile(M, Out&: LinkedBitcode);
728 return true;
729 };
730 Conf.PreCodeGenModuleHook = [=](size_t Task, const Module &M) {
731 std::string File =
732 !Task ? TempName + ".postopt.bc"
733 : TempName + "." + std::to_string(val: Task) + ".postopt.bc";
734 error_code EC;
735 raw_fd_ostream LinkedBitcode(File, EC, sys::fs::OF_None);
736 if (EC)
737 reportError(E: errorCodeToError(EC));
738 WriteBitcodeToFile(M, Out&: LinkedBitcode);
739 return true;
740 };
741 }
742 Conf.PostOptModuleHook = Hook;
743 Conf.CGFileType = (Triple.isNVPTX() || SaveTemps)
744 ? CodeGenFileType::AssemblyFile
745 : CodeGenFileType::ObjectFile;
746
747 // TODO: Handle remark files
748 Conf.HasWholeProgramVisibility = Args.hasArg(Ids: OPT_whole_program);
749
750 return std::make_unique<lto::LTO>(args: std::move(Conf), args&: Backend);
751}
752
753// Returns true if \p S is valid as a C language identifier and will be given
754// `__start_` and `__stop_` symbols.
755bool isValidCIdentifier(StringRef S) {
756 return !S.empty() && (isAlpha(C: S[0]) || S[0] == '_') &&
757 llvm::all_of(Range: llvm::drop_begin(RangeOrContainer&: S),
758 P: [](char C) { return C == '_' || isAlnum(C); });
759}
760
761Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
762 SmallVectorImpl<StringRef> &OutputFiles,
763 const ArgList &Args) {
764 llvm::TimeTraceScope TimeScope("Link bitcode files");
765 const llvm::Triple Triple(Args.getLastArgValue(Id: OPT_triple_EQ));
766 StringRef Arch = Args.getLastArgValue(Id: OPT_arch_EQ);
767
768 SmallVector<OffloadFile, 4> BitcodeInputFiles;
769 DenseSet<StringRef> StrongResolutions;
770 DenseSet<StringRef> UsedInRegularObj;
771 DenseSet<StringRef> UsedInSharedLib;
772 BumpPtrAllocator Alloc;
773 StringSaver Saver(Alloc);
774
775 // Search for bitcode files in the input and create an LTO input file. If it
776 // is not a bitcode file, scan its symbol table for symbols we need to save.
777 for (OffloadFile &File : InputFiles) {
778 MemoryBufferRef Buffer = MemoryBufferRef(File.getBinary()->getImage(), "");
779
780 file_magic Type = identify_magic(magic: Buffer.getBuffer());
781 switch (Type) {
782 case file_magic::bitcode: {
783 Expected<IRSymtabFile> IRSymtabOrErr = readIRSymtab(MBRef: Buffer);
784 if (!IRSymtabOrErr)
785 return IRSymtabOrErr.takeError();
786
787 // Check for any strong resolutions we need to preserve.
788 for (unsigned I = 0; I != IRSymtabOrErr->Mods.size(); ++I) {
789 for (const auto &Sym : IRSymtabOrErr->TheReader.module_symbols(I)) {
790 if (!Sym.isFormatSpecific() && Sym.isGlobal() && !Sym.isWeak() &&
791 !Sym.isUndefined())
792 StrongResolutions.insert(V: Saver.save(S: Sym.Name));
793 }
794 }
795 BitcodeInputFiles.emplace_back(Args: std::move(File));
796 continue;
797 }
798 case file_magic::elf_relocatable:
799 case file_magic::elf_shared_object: {
800 Expected<std::unique_ptr<ObjectFile>> ObjFile =
801 ObjectFile::createObjectFile(Object: Buffer);
802 if (!ObjFile)
803 continue;
804
805 for (SymbolRef Sym : (*ObjFile)->symbols()) {
806 Expected<StringRef> Name = Sym.getName();
807 if (!Name)
808 return Name.takeError();
809
810 // Record if we've seen these symbols in any object or shared libraries.
811 if ((*ObjFile)->isRelocatableObject())
812 UsedInRegularObj.insert(V: Saver.save(S: *Name));
813 else
814 UsedInSharedLib.insert(V: Saver.save(S: *Name));
815 }
816 continue;
817 }
818 default:
819 continue;
820 }
821 }
822
823 if (BitcodeInputFiles.empty())
824 return Error::success();
825
826 // Remove all the bitcode files that we moved from the original input.
827 llvm::erase_if(C&: InputFiles, P: [](OffloadFile &F) { return !F.getBinary(); });
828
829 // LTO Module hook to output bitcode without running the backend.
830 SmallVector<StringRef> BitcodeOutput;
831 auto OutputBitcode = [&](size_t, const Module &M) {
832 auto TempFileOrErr = createOutputFile(Prefix: sys::path::filename(path: ExecutableName) +
833 "-jit-" + Triple.getTriple(),
834 Extension: "bc");
835 if (!TempFileOrErr)
836 reportError(E: TempFileOrErr.takeError());
837
838 std::error_code EC;
839 raw_fd_ostream LinkedBitcode(*TempFileOrErr, EC, sys::fs::OF_None);
840 if (EC)
841 reportError(E: errorCodeToError(EC));
842 WriteBitcodeToFile(M, Out&: LinkedBitcode);
843 BitcodeOutput.push_back(Elt: *TempFileOrErr);
844 return false;
845 };
846
847 // We assume visibility of the whole program if every input file was bitcode.
848 auto Features = getTargetFeatures(InputFiles: BitcodeInputFiles);
849 auto LTOBackend = Args.hasArg(Ids: OPT_embed_bitcode) ||
850 Args.hasArg(Ids: OPT_builtin_bitcode_EQ) ||
851 Args.hasArg(Ids: OPT_clang_backend)
852 ? createLTO(Args, Features, Hook: OutputBitcode)
853 : createLTO(Args, Features);
854
855 // We need to resolve the symbols so the LTO backend knows which symbols need
856 // to be kept or can be internalized. This is a simplified symbol resolution
857 // scheme to approximate the full resolution a linker would do.
858 uint64_t Idx = 0;
859 DenseSet<StringRef> PrevailingSymbols;
860 for (auto &BitcodeInput : BitcodeInputFiles) {
861 // Get a semi-unique buffer identifier for Thin-LTO.
862 StringRef Identifier = Saver.save(
863 S: std::to_string(val: Idx++) + "." +
864 BitcodeInput.getBinary()->getMemoryBufferRef().getBufferIdentifier());
865 MemoryBufferRef Buffer =
866 MemoryBufferRef(BitcodeInput.getBinary()->getImage(), Identifier);
867 Expected<std::unique_ptr<lto::InputFile>> BitcodeFileOrErr =
868 llvm::lto::InputFile::create(Object: Buffer);
869 if (!BitcodeFileOrErr)
870 return BitcodeFileOrErr.takeError();
871
872 // Save the input file and the buffer associated with its memory.
873 const auto Symbols = (*BitcodeFileOrErr)->symbols();
874 SmallVector<lto::SymbolResolution, 16> Resolutions(Symbols.size());
875 size_t Idx = 0;
876 for (auto &Sym : Symbols) {
877 lto::SymbolResolution &Res = Resolutions[Idx++];
878
879 // We will use this as the prevailing symbol definition in LTO unless
880 // it is undefined or another definition has already been used.
881 Res.Prevailing =
882 !Sym.isUndefined() &&
883 !(Sym.isWeak() && StrongResolutions.contains(V: Sym.getName())) &&
884 PrevailingSymbols.insert(V: Saver.save(S: Sym.getName())).second;
885
886 // We need LTO to preseve the following global symbols:
887 // 1) Symbols used in regular objects.
888 // 2) Sections that will be given a __start/__stop symbol.
889 // 3) Prevailing symbols that are needed visible to external libraries.
890 Res.VisibleToRegularObj =
891 UsedInRegularObj.contains(V: Sym.getName()) ||
892 isValidCIdentifier(S: Sym.getSectionName()) ||
893 (Res.Prevailing &&
894 (Sym.getVisibility() != GlobalValue::HiddenVisibility &&
895 !Sym.canBeOmittedFromSymbolTable()));
896
897 // Identify symbols that must be exported dynamically and can be
898 // referenced by other files.
899 Res.ExportDynamic =
900 Sym.getVisibility() != GlobalValue::HiddenVisibility &&
901 (UsedInSharedLib.contains(V: Sym.getName()) ||
902 !Sym.canBeOmittedFromSymbolTable());
903
904 // The final definition will reside in this linkage unit if the symbol is
905 // defined and local to the module. This only checks for bitcode files,
906 // full assertion will require complete symbol resolution.
907 Res.FinalDefinitionInLinkageUnit =
908 Sym.getVisibility() != GlobalValue::DefaultVisibility &&
909 (!Sym.isUndefined() && !Sym.isCommon());
910
911 // We do not support linker redefined symbols (e.g. --wrap) for device
912 // image linking, so the symbols will not be changed after LTO.
913 Res.LinkerRedefined = false;
914 }
915
916 // Add the bitcode file with its resolved symbols to the LTO job.
917 if (Error Err = LTOBackend->add(Obj: std::move(*BitcodeFileOrErr), Res: Resolutions))
918 return Err;
919 }
920
921 // Run the LTO job to compile the bitcode.
922 size_t MaxTasks = LTOBackend->getMaxTasks();
923 SmallVector<StringRef> Files(MaxTasks);
924 auto AddStream =
925 [&](size_t Task,
926 const Twine &ModuleName) -> std::unique_ptr<CachedFileStream> {
927 int FD = -1;
928 auto &TempFile = Files[Task];
929 StringRef Extension = (Triple.isNVPTX() || SaveTemps) ? "s" : "o";
930 std::string TaskStr = Task ? "." + std::to_string(val: Task) : "";
931 auto TempFileOrErr =
932 createOutputFile(Prefix: sys::path::filename(path: ExecutableName) + "." +
933 Triple.getTriple() + "." + Arch + TaskStr,
934 Extension);
935 if (!TempFileOrErr)
936 reportError(E: TempFileOrErr.takeError());
937 TempFile = *TempFileOrErr;
938 if (std::error_code EC = sys::fs::openFileForWrite(Name: TempFile, ResultFD&: FD))
939 reportError(E: errorCodeToError(EC));
940 return std::make_unique<CachedFileStream>(
941 args: std::make_unique<llvm::raw_fd_ostream>(args&: FD, args: true));
942 };
943
944 if (Error Err = LTOBackend->run(AddStream))
945 return Err;
946
947 if (LTOError)
948 return createStringError(Fmt: "Errors encountered inside the LTO pipeline.");
949
950 // If we are embedding bitcode we only need the intermediate output.
951 bool SingleOutput = Files.size() == 1;
952 if (Args.hasArg(Ids: OPT_embed_bitcode)) {
953 if (BitcodeOutput.size() != 1 || !SingleOutput)
954 return createStringError(Fmt: "Cannot embed bitcode with multiple files.");
955 OutputFiles.push_back(Elt: Args.MakeArgString(Str: BitcodeOutput.front()));
956 return Error::success();
957 }
958
959 // Append the new inputs to the device linker input. If the user requested an
960 // internalizing link we need to pass the bitcode to clang.
961 for (StringRef File :
962 Args.hasArg(Ids: OPT_clang_backend) || Args.hasArg(Ids: OPT_builtin_bitcode_EQ)
963 ? BitcodeOutput
964 : Files)
965 OutputFiles.push_back(Elt: File);
966
967 return Error::success();
968}
969
970Expected<StringRef> writeOffloadFile(const OffloadFile &File) {
971 const OffloadBinary &Binary = *File.getBinary();
972
973 StringRef Prefix =
974 sys::path::stem(path: Binary.getMemoryBufferRef().getBufferIdentifier());
975 StringRef Suffix = getImageKindName(Name: Binary.getImageKind());
976
977 auto TempFileOrErr = createOutputFile(
978 Prefix: Prefix + "-" + Binary.getTriple() + "-" + Binary.getArch(), Extension: Suffix);
979 if (!TempFileOrErr)
980 return TempFileOrErr.takeError();
981
982 Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
983 FileOutputBuffer::create(FilePath: *TempFileOrErr, Size: Binary.getImage().size());
984 if (!OutputOrErr)
985 return OutputOrErr.takeError();
986 std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
987 llvm::copy(Range: Binary.getImage(), Out: Output->getBufferStart());
988 if (Error E = Output->commit())
989 return std::move(E);
990
991 return *TempFileOrErr;
992}
993
994// Compile the module to an object file using the appropriate target machine for
995// the host triple.
996Expected<StringRef> compileModule(Module &M, OffloadKind Kind) {
997 llvm::TimeTraceScope TimeScope("Compile module");
998 std::string Msg;
999 const Target *T = TargetRegistry::lookupTarget(Triple: M.getTargetTriple(), Error&: Msg);
1000 if (!T)
1001 return createStringError(S: Msg);
1002
1003 auto Options =
1004 codegen::InitTargetOptionsFromCodeGenFlags(TheTriple: Triple(M.getTargetTriple()));
1005 StringRef CPU = "";
1006 StringRef Features = "";
1007 std::unique_ptr<TargetMachine> TM(
1008 T->createTargetMachine(TT: M.getTargetTriple(), CPU, Features, Options,
1009 RM: Reloc::PIC_, CM: M.getCodeModel()));
1010
1011 if (M.getDataLayout().isDefault())
1012 M.setDataLayout(TM->createDataLayout());
1013
1014 int FD = -1;
1015 auto TempFileOrErr =
1016 createOutputFile(Prefix: sys::path::filename(path: ExecutableName) + "." +
1017 getOffloadKindName(Name: Kind) + ".image.wrapper",
1018 Extension: "o");
1019 if (!TempFileOrErr)
1020 return TempFileOrErr.takeError();
1021 if (std::error_code EC = sys::fs::openFileForWrite(Name: *TempFileOrErr, ResultFD&: FD))
1022 return errorCodeToError(EC);
1023
1024 auto OS = std::make_unique<llvm::raw_fd_ostream>(args&: FD, args: true);
1025
1026 legacy::PassManager CodeGenPasses;
1027 TargetLibraryInfoImpl TLII(Triple(M.getTargetTriple()));
1028 CodeGenPasses.add(P: new TargetLibraryInfoWrapperPass(TLII));
1029 if (TM->addPassesToEmitFile(CodeGenPasses, *OS, nullptr,
1030 CodeGenFileType::ObjectFile))
1031 return createStringError(Fmt: "Failed to execute host backend");
1032 CodeGenPasses.run(M);
1033
1034 return *TempFileOrErr;
1035}
1036
1037/// Creates the object file containing the device image and runtime
1038/// registration code from the device images stored in \p Images.
1039Expected<StringRef>
1040wrapDeviceImages(ArrayRef<std::unique_ptr<MemoryBuffer>> Buffers,
1041 const ArgList &Args, OffloadKind Kind) {
1042 llvm::TimeTraceScope TimeScope("Wrap bundled images");
1043
1044 SmallVector<ArrayRef<char>, 4> BuffersToWrap;
1045 for (const auto &Buffer : Buffers)
1046 BuffersToWrap.emplace_back(
1047 Args: ArrayRef<char>(Buffer->getBufferStart(), Buffer->getBufferSize()));
1048
1049 LLVMContext Context;
1050 Module M("offload.wrapper.module", Context);
1051 M.setTargetTriple(
1052 Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple()));
1053
1054 switch (Kind) {
1055 case OFK_OpenMP:
1056 if (Error Err = offloading::wrapOpenMPBinaries(
1057 M, Images: BuffersToWrap,
1058 EntryArray: offloading::getOffloadEntryArray(M, SectionName: "omp_offloading_entries"),
1059 /*Suffix=*/"", /*Relocatable=*/Args.hasArg(Ids: OPT_relocatable)))
1060 return std::move(Err);
1061 break;
1062 case OFK_Cuda:
1063 if (Error Err = offloading::wrapCudaBinary(
1064 M, Images: BuffersToWrap.front(),
1065 EntryArray: offloading::getOffloadEntryArray(M, SectionName: "cuda_offloading_entries"),
1066 /*Suffix=*/"", /*EmitSurfacesAndTextures=*/false))
1067 return std::move(Err);
1068 break;
1069 case OFK_HIP:
1070 if (Error Err = offloading::wrapHIPBinary(
1071 M, Images: BuffersToWrap.front(),
1072 EntryArray: offloading::getOffloadEntryArray(M, SectionName: "hip_offloading_entries")))
1073 return std::move(Err);
1074 break;
1075 default:
1076 return createStringError(S: getOffloadKindName(Name: Kind) +
1077 " wrapping is not supported");
1078 }
1079
1080 if (Args.hasArg(Ids: OPT_print_wrapped_module))
1081 errs() << M;
1082 if (Args.hasArg(Ids: OPT_save_temps)) {
1083 int FD = -1;
1084 auto TempFileOrErr =
1085 createOutputFile(Prefix: sys::path::filename(path: ExecutableName) + "." +
1086 getOffloadKindName(Name: Kind) + ".image.wrapper",
1087 Extension: "bc");
1088 if (!TempFileOrErr)
1089 return TempFileOrErr.takeError();
1090 if (std::error_code EC = sys::fs::openFileForWrite(Name: *TempFileOrErr, ResultFD&: FD))
1091 return errorCodeToError(EC);
1092 llvm::raw_fd_ostream OS(FD, true);
1093 WriteBitcodeToFile(M, Out&: OS);
1094 }
1095
1096 auto FileOrErr = compileModule(M, Kind);
1097 if (!FileOrErr)
1098 return FileOrErr.takeError();
1099 return *FileOrErr;
1100}
1101
1102Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
1103bundleOpenMP(ArrayRef<OffloadingImage> Images) {
1104 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
1105 for (const OffloadingImage &Image : Images)
1106 Buffers.emplace_back(
1107 Args: MemoryBuffer::getMemBufferCopy(InputData: OffloadBinary::write(Image)));
1108
1109 return std::move(Buffers);
1110}
1111
1112Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
1113bundleCuda(ArrayRef<OffloadingImage> Images, const ArgList &Args) {
1114 SmallVector<std::pair<StringRef, StringRef>, 4> InputFiles;
1115 for (const OffloadingImage &Image : Images)
1116 InputFiles.emplace_back(Args: std::make_pair(x: Image.Image->getBufferIdentifier(),
1117 y: Image.StringData.lookup(Key: "arch")));
1118
1119 Triple TheTriple = Triple(Images.front().StringData.lookup(Key: "triple"));
1120 auto FileOrErr = nvptx::fatbinary(InputFiles, Args);
1121 if (!FileOrErr)
1122 return FileOrErr.takeError();
1123
1124 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
1125 llvm::MemoryBuffer::getFileOrSTDIN(Filename: *FileOrErr);
1126
1127 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
1128 if (std::error_code EC = ImageOrError.getError())
1129 return createFileError(F: *FileOrErr, EC);
1130 Buffers.emplace_back(Args: std::move(*ImageOrError));
1131
1132 return std::move(Buffers);
1133}
1134
1135Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
1136bundleHIP(ArrayRef<OffloadingImage> Images, const ArgList &Args) {
1137 SmallVector<std::pair<StringRef, StringRef>, 4> InputFiles;
1138 for (const OffloadingImage &Image : Images)
1139 InputFiles.emplace_back(Args: std::make_pair(x: Image.Image->getBufferIdentifier(),
1140 y: Image.StringData.lookup(Key: "arch")));
1141
1142 Triple TheTriple = Triple(Images.front().StringData.lookup(Key: "triple"));
1143 auto FileOrErr = amdgcn::fatbinary(InputFiles, Args);
1144 if (!FileOrErr)
1145 return FileOrErr.takeError();
1146
1147 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ImageOrError =
1148 llvm::MemoryBuffer::getFileOrSTDIN(Filename: *FileOrErr);
1149
1150 SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
1151 if (std::error_code EC = ImageOrError.getError())
1152 return createFileError(F: *FileOrErr, EC);
1153 Buffers.emplace_back(Args: std::move(*ImageOrError));
1154
1155 return std::move(Buffers);
1156}
1157
1158/// Transforms the input \p Images into the binary format the runtime expects
1159/// for the given \p Kind.
1160Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
1161bundleLinkedOutput(ArrayRef<OffloadingImage> Images, const ArgList &Args,
1162 OffloadKind Kind) {
1163 llvm::TimeTraceScope TimeScope("Bundle linked output");
1164 switch (Kind) {
1165 case OFK_OpenMP:
1166 return bundleOpenMP(Images);
1167 case OFK_Cuda:
1168 return bundleCuda(Images, Args);
1169 case OFK_HIP:
1170 return bundleHIP(Images, Args);
1171 default:
1172 return createStringError(S: getOffloadKindName(Name: Kind) +
1173 " bundling is not supported");
1174 }
1175}
1176
1177/// Returns a new ArgList containg arguments used for the device linking phase.
1178DerivedArgList getLinkerArgs(ArrayRef<OffloadFile> Input,
1179 const InputArgList &Args) {
1180 DerivedArgList DAL = DerivedArgList(DerivedArgList(Args));
1181 for (Arg *A : Args)
1182 DAL.append(A);
1183
1184 // Set the subarchitecture and target triple for this compilation.
1185 const OptTable &Tbl = getOptTable();
1186 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_arch_EQ),
1187 Value: Args.MakeArgString(Str: Input.front().getBinary()->getArch()));
1188 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_triple_EQ),
1189 Value: Args.MakeArgString(Str: Input.front().getBinary()->getTriple()));
1190
1191 // If every input file is bitcode we have whole program visibility as we do
1192 // only support static linking with bitcode.
1193 auto ContainsBitcode = [](const OffloadFile &F) {
1194 return identify_magic(magic: F.getBinary()->getImage()) == file_magic::bitcode;
1195 };
1196 if (llvm::all_of(Range&: Input, P: ContainsBitcode))
1197 DAL.AddFlagArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_whole_program));
1198
1199 // Forward '-Xoffload-linker' options to the appropriate backend.
1200 for (StringRef Arg : Args.getAllArgValues(Id: OPT_device_linker_args_EQ)) {
1201 auto [Triple, Value] = Arg.split(Separator: '=');
1202 if (Value.empty())
1203 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_linker_arg_EQ),
1204 Value: Args.MakeArgString(Str: Triple));
1205 else if (Triple == DAL.getLastArgValue(Id: OPT_triple_EQ))
1206 DAL.AddJoinedArg(BaseArg: nullptr, Opt: Tbl.getOption(Opt: OPT_linker_arg_EQ),
1207 Value: Args.MakeArgString(Str: Value));
1208 }
1209
1210 return DAL;
1211}
1212
1213Error handleOverrideImages(
1214 const InputArgList &Args,
1215 MapVector<OffloadKind, SmallVector<OffloadingImage, 0>> &Images) {
1216 for (StringRef Arg : Args.getAllArgValues(Id: OPT_override_image)) {
1217 OffloadKind Kind = getOffloadKind(Name: Arg.split(Separator: "=").first);
1218 StringRef Filename = Arg.split(Separator: "=").second;
1219
1220 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
1221 MemoryBuffer::getFileOrSTDIN(Filename);
1222 if (std::error_code EC = BufferOrErr.getError())
1223 return createFileError(F: Filename, EC);
1224
1225 Expected<std::unique_ptr<ObjectFile>> ElfOrErr =
1226 ObjectFile::createELFObjectFile(Object: **BufferOrErr,
1227 /*InitContent=*/false);
1228 if (!ElfOrErr)
1229 return ElfOrErr.takeError();
1230 ObjectFile &Elf = **ElfOrErr;
1231
1232 OffloadingImage TheImage{};
1233 TheImage.TheImageKind = IMG_Object;
1234 TheImage.TheOffloadKind = Kind;
1235 TheImage.StringData["triple"] =
1236 Args.MakeArgString(Str: Elf.makeTriple().getTriple());
1237 if (std::optional<StringRef> CPU = Elf.tryGetCPUName())
1238 TheImage.StringData["arch"] = Args.MakeArgString(Str: *CPU);
1239 TheImage.Image = std::move(*BufferOrErr);
1240
1241 Images[Kind].emplace_back(Args: std::move(TheImage));
1242 }
1243 return Error::success();
1244}
1245
1246/// Transforms all the extracted offloading input files into an image that can
1247/// be registered by the runtime.
1248Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
1249 SmallVectorImpl<SmallVector<OffloadFile>> &LinkerInputFiles,
1250 const InputArgList &Args, char **Argv, int Argc) {
1251 llvm::TimeTraceScope TimeScope("Handle all device input");
1252
1253 std::mutex ImageMtx;
1254 MapVector<OffloadKind, SmallVector<OffloadingImage, 0>> Images;
1255
1256 // Initialize the images with any overriding inputs.
1257 if (Args.hasArg(Ids: OPT_override_image))
1258 if (Error Err = handleOverrideImages(Args, Images))
1259 return std::move(Err);
1260
1261 auto Err = parallelForEachError(R&: LinkerInputFiles, Fn: [&](auto &Input) -> Error {
1262 llvm::TimeTraceScope TimeScope("Link device input");
1263
1264 // Each thread needs its own copy of the base arguments to maintain
1265 // per-device argument storage of synthetic strings.
1266 const OptTable &Tbl = getOptTable();
1267 BumpPtrAllocator Alloc;
1268 StringSaver Saver(Alloc);
1269 auto BaseArgs =
1270 Tbl.parseArgs(Argc, Argv, Unknown: OPT_INVALID, Saver, ErrorFn: [](StringRef Err) {
1271 reportError(E: createStringError(S: Err));
1272 });
1273 auto LinkerArgs = getLinkerArgs(Input, BaseArgs);
1274
1275 DenseSet<OffloadKind> ActiveOffloadKinds;
1276 for (const auto &File : Input)
1277 if (File.getBinary()->getOffloadKind() != OFK_None)
1278 ActiveOffloadKinds.insert(File.getBinary()->getOffloadKind());
1279
1280 // First link and remove all the input files containing bitcode.
1281 SmallVector<StringRef> InputFiles;
1282 if (Error Err = linkBitcodeFiles(Input, InputFiles, LinkerArgs))
1283 return Err;
1284
1285 // Write any remaining device inputs to an output file for the linker.
1286 for (const OffloadFile &File : Input) {
1287 auto FileNameOrErr = writeOffloadFile(File);
1288 if (!FileNameOrErr)
1289 return FileNameOrErr.takeError();
1290 InputFiles.emplace_back(Args&: *FileNameOrErr);
1291 }
1292
1293 // Link the remaining device files using the device linker.
1294 auto OutputOrErr = !Args.hasArg(Ids: OPT_embed_bitcode)
1295 ? linkDevice(InputFiles, LinkerArgs)
1296 : InputFiles.front();
1297 if (!OutputOrErr)
1298 return OutputOrErr.takeError();
1299
1300 // Store the offloading image for each linked output file.
1301 for (OffloadKind Kind : ActiveOffloadKinds) {
1302 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
1303 llvm::MemoryBuffer::getFileOrSTDIN(Filename: *OutputOrErr);
1304 if (std::error_code EC = FileOrErr.getError()) {
1305 if (DryRun)
1306 FileOrErr = MemoryBuffer::getMemBuffer(InputData: "");
1307 else
1308 return createFileError(*OutputOrErr, EC);
1309 }
1310
1311 std::scoped_lock<decltype(ImageMtx)> Guard(ImageMtx);
1312 OffloadingImage TheImage{};
1313 TheImage.TheImageKind =
1314 Args.hasArg(Ids: OPT_embed_bitcode) ? IMG_Bitcode : IMG_Object;
1315 TheImage.TheOffloadKind = Kind;
1316 TheImage.StringData["triple"] =
1317 Args.MakeArgString(Str: LinkerArgs.getLastArgValue(OPT_triple_EQ));
1318 TheImage.StringData["arch"] =
1319 Args.MakeArgString(Str: LinkerArgs.getLastArgValue(OPT_arch_EQ));
1320 TheImage.Image = std::move(*FileOrErr);
1321
1322 Images[Kind].emplace_back(Args: std::move(TheImage));
1323 }
1324 return Error::success();
1325 });
1326 if (Err)
1327 return std::move(Err);
1328
1329 // Create a binary image of each offloading image and embed it into a new
1330 // object file.
1331 SmallVector<StringRef> WrappedOutput;
1332 for (auto &[Kind, Input] : Images) {
1333 // We sort the entries before bundling so they appear in a deterministic
1334 // order in the final binary.
1335 llvm::sort(C&: Input, Comp: [](OffloadingImage &A, OffloadingImage &B) {
1336 return A.StringData["triple"] > B.StringData["triple"] ||
1337 A.StringData["arch"] > B.StringData["arch"] ||
1338 A.TheOffloadKind < B.TheOffloadKind;
1339 });
1340 auto BundledImagesOrErr = bundleLinkedOutput(Images: Input, Args, Kind);
1341 if (!BundledImagesOrErr)
1342 return BundledImagesOrErr.takeError();
1343 auto OutputOrErr = wrapDeviceImages(Buffers: *BundledImagesOrErr, Args, Kind);
1344 if (!OutputOrErr)
1345 return OutputOrErr.takeError();
1346 WrappedOutput.push_back(Elt: *OutputOrErr);
1347 }
1348
1349 return WrappedOutput;
1350}
1351
1352std::optional<std::string> findFile(StringRef Dir, StringRef Root,
1353 const Twine &Name) {
1354 SmallString<128> Path;
1355 if (Dir.starts_with(Prefix: "="))
1356 sys::path::append(path&: Path, a: Root, b: Dir.substr(Start: 1), c: Name);
1357 else
1358 sys::path::append(path&: Path, a: Dir, b: Name);
1359
1360 if (sys::fs::exists(Path))
1361 return static_cast<std::string>(Path);
1362 return std::nullopt;
1363}
1364
1365std::optional<std::string>
1366findFromSearchPaths(StringRef Name, StringRef Root,
1367 ArrayRef<StringRef> SearchPaths) {
1368 for (StringRef Dir : SearchPaths)
1369 if (std::optional<std::string> File = findFile(Dir, Root, Name))
1370 return File;
1371 return std::nullopt;
1372}
1373
1374std::optional<std::string>
1375searchLibraryBaseName(StringRef Name, StringRef Root,
1376 ArrayRef<StringRef> SearchPaths) {
1377 for (StringRef Dir : SearchPaths) {
1378 if (std::optional<std::string> File =
1379 findFile(Dir, Root, Name: "lib" + Name + ".so"))
1380 return File;
1381 if (std::optional<std::string> File =
1382 findFile(Dir, Root, Name: "lib" + Name + ".a"))
1383 return File;
1384 }
1385 return std::nullopt;
1386}
1387
1388/// Search for static libraries in the linker's library path given input like
1389/// `-lfoo` or `-l:libfoo.a`.
1390std::optional<std::string> searchLibrary(StringRef Input, StringRef Root,
1391 ArrayRef<StringRef> SearchPaths) {
1392 if (Input.starts_with(Prefix: ":") || Input.ends_with(Suffix: ".lib"))
1393 return findFromSearchPaths(Name: Input.drop_front(), Root, SearchPaths);
1394 return searchLibraryBaseName(Name: Input, Root, SearchPaths);
1395}
1396
/// Symbol flags tracked while scanning input files for symbols. The values
/// are distinct bits so that they can be combined into a mask.
enum Symbol : uint32_t {
  /// No flag is set.
  Sym_None = 0x0,
  /// The symbol is recorded as undefined.
  Sym_Undefined = 0x2,
  /// The symbol is recorded as weak.
  Sym_Weak = 0x4,
};
1403
1404/// Scan the symbols from a BitcodeFile \p Buffer and record if we need to
1405/// extract any symbols from it.
1406Expected<bool> getSymbolsFromBitcode(MemoryBufferRef Buffer, OffloadKind Kind,
1407 bool IsArchive, StringSaver &Saver,
1408 DenseMap<StringRef, Symbol> &Syms) {
1409 Expected<IRSymtabFile> IRSymtabOrErr = readIRSymtab(MBRef: Buffer);
1410 if (!IRSymtabOrErr)
1411 return IRSymtabOrErr.takeError();
1412
1413 bool ShouldExtract = !IsArchive;
1414 DenseMap<StringRef, Symbol> TmpSyms;
1415 for (unsigned I = 0; I != IRSymtabOrErr->Mods.size(); ++I) {
1416 for (const auto &Sym : IRSymtabOrErr->TheReader.module_symbols(I)) {
1417 if (Sym.isFormatSpecific() || !Sym.isGlobal())
1418 continue;
1419
1420 bool NewSymbol = Syms.count(Val: Sym.getName()) == 0;
1421 auto OldSym = NewSymbol ? Sym_None : Syms[Sym.getName()];
1422
1423 // We will extract if it defines a currenlty undefined non-weak symbol.
1424 bool ResolvesStrongReference =
1425 ((OldSym & Sym_Undefined && !(OldSym & Sym_Weak)) &&
1426 !Sym.isUndefined());
1427 // We will extract if it defines a new global symbol visible to the host.
1428 // This is only necessary for code targeting an offloading language.
1429 bool NewGlobalSymbol =
1430 ((NewSymbol || (OldSym & Sym_Undefined)) && !Sym.isUndefined() &&
1431 !Sym.canBeOmittedFromSymbolTable() && Kind != object::OFK_None &&
1432 (Sym.getVisibility() != GlobalValue::HiddenVisibility));
1433 ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol;
1434
1435 // Update this symbol in the "table" with the new information.
1436 if (OldSym & Sym_Undefined && !Sym.isUndefined())
1437 TmpSyms[Saver.save(S: Sym.getName())] =
1438 static_cast<Symbol>(OldSym & ~Sym_Undefined);
1439 if (Sym.isUndefined() && NewSymbol)
1440 TmpSyms[Saver.save(S: Sym.getName())] =
1441 static_cast<Symbol>(OldSym | Sym_Undefined);
1442 if (Sym.isWeak())
1443 TmpSyms[Saver.save(S: Sym.getName())] =
1444 static_cast<Symbol>(OldSym | Sym_Weak);
1445 }
1446 }
1447
1448 // If the file gets extracted we update the table with the new symbols.
1449 if (ShouldExtract)
1450 Syms.insert(I: std::begin(cont&: TmpSyms), E: std::end(cont&: TmpSyms));
1451
1452 return ShouldExtract;
1453}
1454
1455/// Scan the symbols from an ObjectFile \p Obj and record if we need to extract
1456/// any symbols from it.
1457Expected<bool> getSymbolsFromObject(const ObjectFile &Obj, OffloadKind Kind,
1458 bool IsArchive, StringSaver &Saver,
1459 DenseMap<StringRef, Symbol> &Syms) {
1460 bool ShouldExtract = !IsArchive;
1461 DenseMap<StringRef, Symbol> TmpSyms;
1462 for (SymbolRef Sym : Obj.symbols()) {
1463 auto FlagsOrErr = Sym.getFlags();
1464 if (!FlagsOrErr)
1465 return FlagsOrErr.takeError();
1466
1467 if (!(*FlagsOrErr & SymbolRef::SF_Global) ||
1468 (*FlagsOrErr & SymbolRef::SF_FormatSpecific))
1469 continue;
1470
1471 auto NameOrErr = Sym.getName();
1472 if (!NameOrErr)
1473 return NameOrErr.takeError();
1474
1475 bool NewSymbol = Syms.count(Val: *NameOrErr) == 0;
1476 auto OldSym = NewSymbol ? Sym_None : Syms[*NameOrErr];
1477
1478 // We will extract if it defines a currenlty undefined non-weak symbol.
1479 bool ResolvesStrongReference = (OldSym & Sym_Undefined) &&
1480 !(OldSym & Sym_Weak) &&
1481 !(*FlagsOrErr & SymbolRef::SF_Undefined);
1482
1483 // We will extract if it defines a new global symbol visible to the host.
1484 // This is only necessary for code targeting an offloading language.
1485 bool NewGlobalSymbol =
1486 ((NewSymbol || (OldSym & Sym_Undefined)) &&
1487 !(*FlagsOrErr & SymbolRef::SF_Undefined) && Kind != object::OFK_None &&
1488 !(*FlagsOrErr & SymbolRef::SF_Hidden));
1489 ShouldExtract |= ResolvesStrongReference | NewGlobalSymbol;
1490
1491 // Update this symbol in the "table" with the new information.
1492 if (OldSym & Sym_Undefined && !(*FlagsOrErr & SymbolRef::SF_Undefined))
1493 TmpSyms[Saver.save(S: *NameOrErr)] =
1494 static_cast<Symbol>(OldSym & ~Sym_Undefined);
1495 if (*FlagsOrErr & SymbolRef::SF_Undefined && NewSymbol)
1496 TmpSyms[Saver.save(S: *NameOrErr)] =
1497 static_cast<Symbol>(OldSym | Sym_Undefined);
1498 if (*FlagsOrErr & SymbolRef::SF_Weak)
1499 TmpSyms[Saver.save(S: *NameOrErr)] = static_cast<Symbol>(OldSym | Sym_Weak);
1500 }
1501
1502 // If the file gets extracted we update the table with the new symbols.
1503 if (ShouldExtract)
1504 Syms.insert(I: std::begin(cont&: TmpSyms), E: std::end(cont&: TmpSyms));
1505
1506 return ShouldExtract;
1507}
1508
1509/// Attempt to 'resolve' symbols found in input files. We use this to
1510/// determine if an archive member needs to be extracted. An archive member
1511/// will be extracted if any of the following is true.
1512/// 1) It defines an undefined symbol in a regular object filie.
1513/// 2) It defines a global symbol without hidden visibility that has not
1514/// yet been defined.
1515Expected<bool> getSymbols(StringRef Image, OffloadKind Kind, bool IsArchive,
1516 StringSaver &Saver,
1517 DenseMap<StringRef, Symbol> &Syms) {
1518 MemoryBufferRef Buffer = MemoryBufferRef(Image, "");
1519 switch (identify_magic(magic: Image)) {
1520 case file_magic::bitcode:
1521 return getSymbolsFromBitcode(Buffer, Kind, IsArchive, Saver, Syms);
1522 case file_magic::elf_relocatable: {
1523 Expected<std::unique_ptr<ObjectFile>> ObjFile =
1524 ObjectFile::createObjectFile(Object: Buffer);
1525 if (!ObjFile)
1526 return ObjFile.takeError();
1527 return getSymbolsFromObject(Obj: **ObjFile, Kind, IsArchive, Saver, Syms);
1528 }
1529 default:
1530 return false;
1531 }
1532}
1533
1534/// Search the input files and libraries for embedded device offloading code
1535/// and add it to the list of files to be linked. Files coming from static
1536/// libraries are only added to the input if they are used by an existing
1537/// input file. Returns a list of input files intended for a single linking job.
1538Expected<SmallVector<SmallVector<OffloadFile>>>
1539getDeviceInput(const ArgList &Args) {
1540 llvm::TimeTraceScope TimeScope("ExtractDeviceCode");
1541
1542 // Skip all the input if the user is overriding the output.
1543 if (Args.hasArg(Ids: OPT_override_image))
1544 return SmallVector<SmallVector<OffloadFile>>();
1545
1546 StringRef Root = Args.getLastArgValue(Id: OPT_sysroot_EQ);
1547 SmallVector<StringRef> LibraryPaths;
1548 for (const opt::Arg *Arg : Args.filtered(Ids: OPT_library_path, Ids: OPT_libpath))
1549 LibraryPaths.push_back(Elt: Arg->getValue());
1550
1551 BumpPtrAllocator Alloc;
1552 StringSaver Saver(Alloc);
1553
1554 // Try to extract device code from the linker input files.
1555 bool WholeArchive = Args.hasArg(Ids: OPT_wholearchive_flag) ? true : false;
1556 SmallVector<OffloadFile> ObjectFilesToExtract;
1557 SmallVector<OffloadFile> ArchiveFilesToExtract;
1558 for (const opt::Arg *Arg : Args.filtered(
1559 Ids: OPT_INPUT, Ids: OPT_library, Ids: OPT_whole_archive, Ids: OPT_no_whole_archive)) {
1560 if (Arg->getOption().matches(ID: OPT_whole_archive) ||
1561 Arg->getOption().matches(ID: OPT_no_whole_archive)) {
1562 WholeArchive = Arg->getOption().matches(ID: OPT_whole_archive);
1563 continue;
1564 }
1565
1566 std::optional<std::string> Filename =
1567 Arg->getOption().matches(ID: OPT_library)
1568 ? searchLibrary(Input: Arg->getValue(), Root, SearchPaths: LibraryPaths)
1569 : std::string(Arg->getValue());
1570
1571 if (!Filename && Arg->getOption().matches(ID: OPT_library))
1572 reportError(
1573 E: createStringError(Fmt: "unable to find library -l%s", Vals: Arg->getValue()));
1574
1575 if (!Filename || !sys::fs::exists(Path: *Filename) ||
1576 sys::fs::is_directory(Path: *Filename))
1577 continue;
1578
1579 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
1580 MemoryBuffer::getFileOrSTDIN(Filename: *Filename);
1581 if (std::error_code EC = BufferOrErr.getError())
1582 return createFileError(F: *Filename, EC);
1583
1584 MemoryBufferRef Buffer = **BufferOrErr;
1585 if (identify_magic(magic: Buffer.getBuffer()) == file_magic::elf_shared_object)
1586 continue;
1587
1588 SmallVector<OffloadFile> Binaries;
1589 if (Error Err = extractOffloadBinaries(Buffer, Binaries))
1590 return std::move(Err);
1591
1592 for (auto &OffloadFile : Binaries) {
1593 if (identify_magic(magic: Buffer.getBuffer()) == file_magic::archive &&
1594 !WholeArchive)
1595 ArchiveFilesToExtract.emplace_back(Args: std::move(OffloadFile));
1596 else
1597 ObjectFilesToExtract.emplace_back(Args: std::move(OffloadFile));
1598 }
1599 }
1600
1601 // Link all standard input files and update the list of symbols.
1602 MapVector<OffloadFile::TargetID, SmallVector<OffloadFile, 0>> InputFiles;
1603 DenseMap<OffloadFile::TargetID, DenseMap<StringRef, Symbol>> Syms;
1604 for (OffloadFile &Binary : ObjectFilesToExtract) {
1605 if (!Binary.getBinary())
1606 continue;
1607
1608 SmallVector<OffloadFile::TargetID> CompatibleTargets = {Binary};
1609 for (const auto &[ID, Input] : InputFiles)
1610 if (object::areTargetsCompatible(LHS: Binary, RHS: ID))
1611 CompatibleTargets.emplace_back(Args: ID);
1612
1613 for (const auto &[Index, ID] : llvm::enumerate(First&: CompatibleTargets)) {
1614 Expected<bool> ExtractOrErr = getSymbols(
1615 Image: Binary.getBinary()->getImage(), Kind: Binary.getBinary()->getOffloadKind(),
1616 /*IsArchive=*/false, Saver, Syms&: Syms[ID]);
1617 if (!ExtractOrErr)
1618 return ExtractOrErr.takeError();
1619
1620 // If another target needs this binary it must be copied instead.
1621 if (Index == CompatibleTargets.size() - 1)
1622 InputFiles[ID].emplace_back(Args: std::move(Binary));
1623 else
1624 InputFiles[ID].emplace_back(Args: Binary.copy());
1625 }
1626 }
1627
1628 // Archive members only extract if they define needed symbols. We do this
1629 // after every regular input file so that libraries may be included out of
1630 // order. This follows 'ld.lld' semantics which are more lenient.
1631 bool Extracted = true;
1632 while (Extracted) {
1633 Extracted = false;
1634 for (OffloadFile &Binary : ArchiveFilesToExtract) {
1635 // If the binary was previously extracted it will be set to null.
1636 if (!Binary.getBinary())
1637 continue;
1638
1639 SmallVector<OffloadFile::TargetID> CompatibleTargets = {Binary};
1640 for (const auto &[ID, Input] : InputFiles)
1641 if (object::areTargetsCompatible(LHS: Binary, RHS: ID))
1642 CompatibleTargets.emplace_back(Args: ID);
1643
1644 for (const auto &[Index, ID] : llvm::enumerate(First&: CompatibleTargets)) {
1645 // Only extract an if we have an an object matching this target.
1646 if (!InputFiles.count(Key: ID))
1647 continue;
1648
1649 Expected<bool> ExtractOrErr =
1650 getSymbols(Image: Binary.getBinary()->getImage(),
1651 Kind: Binary.getBinary()->getOffloadKind(), /*IsArchive=*/true,
1652 Saver, Syms&: Syms[ID]);
1653 if (!ExtractOrErr)
1654 return ExtractOrErr.takeError();
1655
1656 Extracted = *ExtractOrErr;
1657
1658 // Skip including the file if it is an archive that does not resolve
1659 // any symbols.
1660 if (!Extracted)
1661 continue;
1662
1663 // If another target needs this binary it must be copied instead.
1664 if (Index == CompatibleTargets.size() - 1)
1665 InputFiles[ID].emplace_back(Args: std::move(Binary));
1666 else
1667 InputFiles[ID].emplace_back(Args: Binary.copy());
1668 }
1669
1670 // If we extracted any files we need to check all the symbols again.
1671 if (Extracted)
1672 break;
1673 }
1674 }
1675
1676 for (StringRef Library : Args.getAllArgValues(Id: OPT_bitcode_library_EQ)) {
1677 auto FileOrErr = getInputBitcodeLibrary(Input: Library);
1678 if (!FileOrErr)
1679 return FileOrErr.takeError();
1680 InputFiles[*FileOrErr].push_back(Elt: std::move(*FileOrErr));
1681 }
1682
1683 SmallVector<SmallVector<OffloadFile>> InputsForTarget;
1684 for (auto &[ID, Input] : InputFiles)
1685 InputsForTarget.emplace_back(Args: std::move(Input));
1686
1687 return std::move(InputsForTarget);
1688}
1689
1690} // namespace
1691
1692int main(int Argc, char **Argv) {
1693 InitLLVM X(Argc, Argv);
1694 InitializeAllTargetInfos();
1695 InitializeAllTargets();
1696 InitializeAllTargetMCs();
1697 InitializeAllAsmParsers();
1698 InitializeAllAsmPrinters();
1699
1700 LinkerExecutable = Argv[0];
1701 sys::PrintStackTraceOnErrorSignal(Argv0: Argv[0]);
1702
1703 const OptTable &Tbl = getOptTable();
1704 BumpPtrAllocator Alloc;
1705 StringSaver Saver(Alloc);
1706 auto Args = Tbl.parseArgs(Argc, Argv, Unknown: OPT_INVALID, Saver, ErrorFn: [&](StringRef Err) {
1707 reportError(E: createStringError(S: Err));
1708 });
1709
1710 if (Args.hasArg(Ids: OPT_help) || Args.hasArg(Ids: OPT_help_hidden)) {
1711 Tbl.printHelp(
1712 OS&: outs(),
1713 Usage: "clang-linker-wrapper [options] -- <options to passed to the linker>",
1714 Title: "\nA wrapper utility over the host linker. It scans the input files\n"
1715 "for sections that require additional processing prior to linking.\n"
1716 "The will then transparently pass all arguments and input to the\n"
1717 "specified host linker to create the final binary.\n",
1718 ShowHidden: Args.hasArg(Ids: OPT_help_hidden), ShowAllAliases: Args.hasArg(Ids: OPT_help_hidden));
1719 return EXIT_SUCCESS;
1720 }
1721 if (Args.hasArg(Ids: OPT_v)) {
1722 printVersion(OS&: outs());
1723 return EXIT_SUCCESS;
1724 }
1725
1726 // This forwards '-mllvm' arguments to LLVM if present.
1727 SmallVector<const char *> NewArgv = {Argv[0]};
1728 for (const opt::Arg *Arg : Args.filtered(Ids: OPT_mllvm))
1729 NewArgv.push_back(Elt: Arg->getValue());
1730 for (const opt::Arg *Arg : Args.filtered(Ids: OPT_offload_opt_eq_minus))
1731 NewArgv.push_back(Elt: Args.MakeArgString(Str: StringRef("-") + Arg->getValue()));
1732 SmallVector<PassPlugin, 1> PluginList;
1733 PassPlugins.setCallback([&](const std::string &PluginPath) {
1734 auto Plugin = PassPlugin::Load(Filename: PluginPath);
1735 if (!Plugin)
1736 report_fatal_error(Err: Plugin.takeError(), /*gen_crash_diag=*/false);
1737 PluginList.emplace_back(Args&: Plugin.get());
1738 });
1739 cl::ParseCommandLineOptions(argc: NewArgv.size(), argv: &NewArgv[0]);
1740
1741 Verbose = Args.hasArg(Ids: OPT_verbose);
1742 DryRun = Args.hasArg(Ids: OPT_dry_run);
1743 SaveTemps = Args.hasArg(Ids: OPT_save_temps);
1744 CudaBinaryPath = Args.getLastArgValue(Id: OPT_cuda_path_EQ).str();
1745
1746 llvm::Triple Triple(
1747 Args.getLastArgValue(Id: OPT_host_triple_EQ, Default: sys::getDefaultTargetTriple()));
1748 if (Args.hasArg(Ids: OPT_o))
1749 ExecutableName = Args.getLastArgValue(Id: OPT_o, Default: "a.out");
1750 else if (Args.hasArg(Ids: OPT_out))
1751 ExecutableName = Args.getLastArgValue(Id: OPT_out, Default: "a.exe");
1752 else
1753 ExecutableName = Triple.isOSWindows() ? "a.exe" : "a.out";
1754
1755 parallel::strategy = hardware_concurrency(ThreadCount: 1);
1756 if (auto *Arg = Args.getLastArg(Ids: OPT_wrapper_jobs)) {
1757 unsigned Threads = 0;
1758 if (!llvm::to_integer(S: Arg->getValue(), Num&: Threads) || Threads == 0)
1759 reportError(E: createStringError(Fmt: "%s: expected a positive integer, got '%s'",
1760 Vals: Arg->getSpelling().data(),
1761 Vals: Arg->getValue()));
1762 parallel::strategy = hardware_concurrency(ThreadCount: Threads);
1763 }
1764
1765 if (Args.hasArg(Ids: OPT_wrapper_time_trace_eq)) {
1766 unsigned Granularity;
1767 Args.getLastArgValue(Id: OPT_wrapper_time_trace_granularity, Default: "500")
1768 .getAsInteger(Radix: 10, Result&: Granularity);
1769 timeTraceProfilerInitialize(TimeTraceGranularity: Granularity, ProcName: Argv[0]);
1770 }
1771
1772 {
1773 llvm::TimeTraceScope TimeScope("Execute linker wrapper");
1774
1775 // Extract the device input files stored in the host fat binary.
1776 auto DeviceInputFiles = getDeviceInput(Args);
1777 if (!DeviceInputFiles)
1778 reportError(E: DeviceInputFiles.takeError());
1779
1780 // Link and wrap the device images extracted from the linker input.
1781 auto FilesOrErr =
1782 linkAndWrapDeviceFiles(LinkerInputFiles&: *DeviceInputFiles, Args, Argv, Argc);
1783 if (!FilesOrErr)
1784 reportError(E: FilesOrErr.takeError());
1785
1786 // Run the host linking job with the rendered arguments.
1787 if (Error Err = runLinker(Files: *FilesOrErr, Args))
1788 reportError(E: std::move(Err));
1789 }
1790
1791 if (const opt::Arg *Arg = Args.getLastArg(Ids: OPT_wrapper_time_trace_eq)) {
1792 if (Error Err = timeTraceProfilerWrite(PreferredFileName: Arg->getValue(), FallbackFileName: ExecutableName))
1793 reportError(E: std::move(Err));
1794 timeTraceProfilerCleanup();
1795 }
1796
1797 // Remove the temporary files created.
1798 if (!SaveTemps)
1799 for (const auto &TempFile : TempFiles)
1800 if (std::error_code EC = sys::fs::remove(path: TempFile))
1801 reportError(E: createFileError(F: TempFile, EC));
1802
1803 return EXIT_SUCCESS;
1804}
1805